Wisenheimer Brainstorm Wiki

Site Tools

notes:sql:techniques_treeanalysis

Tree Analysis

We present two techniques of analyzing tree-like data structures:

• using a non-recursive procedure
• using a recursive procedure

The first technique uses a non-recursive stored procedure to find the path and the level of each leaf in a tree-like table. Copy and execute the script:

```-- Create a tree structure
IF OBJECT_ID('DataTree') IS NOT NULL DROP TABLE DataTree
GO
CREATE TABLE DataTree
(
NodeID INT NOT NULL PRIMARY KEY,
ParentID INT NOT NULL DEFAULT 0,
[Name] VARCHAR(1000) NOT NULL DEFAULT ''
)
GO

-- Insert sample data
INSERT INTO DataTree VALUES(2,0,'Poland')
INSERT INTO DataTree VALUES(3,1,'Ontario')
INSERT INTO DataTree VALUES(4,3,'Toronto')
INSERT INTO DataTree VALUES(5,3,'Mississauga')
INSERT INTO DataTree VALUES(6,2,'Warszawa')
INSERT INTO DataTree VALUES(7,2,'Lublin')
INSERT INTO DataTree VALUES(8,1,'Quebec')
INSERT INTO DataTree VALUES(9,8,'Montreal')
INSERT INTO DataTree VALUES(10,4,'North York')
INSERT INTO DataTree VALUES(11,4,'Etobicoke')
INSERT INTO DataTree VALUES(12,6,'Ursus')
INSERT INTO DataTree VALUES(13,6,'Bielany')

IF OBJECT_ID('dbo.sp_ProcessTree') IS NOT NULL DROP PROC dbo.sp_ProcessTree
GO

CREATE PROC dbo.sp_ProcessTree
@NodeID INT -- root node
AS
SET NOCOUNT ON

-- Create a flat structure to hold level numbers and paths
CREATE TABLE #data
(
NodeID INT NOT NULL PRIMARY KEY,
ParentID INT NOT NULL DEFAULT 0,
[Name] VARCHAR(1000) NOT NULL DEFAULT '',
[Path] VARCHAR(2000) NOT NULL DEFAULT '',
[Level] INT NOT NULL DEFAULT 0
)

DECLARE @ParentID INT, @Level INT, @Name VARCHAR(1000), @Path VARCHAR(2000)
DECLARE @LevelCounter INT, @RowCounter INT

INSERT INTO #data SELECT NodeID, ParentID, [Name], [Name], 1 FROM DataTree WHERE ParentID = @NodeID

SET @LevelCounter = 1
SET @RowCounter = 1

WHILE @RowCounter > 0
BEGIN
SET @RowCounter = 0

DECLARE cur CURSOR FOR SELECT NodeID, ParentID, [Name], [Path]
FROM #data WHERE [Level] = @LevelCounter
OPEN cur FETCH NEXT FROM cur INTO @NodeID, @ParentID, @Name, @Path

WHILE @@FETCH_STATUS=0
BEGIN
INSERT INTO #data SELECT NodeID, ParentID, [Name], @Path + '/' + [Name], @LevelCounter + 1
FROM DataTree WHERE ParentID = @NodeID

SET @RowCounter = @RowCounter + @@ROWCOUNT
FETCH NEXT FROM cur INTO @NodeID, @ParentID, @Name, @Path
END

CLOSE cur
DEALLOCATE cur

SET @LevelCounter = @LevelCounter + 1
END

-- Show results
SELECT * FROM #data

-- Clean-up
DROP TABLE #data

SET NOCOUNT OFF
GO```

Show results:

`EXEC dbo.sp_ProcessTree 1 -- root node = Canada`
```NodeID  ParentID  Name          Path                        Level
------  --------  ------------  --------------------------  -----
3	1         Ontario	Ontario                     1
4	3         Toronto	Ontario/Toronto             2
5	3         Mississauga	Ontario/Mississauga         2
8	1         Quebec	Quebec                      1
9	8         Montreal	Quebec/Montreal             2
10	4         North York	Ontario/Toronto/North York  3
11	4         Etobicoke	Ontario/Toronto/Etobicoke   3```
`EXEC dbo.sp_ProcessTree 2 -- root node = Poland`
```NodeID  ParentID  Name          Path                        Level
------  --------  ------------  --------------------------  -----
6	2         Warszawa      Warszawa                    1
7	2         Lublin        Lublin                      1
12	6         Ursus         Warszawa/Ursus              2
13	6         Bielany       Warszawa/Bielany            2```

Another stored procedure determines levels in a tree-like table using recursion. “Root messages” (i.e. messages that do not have any parent) have ID = 0. All other messages have exactly one parent: a root message or another message.

Create our tree-like table:

```IF OBJECT_ID('Message') IS NOT NULL DROP TABLE [Message]
GO

CREATE TABLE [Message]
(
Id INT NOT NULL PRIMARY KEY,
ParentId INT NOT NULL DEFAULT 0,
MessageText NVARCHAR(200) NOT NULL DEFAULT '',
Level INT NOT NULL DEFAULT 0,

--  a self-reference foreign key
CONSTRAINT FK_Message FOREIGN KEY (Id) REFERENCES [Message](Id)
)
GO```

Create the recursive stored procedure sp_UpdateMessageLevel:

```IF OBJECT_ID('sp_UpdateMessageLevel') IS NOT NULL DROP PROC sp_UpdateMessageLevel
GO
CREATE PROC sp_UpdateMessageLevel AS DECLARE @Dummy INT
GO

ALTER PROC sp_UpdateMessageLevel
@MessageId INT = NULL,
@ParentId INT = NULL,
@Level INT = 1
AS
SET NOCOUNT ON

IF @MessageId IS NULL
SELECT TOP 1 @MessageId=Id, @ParentId=ParentId FROM [Message] WHERE Level=0

IF @MessageId IS NOT NULL
BEGIN
IF @ParentID=0
BEGIN
UPDATE [Message] SET Level=@Level WHERE Id=@MessageId
SELECT @MessageId=NULL, @Level=1
END
ELSE
BEGIN
SET @Level=@Level+1
SELECT @ParentId=ParentId FROM [Message] WHERE Id=@ParentId
END

EXEC sp_UpdateMessageLevel @MessageId, @ParentId, @Level
END

SET NOCOUNT OFF
GO```

Insert some sample data to the Message table:

```INSERT INTO [Message] VALUES (1, 0, 'Message 1', 0)
INSERT INTO [Message] VALUES (2, 1, 'Message 1.1', 0)
INSERT INTO [Message] VALUES (3, 1, 'Message 1.2', 0)
INSERT INTO [Message] VALUES (4, 2, 'Message 1.1.1', 0)
INSERT INTO [Message] VALUES (5, 2, 'Message 1.1.2', 0)
INSERT INTO [Message] VALUES (6, 3, 'Message 1.2.1', 0)
INSERT INTO [Message] VALUES (7, 1, 'Message 1.3', 0)
INSERT INTO [Message] VALUES (8, 0, 'Message 2', 0)
INSERT INTO [Message] VALUES (9, 8, 'Message 2.1', 0)```

Execute the stored procedure and show the results:

```EXEC sp_UpdateMessageLevel
SELECT * FROM [Message]```
```Id  ParentId  MessageText    Level
--  --------  -------------  -----
1         0   Message 1          1
2         1   Message 1.1        2
3         1   Message 1.2        2
4         2   Message 1.1.1      3
5         2   Message 1.1.2      3
6         3   Message 1.2.1      3
7         1   Message 1.3        2
8         0   Message 2          1
9         8   Message 2.1        2```

As an additional test, let's use an unsorted set of messages:

```DELETE FROM [Message]
INSERT INTO [Message] VALUES (10, 18, 'Message 2.1', 0)
INSERT INTO [Message] VALUES (13, 23, 'Message 1.2.1', 0)
INSERT INTO [Message] VALUES (12, 81, 'Message 1.1', 0)
INSERT INTO [Message] VALUES (18, 0,  'Message 2', 0)
INSERT INTO [Message] VALUES (23, 81, 'Message 1.2', 0)
INSERT INTO [Message] VALUES (51, 12, 'Message 1.1.2', 0)
INSERT INTO [Message] VALUES (75, 81, 'Message 1.3', 0)
INSERT INTO [Message] VALUES (81, 0,  'Message 1', 0)
INSERT INTO [Message] VALUES (92, 12, 'Message 1.1.1', 0) ```

The stored procedure produces the same results (albeit unsorted):

```Id  ParentId   MessageText    Level
--  --------   -------------  -----
10        18   Message 2.1        2
12        81   Message 1.1        2
13        23   Message 1.2.1      3
18         0   Message 2          1
23        81   Message 1.2        2
51        12   Message 1.1.2      3
75        81   Message 1.3        2
81         0   Message 1          1
92        12   Message 1.1.1      3```