My SQL Server table looks like this:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 NULL
2 0.1000000000 64 125 NULL
3 0.1000000000 88 100 NULL
4 0.1000000000 65 125 NULL
5 0.1000000000 63 64 NULL
6 0.1000000000 65 66 NULL
7 0.2000000000 63 66 NULL
8 0.2000000000 10 61 NULL
9 0.2000000000 19 61 NULL
10 0.2000000000 30 61 NULL
11 0.2000000000 10 65 NULL
12 0.2000000000 10 94 NULL
13 0.2000000000 19 65 NULL
14 0.2000000000 19 94 NULL
15 0.2000000000 30 94 NULL
16 0.2000000000 60 94 NULL
17 0.2000000000 61 94 NULL
The Grp
column should be filled as follows
first record
Grp
is 1if the next row's values of a & b are in any of the previous rows, then it will take the first rows
Grp
valueif the next row's values of a & b are not in any of the previous rows, then
Grp
value will bemax Grp 1
if the record id = 3 then the value of b = 100 which exists in the previous rows, the first one it appears in was id = 1 which is
Grp = 1
so theGrp
will be 1 for id 3
This is what my table should look like:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 1
2 0.1000000000 64 125 2
3 0.1000000000 88 100 1
4 0.1000000000 65 125 2
5 0.1000000000 63 64 2
6 0.1000000000 65 66 2
7 0.2000000000 63 66 2
8 0.2000000000 10 61 3
9 0.2000000000 19 61 3
10 0.2000000000 30 61 3
11 0.2000000000 10 65 2
12 0.2000000000 10 94 3
13 0.2000000000 19 65 2
14 0.2000000000 19 94 3
15 0.2000000000 30 94 3
16 0.2000000000 60 94 3
17 0.2000000000 61 94 3
I have built this script that works fine, but it's extremely slow, any way I can get it better (without a loop)?
DECLARE @T AS TABLE
(
id int IDENTITY,
Distance decimal(18, 10),
a int,
b int,
Grp int
)
INSERT INTO @T(Distance, a, b)
SELECT Distance, a, b
FROM MyTable
ORDER BY Distance
UPDATE @T
SET Grp = 1
WHERE id = 1
DECLARE @i int = 2, @max int, @min int,
@grp int, @a int, @b int, @maxgrp int = 1
SELECT @max = MAX(id) FROM @T
WHILE @i <= @max
BEGIN
SELECT @a = a, @b = b
FROM @T
WHERE id = @i
SELECT @min = MIN(id)
FROM @T
WHERE id < @i AND a IN (@a, @b) OR b IN (@a, @b)
SELECT @grp = grp
FROM @T
WHERE id = @min
IF @grp IS NULL
BEGIN
SET @maxgrp = @maxgrp 1
SET @grp = @maxgrp
END
UPDATE @T
SET Grp = @grp
WHERE id = @i
SET @i = @i 1
END
SELECT * FROM @T
CodePudding user response:
If you only need Grp
to group by it, it can be simplified this way:
declare @t table (id int identity, Distance decimal(18,10)
, a int, b int, Grp int)
insert @t (Distance, a, b)
-- select Distance, a, b From MyTable order by Distance
values
(0.0,100,114),(0.1, 64,125),(0.1, 88,100),(0.1, 65,125),
(0.1, 63, 64),(0.1, 65, 66),(0.2, 63, 66),(0.2, 10, 61),
(0.2, 19, 61),(0.2, 30, 61),(0.2, 10, 65),(0.2, 10, 94),
(0.2, 19, 65),(0.2, 19, 94),(0.2, 30, 94),(0.2, 60, 94),
(0.2, 61, 94)
declare @i int, @d float, @a int, @b int, @g int
declare c1 cursor for select * from @t for update of Grp
open c1
fetch next from c1 into @i, @d, @a, @b, @g
update @t set Grp = 1 where current of c1
fetch next from c1 into @i, @d, @a, @b, @g
while @@fetch_status = 0
begin
update @t set Grp =
isNull((select top 1 (Grp)
from @t t2
where t2.id < @i
and (@a in (t2.a , t2.b)
or @b in (t2.a , t2.b)))
, @i)
where current of c1
fetch next from c1 into @i, @d, @a, @b, @g
end
close c1
deallocate c1
-- If you need consecutive Grp numbers ..
declare @u table (id int identity, Grp int)
insert @u (Grp)
select distinct Grp from @t order by Grp
update @t set Grp = u.id
from @t t
join @u u on (u.Grp = t.Grp and u.Grp<>u.id)
select * from @t
CodePudding user response:
I used a help table to put the desired result in the main table.
CREATE TABLE tb(id int, c decimal(18,10), a int, b int, Grp int);
//----------------------
//----------------------
DECLARE @T AS TABLE (id int, Distance decimal(18,10), a int, b int, Grp int,Grp1 int);
INSERT into @T values
(1, 0.0000000000, 100, 114, NULL,NULL),
(2, 0.1000000000, 64, 125, NULL,NULL),
(3, 0.1000000000, 88, 100, NULL,NULL),
(4, 0.1000000000, 65, 125, NULL,NULL),
(5, 0.1000000000, 63, 64, NULL,NULL),
(6, 0.1000000000, 65, 66, NULL,NULL),
(7, 0.2000000000, 63, 66, NULL,NULL),
(8, 0.2000000000, 10, 61, NULL,NULL),
(9, 0.2000000000, 19, 61, NULL,NULL),
(10, 0.2000000000, 30, 61, NULL,NULL),
(11, 0.2000000000, 10, 65, NULL,NULL),
(12, 0.2000000000, 10, 94, NULL,NULL),
(13, 0.2000000000, 19, 65, NULL,NULL),
(14, 0.2000000000, 19, 94, NULL,NULL),
(15, 0.2000000000, 30, 94, NULL,NULL),
(16, 0.2000000000, 60, 94, NULL,NULL),
(17, 0.2000000000, 61, 94, NULL,NULL);
UPDATE @T
SET grp1 = t2.grp2
FROM @T t1 CROSS JOIN
(SELECT *,
CASE
WHEN id = 1 THEN 1
ELSE
ISNULL((SELECT top 1 id FROM @T s2 WHERE s2.id < s1.id AND (s2.a = s1.a OR s2.b = s1.b OR s2.b = s1.a OR s2.a = s1.b)),0)
END AS grp2
FROM @T s1) t2
WHERE t2.id = t1.id;
Declare @id int
Declare @Distance int
Declare @a int
Declare @b int
Declare @Grp int
Declare @Grp1 int
WHILE((SELECT COUNT(*) FROM @T)>0)
BEGIN
SET @id = (SELECT Top 1 id FROM @T)
SET @Distance = (SELECT Top 1 Distance FROM @T)
SET @a = (SELECT Top 1 a FROM @T)
SET @b = (SELECT Top 1 b FROM @T)
SET @Grp = (SELECT Top 1 Grp FROM @T)
SET @Grp1 = (SELECT Top 1 Grp1 FROM @T)
INSERT into tb
SELECT @id,@Distance,@a,@b,
CASE
WHEN @id = 1 THEN 1
WHEN @Grp1 > 0 THEN (SELECT top 1 s2.grp FROM tb s2 WHERE s2.id = @grp1)
ELSE (SELECT max(s2.grp) 1 FROM tb s2 WHERE s2.id < @id)
END
Delete @T WHERE id=@id
END
//------------------------
SELECT * FROM tb;
Demo in db<>fiddle