Home > database >  Count size of islands with no gaps
Count size of islands with no gaps

Time:05-20

My data has 3 columns; ID, NAME, VALUE ID is sequential number like a rowcount There are many different NAMEs I show 3 in the data but there could be 10 There are 2 VALUEs

I would like to group the NAMEs and VALUEs in a 'local' manner and return the size of these 'local' groups. This seems like a version of the Islands and Gaps scenario but no Gaps and Islands of repeating values. I've tried variations on solutions to Islands and Gaps but so far, no real progress.

One of my failed attempts at a partial solution ( a variation on work by Itzik Ben-Gan):

WITH A
AS (SELECT ROW_NUMBER() OVER (ORDER BY id) RN,
           NAME,
           VALUE
    FROM #data
    WHERE NAME LIKE '%Joe%'),
T
AS (
SELECT ROW_NUMBER() OVER (ORDER BY RN) - RN AS Grp,
           RN
    FROM A
    WHERE VALUE = 0)
SELECT MIN(T.RN) AS [From],
       MAX(T.RN) AS [To],
       MAX(T.RN) - MIN(T.RN) AS [Length]
FROM T
GROUP BY Grp
ORDER BY MIN(T.RN);

Output like:

NAME VALUE Count
Joe 0 15
Joe 1 3
Joe 0 5
Joe 1 19
Sue 1 23
Sue 0 17
Sue 1 4
Mary 0 16
Mary 1 10
Mary 0 7
Mary 1 19
Mary 0 3

Where the data is like:

ID NAME VALUE
1 Joe 0
2 Joe 0
3 Joe 0
4 Joe 0
5 Joe 0
6 Joe 0
7 Joe 0
8 Joe 0
9 Joe 0
10 Joe 0
11 Sue 1
12 Sue 1
13 Sue 1
14 Sue 1
15 Sue 1
16 Sue 1
17 Sue 1
18 Sue 1
19 Sue 1
20 Sue 1
21 Sue 1
22 Sue 1
23 Sue 1
24 Sue 1
25 Sue 1
26 Sue 1
27 Mary 0
28 Mary 0
29 Mary 0
30 Mary 0
31 Mary 0
32 Mary 0
33 Mary 0
34 Mary 0
35 Joe 0
36 Joe 0
37 Joe 0
38 Joe 0
39 Joe 0
40 Joe 1
41 Joe 1
42 Joe 1
43 Joe 0
44 Joe 0
45 Joe 0
46 Joe 0
47 Joe 0
48 Joe 1
49 Joe 1
50 Mary 0
51 Mary 0
52 Mary 0
53 Mary 0
54 Mary 0
55 Mary 0
56 Mary 0
57 Mary 0
58 Mary 1
59 Mary 1
60 Mary 1
61 Mary 1
62 Mary 1
63 Mary 1
64 Mary 1
65 Mary 1
66 Mary 1
67 Mary 1
68 Mary 0
69 Sue 1
70 Sue 1
71 Sue 1
72 Sue 1
73 Sue 1
74 Sue 1
75 Sue 1
76 Sue 0
77 Sue 0
78 Sue 0
79 Sue 0
80 Sue 0
81 Sue 0
82 Sue 0
83 Sue 0
84 Mary 0
85 Mary 0
86 Mary 0
87 Mary 0
88 Mary 0
89 Mary 0
90 Mary 1
91 Mary 1
92 Mary 1
93 Mary 1
94 Mary 1
95 Mary 1
96 Mary 1
97 Mary 1
98 Mary 1
99 Mary 1
100 Mary 1
101 Sue 0
102 Sue 0
103 Sue 0
104 Sue 0
105 Sue 0
106 Sue 0
107 Sue 0
108 Sue 0
109 Sue 0
110 Sue 1
111 Sue 1
112 Sue 1
113 Sue 1
114 Joe 1
115 Joe 1
116 Joe 1
117 Joe 1
118 Joe 1
119 Joe 1
120 Joe 1
121 Joe 1
122 Joe 1
123 Joe 1
124 Mary 1
125 Mary 1
126 Mary 1
127 Mary 1
128 Mary 1
129 Mary 1
130 Mary 1
131 Mary 1
132 Mary 0
133 Mary 0
134 Mary 0
135 Joe 1
136 Joe 1
137 Joe 1
138 Joe 1
139 Joe 1
140 Joe 1
141 Joe 1

CodePudding user response:

DROP TABLE IF EXISTS #data;
CREATE TABLE #data 
(
    ID INT,
    NAME VARCHAR(50),
    VALUE INT
);
INSERT INTO #data values
(1  ,'Joe', 0    ),
(2  ,'Joe', 0    ),
(3  ,'Joe', 0    ),
(4  ,'Joe', 0    ),
(5  ,'Joe', 0    ),
(6  ,'Joe', 0    ),
(7  ,'Joe', 0    ),
(8  ,'Joe', 0    ),
(9  ,'Joe', 0    ),
(10 ,'Joe', 0    ),
(11 ,'Sue', 1    ),
(12 ,'Sue', 1    ),
(13 ,'Sue', 1    ),
(14 ,'Sue', 1    ),
(15 ,'Sue', 1    ),
(16 ,'Sue', 1    ),
(17 ,'Sue', 1    ),
(18 ,'Sue', 1    ),
(19 ,'Sue', 1    ),
(20 ,'Sue', 1    ),
(21 ,'Sue', 1    ),
(22 ,'Sue', 1    ),
(23 ,'Sue', 1    ),
(24 ,'Sue', 1    ),
(25 ,'Sue', 1    ),
(26 ,'Sue', 1    ),
(27 ,'Mary',    0    ),
(28 ,'Mary',    0    ),
(29 ,'Mary',    0    ),
(30 ,'Mary',    0    ),
(31 ,'Mary',    0    ),
(32 ,'Mary',    0    ),
(33 ,'Mary',    0    ),
(34 ,'Mary',    0    ),
(35 ,'Joe', 0    ),
(36 ,'Joe', 0    ),
(37 ,'Joe', 0    ),
(38 ,'Joe', 0    ),
(39 ,'Joe', 0    ),
(40 ,'Joe', 1    ),
(41 ,'Joe', 1    ),
(42 ,'Joe', 1    ),
(43 ,'Joe', 0    ),
(44 ,'Joe', 0    ),
(45 ,'Joe', 0    ),
(46 ,'Joe', 0    ),
(47 ,'Joe', 0    ),
(48 ,'Joe', 1    ),
(49 ,'Joe', 1    ),
(50 ,'Mary',    0    ),
(51 ,'Mary',    0    ),
(52 ,'Mary',    0    ),
(53 ,'Mary',    0    ),
(54 ,'Mary',    0    ),
(55 ,'Mary',    0    ),
(56 ,'Mary',    0    ),
(57 ,'Mary',    0    ),
(58 ,'Mary',    1    ),
(59 ,'Mary',    1    ),
(60 ,'Mary',    1    ),
(61 ,'Mary',    1    ),
(62 ,'Mary',    1    ),
(63 ,'Mary',    1    ),
(64 ,'Mary',    1    ),
(65 ,'Mary',    1    ),
(66 ,'Mary',    1    ),
(67 ,'Mary',    1    ),
(68 ,'Mary',    0    ),
(69 ,'Sue', 1    ),
(70 ,'Sue', 1    ),
(71 ,'Sue', 1    ),
(72 ,'Sue', 1    ),
(73 ,'Sue', 1    ),
(74 ,'Sue', 1    ),
(75 ,'Sue', 1    ),
(76 ,'Sue', 0    ),
(77 ,'Sue', 0    ),
(78 ,'Sue', 0    ),
(79 ,'Sue', 0    ),
(80 ,'Sue', 0    ),
(81 ,'Sue', 0    ),
(82 ,'Sue', 0    ),
(83 ,'Sue', 0    ),
(84 ,'Mary',    0    ),
(85 ,'Mary',    0    ),
(86 ,'Mary',    0    ),
(87 ,'Mary',    0    ),
(88 ,'Mary',    0    ),
(89 ,'Mary',    0    ),
(90 ,'Mary',    1    ),
(91 ,'Mary',    1    ),
(92 ,'Mary',    1    ),
(93 ,'Mary',    1    ),
(94 ,'Mary',    1    ),
(95 ,'Mary',    1    ),
(96 ,'Mary',    1    ),
(97 ,'Mary',    1    ),
(98 ,'Mary',    1    ),
(99 ,'Mary',    1    ),
(100    ,'Mary',    1),
(101    ,'Sue', 0),
(102    ,'Sue', 0),
(103    ,'Sue', 0),
(104    ,'Sue', 0),
(105    ,'Sue', 0),
(106    ,'Sue', 0),
(107    ,'Sue', 0),
(108    ,'Sue', 0),
(109    ,'Sue', 0),
(110    ,'Sue', 1),
(111    ,'Sue', 1),
(112    ,'Sue', 1),
(113    ,'Sue', 1),
(114    ,'Joe', 1),
(115    ,'Joe', 1),
(116    ,'Joe', 1),
(117    ,'Joe', 1),
(118    ,'Joe', 1),
(119    ,'Joe', 1),
(120    ,'Joe', 1),
(121    ,'Joe', 1),
(122    ,'Joe', 1),
(123    ,'Joe', 1),
(124    ,'Mary',    1),
(125    ,'Mary',    1),
(126    ,'Mary',    1),
(127    ,'Mary',    1),
(128    ,'Mary',    1),
(129    ,'Mary',    1),
(130    ,'Mary',    1),
(131    ,'Mary',    1),
(132    ,'Mary',    0),
(133    ,'Mary',    0),
(134    ,'Mary',    0),
(135    ,'Joe', 1),
(136    ,'Joe', 1),
(137    ,'Joe', 1),
(138    ,'Joe', 1),
(139    ,'Joe', 1),
(140    ,'Joe', 1),
(141    ,'Joe', 1);

-- first find the changes
WITH breaks AS 
(
    SELECT ID, NAME, VALUE, 
        IIF(LAG(VALUE, 1) OVER (PARTITION BY Name ORDER BY ID) != VALUE, 1, 0) AS NewGrp 
    FROM #data
), 
-- then count how many breaks we have passed 
grouped AS 
(
    SELECT ID, NAME, VALUE, SUM(NewGrp) OVER (PARTITION BY Name ORDER BY ID) grp
    FROM breaks
)
-- group by that count
SELECT NAME, VALUE, COUNT(*)
FROM grouped
GROUP BY NAME, VALUE, grp
ORDER BY NAME, MIN(ID)
  • Related