Home > Mobile >  BigQuery SQL query to Indicate a sequence of 3 rows sharing the same value
BigQuery SQL query to Indicate a sequence of 3 rows sharing the same value

Time:11-23

I need a query that every time the indicator column turns into zero and there are 3 zeros in a row, I would like to assign them a unique group number.

Here is a sample data:

select  0 as offset, 1 as indicator, -1 as grp union all
select  1,  1,  -1  union all
select  2,  1,  -1   union all
select  3,  1,  -1  union all   
select  4,  1,  -1   union all  
select  5,  1,  -1   union all  
select  6,  1,  -1   union all  
select  7,  0,   1   union all  
select  8,  0,   1   union all  
select  9,  0,   1   union all  
select 10,  1,  -1   union all  
select 11,  0,   2   union all  
select 12,  0,   2   union all  
select 13,  0,   2   union all  
select 14,  1,  -1   union all  
select 15,  1,  -1   union all  
select 16,  1,  -1  

In this example there are two sequences of 3 zeros, indicated as grp=1 and grp=2.

CodePudding user response:

The below query solves this.

Firstly it assigns all of the desired groups a tag.

Secondly, we get the row number for them and use integer casting on row_number to assign them a unique group number.

with data as (select  0 as offset, 1 as indicator, -1 as grp union all
select  1,  1,  -1  union all
select  2,  1,  -1   union all
select  3,  1,  -1  union all   
select  4,  1,  -1   union all  
select  5,  1,  -1   union all  
select  6,  1,  -1   union all  
select  7,  0,   1   union all  
select  8,  0,   1   union all  
select  9,  0,   1   union all  
select 10,  1,  -1   union all  
select 11,  0,   2   union all  
select 12,  0,   2   union all  
select 13,  0,   2   union all  
select 14,  1,  -1   union all  
select 15,  1,  -1   union all  
select 16,  1,  -1 ),

tagged as (select 
*,
-- mark as part of the group if both indicators in front, both indicators behind, or one indicator in front and behind are 0.
case 
when indicator = 0 and lead(indicator) over(order by offset) = 0 and lead(indicator, 2) over(order by offset) = 0 then true
when indicator = 0 and lead(indicator) over(order by offset) = 0 and lag(indicator) over(order by offset) = 0 then true
when indicator = 0 and lag(indicator) over(order by offset) = 0 and lag(indicator, 2) over(order by offset) = 0 then true
else false
end as part_of_group
from data),

group_tags as (
select
*,
-- use cast as int to acquire the group number from the row number
CAST((row_number() over(order by offset)   1)/3 AS INT) as group_tag
from 
tagged
where
part_of_group = true)

-- rejoin this data back together
select 
d.*,
gt.group_tag
from data as d
left join
group_tags as gt
on
d.offset = gt.offset

CodePudding user response:

You may consider below approach as well,

WITH partitions AS (
  SELECT *, indicator = 0 AND COUNT(div) OVER (PARTITION BY div, indicator) = 3 AS flag 
    FROM (
      -- *IF* is for making a group number start from 1 at DENSE_RANK() in main query below
      SELECT *, IF(indicator = 1, NULL, SUM(indicator) OVER (ORDER BY offset)) AS div,
        FROM sample_data
  )
)
SELECT offset, indicator, IF(flag, DENSE_RANK() OVER (ORDER BY div) - 1, -1) AS grp
  FROM partitions
 ORDER BY offset;

Query results

enter image description here

  • Related