I have a table with a series of timelines that are normalized starting from 00:00:00.00000
. I want to summate them sequentially and stitch them together based on my order_key
value.
Sample Data:
id order_key start_time end_time activity_type
1 1 00:00:00.00000 00:00:00.01000 A
1 1 00:00:00.01000 00:00:00.02000 B
1 1 00:00:00.02000 00:00:00.03000 C
1 1 00:00:00.03000 00:00:00.03500 A
1 2 00:00:00.00000 00:00:00.01500 A
1 2 00:00:00.01500 00:00:00.04500 B
1 3 00:00:00.00000 00:00:00.05500 B
Desired Output:
id start_time end_time activity_type
1 00:00:00.00000 00:00:00.01000 A
1 00:00:00.01000 00:00:00.02000 B
1 00:00:00.02000 00:00:00.03000 C
1 00:00:00.03000 00:00:00.03500 A
1 00:00:00.03500 00:00:00.05000 A
1 00:00:00.05000 00:00:00.08000 B
1 00:00:00.08000 00:00:00.13500 B
My Attempt:
SELECT
id
, -- CASE WHEN new order_key THEN LAG(end_time) OVER (PARTITION BY id ORDER BY snap_view_index, start_time) ELSE start_time END AS start_time
, -- CASE WHEN new order_key THEN TIME_ADD(LAG(end_time), INTERVAL TIME_DIFF(end_time, start_time, MILLISECOND) MILLISECOND) ELSE TIME_ADD(start_time, INTERVAL TIME_DIFF(end_time, start_time, MILLISECOND) MILLISECOND)
, activity_type
FROM my_table;
CodePudding user response:
Consider below query:
Recursive Approach
WITH RECURSIVE t AS (
SELECT ROW_NUMBER() OVER (PARTITION BY id ORDER BY order_key, start_time) pos, *
FROM sample_table
),
r AS (
SELECT * FROM t WHERE pos = 1
UNION ALL
SELECT t.* REPLACE (
r.end_time AS start_time,
TIME_ADD(r.end_time, INTERVAL TIME_DIFF(t.end_time, t.start_time, MILLISECOND) MILLISECOND) AS end_time
)
FROM r JOIN t ON t.pos = r.pos 1
)
SELECT * EXCEPT(pos, order_key) FROM r ORDER BY pos;
Non-recursive Approach
WITH diff_by_key AS (
SELECT id, order_key,
SUM(TIME_DIFF(MAX(end_time), MIN(start_time), MILLISECOND)) OVER w AS cum_diff
FROM sample_table GROUP BY id, order_key
WINDOW w AS (PARTITION BY id ORDER BY order_key)
)
SELECT s.*
REPLACE(
TIME_ADD(start_time, INTERVAL IFNULL(cum_diff, 0) MILLISECOND) AS start_time,
TIME_ADD(end_time, INTERVAL IFNULL(cum_diff, 0) MILLISECOND) AS end_time
)
FROM sample_table s LEFT JOIN diff_by_key d ON s.id = d.id AND s.order_key = d.order_key 1
ORDER BY order_key, start_time;