Home > database >  How to group timestamps in blocks and get counts for rolling previous hour?
How to group timestamps in blocks and get counts for rolling previous hour?

Time:12-01

I have a table with timestamps and event IDs. I need to count the number of event IDs over the previous hour for 15 min timestamps (00:15, 00:30, 00:45 and so on).

sample table:

CREATE GLOBAL TEMPORARY TABLE my_temp( 
  EVENT_START_TIMESTAMP TIMESTAMP, 
  EVENTS_ID VARCHAR2(30)) ON COMMIT PRESERVE ROWS;
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:02:49.623', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:02:59.250', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:03:40.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208749);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:03:45.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208750);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 01:09:15.320', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:09:26.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208765);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:10:25.000', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:34:23.240', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:35:01.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208767);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 01:35:05.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208768);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 01:35:07.850', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:35:10.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208769);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:36:07.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208772);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:36:11.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208773);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:36:16.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208774);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:36:21.360', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:45:03.830', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:45:04.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208779);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 02:45:08.533', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:45:09.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208780);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:47:29.000', 'YYYY-MM-DD HH24:MI:SS.FF3'),  69208789);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 00:48:43.833', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 01:02:20.333', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
INSERT INTO my_temp VALUES(TO_TIMESTAMP('2022-09-26 01:02:21.973', 'YYYY-MM-DD HH24:MI:SS.FF3'), NULL);
SELECT * FROM my_temp;

Following is the code I am using:

SELECT  Trunc(das.EVENT_START_TIMESTAMP, 'HH')   NUMTODSINTERVAL(floor((EXTRACT(SECOND  FROM (das.EVENT_START_TIMESTAMP - Trunc(das.EVENT_START_TIMESTAMP, 'HH')))
                  EXTRACT(MINUTE FROM (das.EVENT_START_TIMESTAMP - Trunc(das.EVENT_START_TIMESTAMP, 'HH')))*60)/900)*900 900, 'second') AS timestamp_blk_end
        , count(das.EVENTS_ID) OVER (PARTITION BY Trunc(das.EVENT_START_TIMESTAMP) ORDER BY das.EVENT_START_TIMESTAMP RANGE BETWEEN INTERVAL '3600' SECOND PRECEDING AND INTERVAL '0' SECOND FOLLOWING) AS processed_count
FROM my_temp das
GROUP by  Trunc(das.EVENT_START_TIMESTAMP, 'HH')   NUMTODSINTERVAL(floor((EXTRACT(SECOND  FROM (das.EVENT_START_TIMESTAMP - Trunc(das.EVENT_START_TIMESTAMP, 'HH')))
                  EXTRACT(MINUTE FROM (das.EVENT_START_TIMESTAMP - Trunc(das.EVENT_START_TIMESTAMP, 'HH')))*60)/900)*900 900, 'second')

But this throws an error saying ORA-00979: not a GROUP BY expression. if I include das.EVENT_START_TIMESTAMP, das.EVENTS_ID as groupby, i get the right values but with repeated rows which I dont need or want.

How do I create a query that gives me the following output

    TIMESTAMP_BLK_END       PROCESSED_COUNT
2022-09-26 00:15:00.000         3
2022-09-26 00:30:00.000         3
2022-09-26 00:45:00.000         8
2022-09-26 01:00:00.000         11
2022-09-26 01:15:00.000         8
2022-09-26 01:30:00.000         8
2022-09-26 01:45:00.000         4
2022-09-26 02:00:00.000         1
2022-09-26 02:15:00.000         1
2022-09-26 02:30:00.000         1
2022-09-26 02:45:00.000         0
2022-09-26 03:00:00.000         0

CodePudding user response:

You can use:

WITH quarter_hour_groups (timestamp_blk_end, events_id) AS (
  SELECT Trunc(EVENT_START_TIMESTAMP, 'HH')
           NUMTODSINTERVAL(
             15 * FLOOR(EXTRACT(MINUTE FROM EVENT_START_TIMESTAMP) / 15)   15,
             'MINUTE'
           ) AS timestamp_blk_end,
         events_id
  FROM   my_temp
),
all_times (time) AS (
  SELECT min_time   (LEVEL - 1) * INTERVAL '15' MINUTE
  FROM   (
    SELECT MIN(timestamp_blk_end) AS min_time,
           MAX(timestamp_blk_end) AS max_time
    FROM   quarter_hour_groups
  )
  CONNECT BY
         min_time   (LEVEL - 1) * INTERVAL '15' MINUTE <= max_time
)
SELECT  a.time,
        SUM(COUNT(EVENTS_ID)) OVER (
          ORDER BY a.time
          RANGE BETWEEN 0.75/24 PRECEDING AND 0 FOLLOWING
        ) AS num_event
FROM    all_times a
        LEFT OUTER JOIN quarter_hour_groups g
        ON (a.time = g.timestamp_blk_end)
GROUP BY
        a.time

Which, for the sample data, outputs:

TIME NUM_EVENT
2022-09-26 00:15:00 3
2022-09-26 00:30:00 3
2022-09-26 00:45:00 8
2022-09-26 01:00:00 11
2022-09-26 01:15:00 8
2022-09-26 01:30:00 8
2022-09-26 01:45:00 4
2022-09-26 02:00:00 1
2022-09-26 02:15:00 1
2022-09-26 02:30:00 1
2022-09-26 02:45:00 0
2022-09-26 03:00:00 0

fiddle

  • Related