Home > Mobile >  How to select data that is conditional and close to each other over a period of time
How to select data that is conditional and close to each other over a period of time

Time:06-01

I have the following data and want to get the lines of the same unit, but the time difference is within 4 hours. I was thinking of splitting 1 day into 4 hour intervals and creating multiple columns, but is there a more optimal way

31/05 00:44 | Unit A    
30/05 09:34 | Unit A    
30/05 14:42 | Unit B    "Time ≤ 4hr → right conditions"
30/05 14:50 | Unit B    
30/05 14:43 | Unit B    
30/05 14:49 | Unit B    
30/05 14:53 | Unit B    
30/05 14:41 | Unit B    
30/05 14:38 | Unit B    
30/05 14:38 | Unit B    
30/05 14:39 | Unit B    
30/05 14:41 | Unit B    
30/05 14:40 | Unit B    
30/05 14:52 | Unit B    
30/05 14:50 | Unit B    
31/05 02:17 | Unit C    
31/05 02:37 | Unit C    
31/05 04:15 | Unit D    "Time ≤ 4hr → right conditions"
31/05 04:32 | Unit D    
31/05 04:12 | Unit D    
31/05 02:11 | Unit D    
31/05 02:26 | Unit D    
31/05 04:11 | Unit D    
31/05 02:16 | Unit D    
31/05 01:48 | Unit D    
31/05 01:54 | Unit D    
31/05 01:50 | Unit D    
31/05 05:11 | Unit E    "Time > 4hr → not right conditions"
31/05 01:47 | Unit E    
31/05 01:44 | Unit E    
31/05 01:44 | Unit E    
**30/05 16:06** | Unit E    
31/05 01:58 | Unit E    
31/05 01:47 | Unit E    
31/05 09:18 | Unit E    
31/05 09:14 | Unit E    
31/05 01:41 | Unit E    
31/05 02:31 | Unit F    
31/05 01:41 | Unit F    
31/05 04:10 | Unit F    
31/05 04:20 | Unit F    
31/05 02:42 | Unit F

Output I want: enter here]

CodePudding user response:

All time dimensions converted to minutes, time limit set to 240 min (4h), count limit set to 10. The result contains all the units for better understanding of the code. Units that have occurencies through more then one day handled too.

WITH
    tbl AS
        (
          Select
            a.UNITS "UNIT",
            a.DATES "DT_TM",
            Count(UNITS) OVER(PARTITION BY UNITS ORDER BY UNITS) "COUNT_OF_UNITS",
            SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2) "MONTH_DAY",
            (To_Number(SubStr(a.DATES, 7, 2)) * 60)   To_Number(SubStr(a.DATES, 10, 2)) "MINUTE_OF_DAY",
            CASE 
                WHEN SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2) = Min(SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2)) OVER (PARTITION BY a.UNITS ORDER BY a.UNITS, SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2))
                THEN (24 * 60) - ((To_Number(SubStr(a.DATES, 7, 2)) * 60)   To_Number(SubStr(a.DATES, 10, 2)))
            ELSE 0
            END "MINUTES_TO_MIDNIGHT"
          From
            A_TABLE a
        ),
    days AS
        (
            SELECT 
                UNITS "UNIT",
                Max(DATES) "MAX_DATES",
                Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2)) "MONTH_DAY",
                To_Number(Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) - To_Number(Min(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) "DAY_DIFF"
            FROM
                A_TABLE
            GROUP BY
                UNITS
            HAVING 
                To_Number(Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) - To_Number(Min(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) > 0 
        ),
    mins AS
       (
            Select 
                t.UNIT,
                4 * 60 "MINUTES_LIMIT",
                10 "COUNT_LIMIT",
                COUNT_OF_UNITS "COUNT_OF_UNITS",
                CASE 
                    WHEN Nvl(d.DAY_DIFF, 0) = 0 
                    THEN SubStr(t.DT_TM, 1, 5) 
                ELSE LPAD(To_Char(To_Number(SubStr(t.DT_TM, 1, 2))   d.DAY_DIFF), 2, '0') || SubStr(t.DT_TM, 3, 3)   
                END "DT", 
                SubStr(t.DT_TM, 7, 5) "TM",
                SubStr(t.DT_TM, 4, 2) || SubStr(t.DT_TM, 1, 2) || SubStr(t.DT_TM, 7, 2) || SubStr(t.DT_TM, 10, 2) "DT_TM",
                Nvl(d.DAY_DIFF, 0) "DAY_DIFF",
                CASE 
                    WHEN Nvl(d.DAY_DIFF, 0) = 0 
                    THEN t.MINUTE_OF_DAY 
                ELSE (SELECT (To_Number(SubStr(MAX_DATES, 7, 2)) * 60)   To_Number(SubStr(MAX_DATES, 10, 2)) FROM days WHERE UNIT = t.UNIT) 
                END "MINUTE_OF_DAY",
                CASE 
                    WHEN Nvl(d.DAY_DIFF, 0) = 0 
                    THEN 0 
                ELSE t.MINUTES_TO_MIDNIGHT 
                END "MINUTES_TO_MIDNIGHT"
            From
                tbl t
            LEFT JOIN
                days d ON(d.UNIT = t.UNIT)
            WHERE
                (Nvl(d.DAY_DIFF, 0) = 0 And CASE WHEN Nvl(d.DAY_DIFF, 0) = 0 THEN 0 ELSE t.MINUTES_TO_MIDNIGHT END = 0)
              OR 
                CASE WHEN Nvl(d.DAY_DIFF, 0) = 0 THEN 0 ELSE t.MINUTES_TO_MIDNIGHT END > 0
            ORDER BY 
                t.UNIT,
                SubStr(t.DT_TM, 4, 2) || SubStr(t.DT_TM, 1, 2) || SubStr(t.DT_TM, 7, 2) || SubStr(t.DT_TM, 10, 2)
        )
SELECT
    UNIT,
    COUNT_OF_UNITS,
    CASE 
        WHEN DAY_DIFF = 0 
        THEN MAX_MINUTE_OF_DAY - MIN_MINUTE_OF_DAY 
    ELSE (MAX_MINUTE_OF_DAY   MAX_MINUTES_TO_MIDNIGHT)   ((DAY_DIFF - 1) * 24 * 60) 
    END "MINUTES_DIFF",
    MINUTES_LIMIT "TIME_LIMIT",
    CASE 
        WHEN MINUTES_LIMIT < CASE 
                                WHEN DAY_DIFF = 0 
                                THEN MAX_MINUTE_OF_DAY - MIN_MINUTE_OF_DAY 
                             ELSE (MAX_MINUTE_OF_DAY   MAX_MINUTES_TO_MIDNIGHT)   ((DAY_DIFF - 1) * 24 * 60) 
                             END 
        THEN 'OVER_THE_LIMIT'
    ELSE 'OK'
    END "TIME_STATUS",
    COUNT_LIMIT "COUNT_LIMIT",
    CASE
        WHEN COUNT_OF_UNITS < 10 
        THEN 'BELOW_THE_LIMIT' 
    ELSE 'OK' 
    END "COUNT_STATUS"
FROM
    (
        SELECT
            m.UNIT "UNIT",
            m.DT "DT",
            m.DAY_DIFF "DAY_DIFF",
            m.MINUTES_LIMIT "MINUTES_LIMIT",
            m.COUNT_LIMIT "COUNT_LIMIT",
            COUNT_OF_UNITS "COUNT_OF_UNITS",
            Min(m.MINUTE_OF_DAY) "MIN_MINUTE_OF_DAY",
            Max(m.MINUTE_OF_DAY) "MAX_MINUTE_OF_DAY",
            Max(m.MINUTES_TO_MIDNIGHT) "MAX_MINUTES_TO_MIDNIGHT"
        FROM
            mins m
        GROUP BY
            m.UNIT,
            m.DT,
            m.DAY_DIFF,
            m.MINUTES_LIMIT,
            m.COUNT_LIMIT,
            COUNT_OF_UNITS
        ORDER BY 
            m.UNIT
    )
--
--  R e s u l t :
--
--  UNIT        COUNT_OF_UNITS MINUTES_DIFF TIME_LIMIT TIME_STATUS    COUNT_LIMIT COUNT_STATUS  
--  ------      -------------- ------------ ---------- -------------- ----------- ---------------
--  Unit A                   2          910        240 OVER_THE_LIMIT          10 BELOW_THE_LIMIT 
--  Unit B  **-->           13           15        240 OK                      10 OK                <--**
--  Unit C                   2           20        240 OK                      10 BELOW_THE_LIMIT 
--  Unit D  **-->           10          164        240 OK                      10 OK                <--**
--  Unit E                  10         1032        240 OVER_THE_LIMIT          10 BELOW_THE_LIMIT 
--  Unit F                   5          159        240 OK                      10 BELOW_THE_LIMIT

CodePudding user response:

I would suggest using a "gaps and islands" approach to the data. This uses 2 row_number() calculations to calculate a "group number" (grp) and once that is determined then the duration of each group can be calculated.

select
      grp
    , max(unit) unit
    ,  to_char(min(logtime),'dd/mm/yyyy hh24:ss') mindt
    ,  to_char(max(logtime),'dd/mm/yyyy hh24:ss') maxdt
    , case when max(logtime) - min(logtime) <= 4/24 then 'Good' else 'Bad' end is_ok
    , count(*) counted
    , max(note) note
from (
    select
          row_number() over(order by (select 1 from dual))
          - row_number() over(partition by unit order by (select 1 from dual)) as grp
        , Logtime
        , Unit
        , note
    from unitlog
    ) d
group by
   grp
-- having max(logtime) - min(logtime) <= 4/24
-- and count(*) >= 10
order by
   mindt


GRP UNIT    MINDT               MAXDT               IS_OK   COUNTED NOTE
0   Unit A  30/05/2022 09:34    30/05/2022 09:34    Good    1   
2   Unit B  30/05/2022 14:38    30/05/2022 14:53    Good    13  "Time ≤ 4hr → right conditions"
27  Unit E  30/05/2022 16:06    31/05/2022 09:18    Bad     10  "Time > 4hr → not right conditions"
40  Unit A  31/05/2022 00:44    31/05/2022 00:44    Good    1   
37  Unit F  31/05/2022 01:41    31/05/2022 04:20    Good    4   
17  Unit D  31/05/2022 01:48    31/05/2022 04:32    Good    10  "Time ≤ 4hr → right conditions"
15  Unit C  31/05/2022 02:17    31/05/2022 02:37    Good    2   
-3  Unit F  31/05/2022 02:42    31/05/2022 02:42    Good    1   

Note that if the having clause seen above was applied the result would be:

GRP UNIT    MINDT               MAXDT               IS_OK   COUNTED NOTE
2   Unit B  30/05/2022 14:38    30/05/2022 14:53    Good    13  "Time ≤ 4hr → right conditions"
17  Unit D  31/05/2022 01:48    31/05/2022 04:32    Good    10  "Time ≤ 4hr → right conditions"

see: https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=139bf2bc925f8f6830ccc3b46d4c3113

Nb: I have assumed that the date/time information is stored as an Oracle date data type and when subtracting the dates it can be compared to 4/24 (4 hours).

  • Related