I have the following data and want to get the lines of the same unit, but the time difference is within 4 hours. I was thinking of splitting 1 day into 4 hour intervals and creating multiple columns, but is there a more optimal way
31/05 00:44 | Unit A
30/05 09:34 | Unit A
30/05 14:42 | Unit B "Time ≤ 4hr → right conditions"
30/05 14:50 | Unit B
30/05 14:43 | Unit B
30/05 14:49 | Unit B
30/05 14:53 | Unit B
30/05 14:41 | Unit B
30/05 14:38 | Unit B
30/05 14:38 | Unit B
30/05 14:39 | Unit B
30/05 14:41 | Unit B
30/05 14:40 | Unit B
30/05 14:52 | Unit B
30/05 14:50 | Unit B
31/05 02:17 | Unit C
31/05 02:37 | Unit C
31/05 04:15 | Unit D "Time ≤ 4hr → right conditions"
31/05 04:32 | Unit D
31/05 04:12 | Unit D
31/05 02:11 | Unit D
31/05 02:26 | Unit D
31/05 04:11 | Unit D
31/05 02:16 | Unit D
31/05 01:48 | Unit D
31/05 01:54 | Unit D
31/05 01:50 | Unit D
31/05 05:11 | Unit E "Time > 4hr → not right conditions"
31/05 01:47 | Unit E
31/05 01:44 | Unit E
31/05 01:44 | Unit E
**30/05 16:06** | Unit E
31/05 01:58 | Unit E
31/05 01:47 | Unit E
31/05 09:18 | Unit E
31/05 09:14 | Unit E
31/05 01:41 | Unit E
31/05 02:31 | Unit F
31/05 01:41 | Unit F
31/05 04:10 | Unit F
31/05 04:20 | Unit F
31/05 02:42 | Unit F
Output I want: ]
CodePudding user response:
All time dimensions converted to minutes, time limit set to 240 min (4h), count limit set to 10. The result contains all the units for better understanding of the code. Units that have occurencies through more then one day handled too.
WITH
tbl AS
(
Select
a.UNITS "UNIT",
a.DATES "DT_TM",
Count(UNITS) OVER(PARTITION BY UNITS ORDER BY UNITS) "COUNT_OF_UNITS",
SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2) "MONTH_DAY",
(To_Number(SubStr(a.DATES, 7, 2)) * 60) To_Number(SubStr(a.DATES, 10, 2)) "MINUTE_OF_DAY",
CASE
WHEN SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2) = Min(SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2)) OVER (PARTITION BY a.UNITS ORDER BY a.UNITS, SubStr(a.DATES, 4, 2) || SubStr(a.DATES, 1, 2))
THEN (24 * 60) - ((To_Number(SubStr(a.DATES, 7, 2)) * 60) To_Number(SubStr(a.DATES, 10, 2)))
ELSE 0
END "MINUTES_TO_MIDNIGHT"
From
A_TABLE a
),
days AS
(
SELECT
UNITS "UNIT",
Max(DATES) "MAX_DATES",
Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2)) "MONTH_DAY",
To_Number(Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) - To_Number(Min(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) "DAY_DIFF"
FROM
A_TABLE
GROUP BY
UNITS
HAVING
To_Number(Max(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) - To_Number(Min(SubStr(DATES, 4, 2) || SubStr(DATES, 1, 2))) > 0
),
mins AS
(
Select
t.UNIT,
4 * 60 "MINUTES_LIMIT",
10 "COUNT_LIMIT",
COUNT_OF_UNITS "COUNT_OF_UNITS",
CASE
WHEN Nvl(d.DAY_DIFF, 0) = 0
THEN SubStr(t.DT_TM, 1, 5)
ELSE LPAD(To_Char(To_Number(SubStr(t.DT_TM, 1, 2)) d.DAY_DIFF), 2, '0') || SubStr(t.DT_TM, 3, 3)
END "DT",
SubStr(t.DT_TM, 7, 5) "TM",
SubStr(t.DT_TM, 4, 2) || SubStr(t.DT_TM, 1, 2) || SubStr(t.DT_TM, 7, 2) || SubStr(t.DT_TM, 10, 2) "DT_TM",
Nvl(d.DAY_DIFF, 0) "DAY_DIFF",
CASE
WHEN Nvl(d.DAY_DIFF, 0) = 0
THEN t.MINUTE_OF_DAY
ELSE (SELECT (To_Number(SubStr(MAX_DATES, 7, 2)) * 60) To_Number(SubStr(MAX_DATES, 10, 2)) FROM days WHERE UNIT = t.UNIT)
END "MINUTE_OF_DAY",
CASE
WHEN Nvl(d.DAY_DIFF, 0) = 0
THEN 0
ELSE t.MINUTES_TO_MIDNIGHT
END "MINUTES_TO_MIDNIGHT"
From
tbl t
LEFT JOIN
days d ON(d.UNIT = t.UNIT)
WHERE
(Nvl(d.DAY_DIFF, 0) = 0 And CASE WHEN Nvl(d.DAY_DIFF, 0) = 0 THEN 0 ELSE t.MINUTES_TO_MIDNIGHT END = 0)
OR
CASE WHEN Nvl(d.DAY_DIFF, 0) = 0 THEN 0 ELSE t.MINUTES_TO_MIDNIGHT END > 0
ORDER BY
t.UNIT,
SubStr(t.DT_TM, 4, 2) || SubStr(t.DT_TM, 1, 2) || SubStr(t.DT_TM, 7, 2) || SubStr(t.DT_TM, 10, 2)
)
SELECT
UNIT,
COUNT_OF_UNITS,
CASE
WHEN DAY_DIFF = 0
THEN MAX_MINUTE_OF_DAY - MIN_MINUTE_OF_DAY
ELSE (MAX_MINUTE_OF_DAY MAX_MINUTES_TO_MIDNIGHT) ((DAY_DIFF - 1) * 24 * 60)
END "MINUTES_DIFF",
MINUTES_LIMIT "TIME_LIMIT",
CASE
WHEN MINUTES_LIMIT < CASE
WHEN DAY_DIFF = 0
THEN MAX_MINUTE_OF_DAY - MIN_MINUTE_OF_DAY
ELSE (MAX_MINUTE_OF_DAY MAX_MINUTES_TO_MIDNIGHT) ((DAY_DIFF - 1) * 24 * 60)
END
THEN 'OVER_THE_LIMIT'
ELSE 'OK'
END "TIME_STATUS",
COUNT_LIMIT "COUNT_LIMIT",
CASE
WHEN COUNT_OF_UNITS < 10
THEN 'BELOW_THE_LIMIT'
ELSE 'OK'
END "COUNT_STATUS"
FROM
(
SELECT
m.UNIT "UNIT",
m.DT "DT",
m.DAY_DIFF "DAY_DIFF",
m.MINUTES_LIMIT "MINUTES_LIMIT",
m.COUNT_LIMIT "COUNT_LIMIT",
COUNT_OF_UNITS "COUNT_OF_UNITS",
Min(m.MINUTE_OF_DAY) "MIN_MINUTE_OF_DAY",
Max(m.MINUTE_OF_DAY) "MAX_MINUTE_OF_DAY",
Max(m.MINUTES_TO_MIDNIGHT) "MAX_MINUTES_TO_MIDNIGHT"
FROM
mins m
GROUP BY
m.UNIT,
m.DT,
m.DAY_DIFF,
m.MINUTES_LIMIT,
m.COUNT_LIMIT,
COUNT_OF_UNITS
ORDER BY
m.UNIT
)
--
-- R e s u l t :
--
-- UNIT COUNT_OF_UNITS MINUTES_DIFF TIME_LIMIT TIME_STATUS COUNT_LIMIT COUNT_STATUS
-- ------ -------------- ------------ ---------- -------------- ----------- ---------------
-- Unit A 2 910 240 OVER_THE_LIMIT 10 BELOW_THE_LIMIT
-- Unit B **--> 13 15 240 OK 10 OK <--**
-- Unit C 2 20 240 OK 10 BELOW_THE_LIMIT
-- Unit D **--> 10 164 240 OK 10 OK <--**
-- Unit E 10 1032 240 OVER_THE_LIMIT 10 BELOW_THE_LIMIT
-- Unit F 5 159 240 OK 10 BELOW_THE_LIMIT
CodePudding user response:
I would suggest using a "gaps and islands" approach to the data. This uses 2 row_number() calculations to calculate a "group number" (grp) and once that is determined then the duration of each group can be calculated.
select
grp
, max(unit) unit
, to_char(min(logtime),'dd/mm/yyyy hh24:ss') mindt
, to_char(max(logtime),'dd/mm/yyyy hh24:ss') maxdt
, case when max(logtime) - min(logtime) <= 4/24 then 'Good' else 'Bad' end is_ok
, count(*) counted
, max(note) note
from (
select
row_number() over(order by (select 1 from dual))
- row_number() over(partition by unit order by (select 1 from dual)) as grp
, Logtime
, Unit
, note
from unitlog
) d
group by
grp
-- having max(logtime) - min(logtime) <= 4/24
-- and count(*) >= 10
order by
mindt
GRP UNIT MINDT MAXDT IS_OK COUNTED NOTE
0 Unit A 30/05/2022 09:34 30/05/2022 09:34 Good 1
2 Unit B 30/05/2022 14:38 30/05/2022 14:53 Good 13 "Time ≤ 4hr → right conditions"
27 Unit E 30/05/2022 16:06 31/05/2022 09:18 Bad 10 "Time > 4hr → not right conditions"
40 Unit A 31/05/2022 00:44 31/05/2022 00:44 Good 1
37 Unit F 31/05/2022 01:41 31/05/2022 04:20 Good 4
17 Unit D 31/05/2022 01:48 31/05/2022 04:32 Good 10 "Time ≤ 4hr → right conditions"
15 Unit C 31/05/2022 02:17 31/05/2022 02:37 Good 2
-3 Unit F 31/05/2022 02:42 31/05/2022 02:42 Good 1
Note that if the having clause seen above was applied the result would be:
GRP UNIT MINDT MAXDT IS_OK COUNTED NOTE
2 Unit B 30/05/2022 14:38 30/05/2022 14:53 Good 13 "Time ≤ 4hr → right conditions"
17 Unit D 31/05/2022 01:48 31/05/2022 04:32 Good 10 "Time ≤ 4hr → right conditions"
see: https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=139bf2bc925f8f6830ccc3b46d4c3113
Nb: I have assumed that the date/time information is stored as an Oracle date data type and when subtracting the dates it can be compared to 4/24 (4 hours).