I want to fetch some data from an SQL Server table by creating windows of 15 mins from the start time and find the difference between the MAX and MIN of a certain column (value) in these windows.
The SQL Table stores some telemetry data captured every minute.
Are there any T-SQL functionalities that I can utilize to get such output without running a loop (Windowed Aggregation sort, etc.)?
I have implemented this using a loop, but this is very slow as it would take data from every window and add it to a temp table. Below is a dumbed-down snippet of the code:
WHILE(@startTime <@endTimeInput)
BEGIN
INSERT INTO #energyDataTable
SELECT MIN(timeStamp) as startTime, MAX(timeStamp) as endTime, ROUND(MAX (value) - MIN (value),2) as value
FROM PLANT1.DATA_TABLE as ts
WHERE ts.unixTimestamp>=@startTime and ts.unixTimestamp<= @endTime
GROUP BY ts.logicalId
--Increment startTime and endTime to goto next window
SET @startTime = @endTime;
SET @endTime = @startTime 15 * 60000; --converting 15 mins to millisecs
END
Timestamp | value | unixTime |
---|---|---|
2021-11-08 00:00:09.000 | 1527.6 | 1636329609000 |
2021-11-08 00:01:09.000 | 1528.1 | 1636329669000 |
2021-11-08 00:02:09.000 | 1528.6 | 1636329729000 |
.... | .... | .... |
2021-11-08 00:13:09.000 | 1534.5 | 1636330389000 |
2021-11-08 00:14:09.000 | 1535.3 | 1636330479000 |
2021-11-08 00:15:09.000 | 1535.5 | 1636330509000 |
2021-11-08 00:16:09.000 | 1536.0 | 1636330569000 |
2021-11-08 00:17:09.000 | 1528.6 | 1636330629000 |
.... | .... | .... |
2021-11-08 00:28:09.000 | 1542.5 | 1636331289000 |
2021-11-08 00:29:09.000 | 1543.3 | 1636331379000 |
Needs to be transformed to (I don't necessarily need end_time: Added for better understanding):
start_time | end_time | value |
---|---|---|
2021-11-08 00:00:09.000 | 2021-11-08 00:14:39.000 | 7.7 |
2021-11-08 00:15:09.000 | 2021-11-08 00:29:39.000 | 7.8 |
CodePudding user response:
This shows each 15 minute block along with the difference between the min & max value contained within the block:
with src as (
select *
,chunk = datediff(second,min("Timestamp") over (order by "Timestamp"),"Timestamp") / (15*60)
from (values
('2021-11-08 00:00:09.000',1527.6,1636329609000)
,('2021-11-08 00:01:09.000',1528.1,1636329669000)
,('2021-11-08 00:02:09.000',1528.6,1636329729000)
,('2021-11-08 00:13:09.000',1534.5,1636330389000)
,('2021-11-08 00:14:09.000',1535.3,1636330479000)
,('2021-11-08 00:15:09.000',1535.5,1636330509000)
,('2021-11-08 00:16:09.000',1536.0,1636330569000)
,('2021-11-08 00:17:09.000',1528.6,1636330629000)
,('2021-11-08 00:28:09.000',1542.5,1636331289000)
,('2021-11-08 00:29:09.000',1543.3,1636331379000)
) src("Timestamp","value",unixTime)
)
select chunk
,start_time = min("Timestamp")
,end_time = max("Timestamp")
,value = round(max(value) - min(value),2)
from src
group by chunk
Result:
| chunk | start_time | end_time | value |
------- ------------------------- ------------------------- -------
| 0 | 2021-11-08 00:00:09.000 | 2021-11-08 00:14:09.000 | 7.7 |
| 1 | 2021-11-08 00:15:09.000 | 2021-11-08 00:29:09.000 | 14.7 |
If you want the difference in values between the first and last timestamp in each block, it's a little more involved:
with src as (
select *
,chunk = datediff(second,min("Timestamp") over (order by "Timestamp"),"Timestamp") / (15*60)
from (values
('2021-11-08 00:00:09.000',1527.6,1636329609000)
,('2021-11-08 00:01:09.000',1528.1,1636329669000)
,('2021-11-08 00:02:09.000',1528.6,1636329729000)
,('2021-11-08 00:13:09.000',1534.5,1636330389000)
,('2021-11-08 00:14:09.000',1535.3,1636330479000)
,('2021-11-08 00:15:09.000',1535.5,1636330509000)
,('2021-11-08 00:16:09.000',1536.0,1636330569000)
,('2021-11-08 00:17:09.000',1528.6,1636330629000)
,('2021-11-08 00:28:09.000',1542.5,1636331289000)
,('2021-11-08 00:29:09.000',1543.3,1636331379000)
) src("Timestamp","value",unixTime)
)
,boundaries as (
select chunk
,start_time = min("Timestamp") over (partition by chunk)
, end_time = max("Timestamp") over (partition by chunk)
,start_value = first_value(value) over (partition by chunk order by "Timestamp")
, last_value = last_value(value) over (partition by chunk order by "Timestamp" rows between current_row and unbounded following)
from src
)
select distinct
chunk
,start_time
,end_time
,round(last_value - start_value,2)
from boundaries
Result:
| chunk | start_time | end_time | |
------- ------------------------- ------------------------- -----
| 0 | 2021-11-08 00:00:09.000 | 2021-11-08 00:14:09.000 | 7.7 |
| 1 | 2021-11-08 00:15:09.000 | 2021-11-08 00:29:09.000 | 7.8 |
CodePudding user response:
Data preparation:
declare @Telemetry table
(
[Timestamp] datetime2(0),
[Value] decimal(5,1),
UnixTime bigint
);
insert into @Telemetry
values
('2021-11-08 00:00:09', 1527.6, 1636329609000),
('2021-11-08 00:01:09', 1528.1, 1636329669000),
('2021-11-08 00:02:09', 1528.6, 1636329729000),
('2021-11-08 00:13:09', 1534.5, 1636330389000),
('2021-11-08 00:14:09', 1535.3, 1636330479000),
('2021-11-08 00:15:09', 1535.5, 1636330509000),
('2021-11-08 00:16:09', 1536.0, 1636330569000),
('2021-11-08 00:17:09', 1528.6, 1636330629000),
('2021-11-08 00:28:09', 1542.5, 1636331289000),
('2021-11-08 00:29:09', 1543.3, 1636331379000);
And solution:
declare @StartTimestamp datetime2(0) = (select min([Timestamp]) from @Telemetry);
with N as
(
select n from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) t(n)
),
Numbers as
(
select
row_number() over(order by (select 1)) as Number
from
N n1, N n2, N n3 -- N n4, N n5, N n6
),
Dates as
(
select
Number,
StartDateTime = dateadd(minute, (Number - 1) * 15, @StartTimestamp),
EndDateTime = dateadd(minute, Number * 15, @StartTimestamp)
from
Numbers
)
select
start_time = min([Timestamp]),
end_time = max([Timestamp]),
[value] = round(max([Value]) - min([Value]), 2)
from
@Telemetry t
inner join Dates d on StartDateTime <= t.[Timestamp] and t.[Timestamp] < EndDateTime
group by
Number;
CodePudding user response:
Generally we replace a loop with a tally table. For example on-the-fly tally
declare @dstart datetime = '2021-11-08 00:00:00.000';
declare @window int = 15;
with t0(n) as (
select n
from (
values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
) t(n)
), tally as(
select row_number() over(order by t1.n) n
from t0 t1, t0 t2, t0 t3
)
select s.start, max(value) - min(value) delta
from tally
cross apply (
values (dateadd(minute, @window*(n-1), @dstart))
) s(start)
join myTable t on t.Timestamp>= s.start and t.Timestamp < dateadd(minute, @window, s.start)
group by s.start
order by s.start