Home > other >  SQL Server Calculate Aggregated Difference in Data Windows
SQL Server Calculate Aggregated Difference in Data Windows

Time:11-11

I want to fetch some data from an SQL Server table by creating windows of 15 mins from the start time and find the difference between the MAX and MIN of a certain column (value) in these windows.

The SQL Table stores some telemetry data captured every minute.

Are there any T-SQL functionalities that I can utilize to get such output without running a loop (Windowed Aggregation sort, etc.)?

I have implemented this using a loop, but this is very slow as it would take data from every window and add it to a temp table. Below is a dumbed-down snippet of the code:

WHILE(@startTime <@endTimeInput)
BEGIN
    INSERT INTO #energyDataTable
    SELECT  MIN(timeStamp) as startTime, MAX(timeStamp) as endTime, ROUND(MAX (value) - MIN (value),2) as value
    FROM PLANT1.DATA_TABLE as ts
    WHERE ts.unixTimestamp>=@startTime and ts.unixTimestamp<= @endTime
    GROUP BY ts.logicalId
    
    --Increment startTime and endTime to goto next window
    SET @startTime = @endTime;
    SET @endTime = @startTime   15 * 60000; --converting 15 mins to millisecs
END
Timestamp value unixTime
2021-11-08 00:00:09.000 1527.6 1636329609000
2021-11-08 00:01:09.000 1528.1 1636329669000
2021-11-08 00:02:09.000 1528.6 1636329729000
.... .... ....
2021-11-08 00:13:09.000 1534.5 1636330389000
2021-11-08 00:14:09.000 1535.3 1636330479000
2021-11-08 00:15:09.000 1535.5 1636330509000
2021-11-08 00:16:09.000 1536.0 1636330569000
2021-11-08 00:17:09.000 1528.6 1636330629000
.... .... ....
2021-11-08 00:28:09.000 1542.5 1636331289000
2021-11-08 00:29:09.000 1543.3 1636331379000

Needs to be transformed to (I don't necessarily need end_time: Added for better understanding):

start_time end_time value
2021-11-08 00:00:09.000 2021-11-08 00:14:39.000 7.7
2021-11-08 00:15:09.000 2021-11-08 00:29:39.000 7.8

CodePudding user response:

This shows each 15 minute block along with the difference between the min & max value contained within the block:

with src as (
    select *
          ,chunk = datediff(second,min("Timestamp") over (order by "Timestamp"),"Timestamp") / (15*60)
    from (values
         ('2021-11-08 00:00:09.000',1527.6,1636329609000)
        ,('2021-11-08 00:01:09.000',1528.1,1636329669000)
        ,('2021-11-08 00:02:09.000',1528.6,1636329729000)
        ,('2021-11-08 00:13:09.000',1534.5,1636330389000)
        ,('2021-11-08 00:14:09.000',1535.3,1636330479000)
        ,('2021-11-08 00:15:09.000',1535.5,1636330509000)
        ,('2021-11-08 00:16:09.000',1536.0,1636330569000)
        ,('2021-11-08 00:17:09.000',1528.6,1636330629000)
        ,('2021-11-08 00:28:09.000',1542.5,1636331289000)
        ,('2021-11-08 00:29:09.000',1543.3,1636331379000)
         ) src("Timestamp","value",unixTime)
)
select chunk
      ,start_time = min("Timestamp")
      ,end_time   = max("Timestamp")
      ,value      = round(max(value) - min(value),2)
from src
group by chunk

Result:

| chunk | start_time              | end_time                | value |
 ------- ------------------------- ------------------------- ------- 
|     0 | 2021-11-08 00:00:09.000 | 2021-11-08 00:14:09.000 |   7.7 |
|     1 | 2021-11-08 00:15:09.000 | 2021-11-08 00:29:09.000 |  14.7 |

If you want the difference in values between the first and last timestamp in each block, it's a little more involved:

with src as (
    select *
          ,chunk = datediff(second,min("Timestamp") over (order by "Timestamp"),"Timestamp") / (15*60)
    from (values
         ('2021-11-08 00:00:09.000',1527.6,1636329609000)
        ,('2021-11-08 00:01:09.000',1528.1,1636329669000)
        ,('2021-11-08 00:02:09.000',1528.6,1636329729000)
        ,('2021-11-08 00:13:09.000',1534.5,1636330389000)
        ,('2021-11-08 00:14:09.000',1535.3,1636330479000)
        ,('2021-11-08 00:15:09.000',1535.5,1636330509000)
        ,('2021-11-08 00:16:09.000',1536.0,1636330569000)
        ,('2021-11-08 00:17:09.000',1528.6,1636330629000)
        ,('2021-11-08 00:28:09.000',1542.5,1636331289000)
        ,('2021-11-08 00:29:09.000',1543.3,1636331379000)
         ) src("Timestamp","value",unixTime)
)
,boundaries as (
    select chunk
          ,start_time  = min("Timestamp")   over (partition by chunk)
          ,  end_time  = max("Timestamp")   over (partition by chunk)
          ,start_value = first_value(value) over (partition by chunk order by "Timestamp")
          , last_value =  last_value(value) over (partition by chunk order by "Timestamp" rows between current_row and unbounded following)
    from src
)
select distinct
       chunk
      ,start_time
      ,end_time
      ,round(last_value - start_value,2)
from boundaries

Result:

| chunk | start_time              | end_time                |     |
 ------- ------------------------- ------------------------- ----- 
|     0 | 2021-11-08 00:00:09.000 | 2021-11-08 00:14:09.000 | 7.7 |
|     1 | 2021-11-08 00:15:09.000 | 2021-11-08 00:29:09.000 | 7.8 |

CodePudding user response:

Data preparation:

declare @Telemetry table 
(
    [Timestamp] datetime2(0), 
    [Value] decimal(5,1), 
    UnixTime bigint
);

insert into @Telemetry 
values
    ('2021-11-08 00:00:09', 1527.6, 1636329609000),
    ('2021-11-08 00:01:09', 1528.1, 1636329669000),
    ('2021-11-08 00:02:09', 1528.6, 1636329729000),
    ('2021-11-08 00:13:09', 1534.5, 1636330389000),
    ('2021-11-08 00:14:09', 1535.3, 1636330479000),
    ('2021-11-08 00:15:09', 1535.5, 1636330509000),
    ('2021-11-08 00:16:09', 1536.0, 1636330569000),
    ('2021-11-08 00:17:09', 1528.6, 1636330629000),
    ('2021-11-08 00:28:09', 1542.5, 1636331289000),
    ('2021-11-08 00:29:09', 1543.3, 1636331379000);    

And solution:

declare @StartTimestamp datetime2(0) = (select min([Timestamp]) from @Telemetry);

with N as 
(
    select n from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) t(n)    
),
Numbers as
(
    select
        row_number() over(order by (select 1)) as Number
    from 
        N n1, N n2, N n3 -- N n4, N n5, N n6
),
Dates as
(
    select 
        Number,
        StartDateTime = dateadd(minute, (Number - 1) * 15, @StartTimestamp),
        EndDateTime   = dateadd(minute,  Number      * 15, @StartTimestamp)
    from
        Numbers
)
select
    start_time  = min([Timestamp]),
    end_time    = max([Timestamp]),
    [value]     = round(max([Value]) - min([Value]), 2)
from
    @Telemetry t
    inner join Dates d on StartDateTime <= t.[Timestamp] and t.[Timestamp] < EndDateTime
group by
    Number;

SQL Fiddle

CodePudding user response:

Generally we replace a loop with a tally table. For example on-the-fly tally

declare @dstart datetime = '2021-11-08 00:00:00.000';
declare @window int = 15;

with t0(n) as (
 select n 
 from (
    values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
    ) t(n)
), tally as(
   select row_number() over(order by t1.n)  n
   from t0 t1, t0 t2, t0 t3
)
select s.start, max(value) - min(value) delta
from tally
cross apply (
   values (dateadd(minute, @window*(n-1), @dstart))  
   ) s(start)
join  myTable t on t.Timestamp>= s.start and t.Timestamp < dateadd(minute, @window, s.start)
group by s.start
order by s.start
  • Related