Home > Back-end >  Get column from first and last entry of grouping in Snowflake query
Get column from first and last entry of grouping in Snowflake query

Time:11-21

I'm trying to run a query which gets the OHLC statistics from stock trades. To do this, I've created the following query:

SELECT 
    SUM(T.size), 
    SUM(T.size) / SUM(T.price), 
    FIRST_VALUE(T.price), 
    LAST_VALUE(T.price),
    MAX(T.price),
    MIN(T.price),
    FLOOR(T.sip_timestamp / 3600000000000)
FROM (
    SELECT *
    FROM trades AS T
    WHERE 
        T.symbol = 'NTAP' AND 
        T.sip_timestamp >= 1640995200000000000 AND
        T.sip_timestamp < 1672531200000000000
    ORDER BY T.sip_timestamp) AS T
GROUP BY FLOOR(T.sip_timestamp / 3600000000000)

This is supposed to work by filtering the trades data to a timestamp window, grouping it by a time bucket (equal to one hour) and then calculating the statistics based on this. The volume, weighted value, high, low and opening time values are easy but I'm having trouble generating the open and closing prices. I'm using the FIRST_VALUE and LAST_VALUE windowed functions but I'm not sure how to make these work in a grouping context. Does anyone know how to get the proper result here?

CodePudding user response:

So FIRST_VALUE and LAST_VALUE are a window frame functions not aggregate functions, thus do not operate in the domain of a GROUP BY

So what you want to do is define the scope of the FIRST_VALUE via a partition and order by clause, and then in the GROUP BY use an ANY_VALUE

thus:

with trades(symbol, sip_timestamp, size, price) as (
    select * from values
        ('NTAP', 1640995200000000000, 100, 1.234),
        ('NTAP', 1640995200000000001, 100, 1.111)
)
SELECT 
    bucket,
    SUM(T.size), 
    SUM(T.size) / SUM(T.price), 
    ANY_VALUE(b_fv) as bucket_first_value, 
    ANY_VALUE(b_lv) as bucket_last_value, 
    MAX(T.price) as max_price,
    MIN(T.price) as min_price
FROM (
    SELECT *
        ,FLOOR(T.sip_timestamp / 3600000000000) as bucket
        ,FIRST_VALUE(T.price) over (partition by bucket order by T.sip_timestamp) as b_fv
        ,LAST_VALUE(T.price) over (partition by bucket order by T.sip_timestamp) as b_lv 
    FROM trades AS T
    WHERE 
        T.symbol = 'NTAP' AND 
        T.sip_timestamp >= 1640995200000000000 AND
        T.sip_timestamp < 1672531200000000000
    ) AS T
GROUP BY bucket

now given you had a sub-select I put the FIRST_VALUES/LAST_VALUE there as nested window function don't play nice together.

I also put the bucket in that layer also.

BUCKET SUM(T.SIZE) SUM(T.SIZE) / SUM(T.PRICE) BUCKET_FIRST_VALUE BUCKET_LAST_VALUE MAX_PRICE MIN_PRICE
455,832 200 85.287846 1.234 1.111 1.234 1.111

if you have many values you can use ROW_NUMBER and IFF to have a single value, and then use a MAX, so this gives the same results:

SELECT 
    bucket,
    SUM(T.size), 
    SUM(T.size) / SUM(T.price), 
    max(b_fv) as bucket_first_value, 
    max(b_lv) as bucket_last_value, 
    MAX(T.price) as max_price,
    MIN(T.price) as min_price
FROM (
    SELECT *
        ,FLOOR(T.sip_timestamp / 3600000000000) as bucket
        ,IFF(row_number() over (partition by bucket order by T.sip_timestamp) = 1 , T.price, null) as b_fv
        ,IFF(row_number() over (partition by bucket order by T.sip_timestamp desc) = 1 , T.price, null) as b_lv 
    FROM trades AS T
    WHERE 
        T.symbol = 'NTAP' AND 
        T.sip_timestamp >= 1640995200000000000 AND
        T.sip_timestamp < 1672531200000000000
    ) AS T
GROUP BY bucket

now if you have many things you want with the same order you can split those apart like:

SELECT 
    bucket,
    SUM(T.size), 
    SUM(T.size) / SUM(T.price), 
    max(b_fv) as bucket_first_value, 
    max(b_lv) as bucket_last_value, 
    max(b_fv_2) as bucket_first_value_2, 
    max(b_lv_2) as bucket_last_value_2, 
    MAX(T.price) as max_price,
    MIN(T.price) as min_price
FROM (
    SELECT *
        ,FLOOR(T.sip_timestamp / 3600000000000) as bucket
        ,row_number() over (partition by bucket order by T.sip_timestamp) as rw_f
        ,row_number() over (partition by bucket order by T.sip_timestamp desc) as rw_d
        ,IFF(rw_f = 1 , T.price, null) as b_fv
        ,IFF(rw_f = 1 , T.price, null) as b_fv_2
        ,IFF(rw_d = 1 , T.price, null) as b_lv 
        ,IFF(rw_d = 1 , T.price, null) as b_lv_2
    FROM trades AS T
    WHERE 
        T.symbol = 'NTAP' AND 
        T.sip_timestamp >= 1640995200000000000 AND
        T.sip_timestamp < 1672531200000000000
    ) AS T
GROUP BY bucket

Aggregating Function:

CREATE OR REPLACE FUNCTION WindowAggregator(bucket string, bucket_time float, size float, price float)
    RETURNS TABLE (bucket string, sum float, avg float, first float, last float, max float, min float)
    LANGUAGE JAVASCRIPT
    AS '{
            initialize: function (argumentInfo, context) {
               this.ccount = 0;
               this.size_sum = 0;
               this.price_sum = 0;
               this.price_min = Number.POSITIVE_INFINITY;
               this.price_max = Number.NEGATIVE_INFINITY;
               this.price_sum = 0;
               this.last_key = Number.NEGATIVE_INFINITY;
               this.last_val = 0;
               this.first_key = Number.POSITIVE_INFINITY;
               this.first_val = 0;
            },
            processRow: function get_params(row, rowWriter, context){
                    this.bucket = row.BUCKET;
                    if(row.PRICE < this.price_min ) { 
                        this.price_min = row.PRICE; 
                    }
                    if(row.PRICE > this.price_max ) { 
                        this.price_max = row.PRICE; 
                    }
                    if(row.PRICE < this.price_min ) { 
                        this.price_min = row.PRICE; 
                    }
                    if(row.BUCKET_TIME > this.last_key ) { 
                        this.last_key = row.BUCKET_TIME; 
                        this.last_val = row.PRICE; 
                    }
                    if(row.BUCKET_TIME < this.first_key ) { 
                        this.first_key = row.BUCKET_TIME;
                        this.first_val = row.PRICE;
                    }
                    this.price_sum  = row.PRICE;
                    this.size_sum  = row.SIZE;
            },
            finalize: function (rowWriter, context) {
                rowWriter.writeRow({
                    BUCKET: this.bucket, 
                    SUM: this.size_sum, 
                    AVG: this.size_sum/this.price_sum, 
                    FIRST: this.first_val, 
                    LAST: this.last_val, 
                    MAX: this.price_max, 
                    MIN: this.price_min});
            },
        }';
with trades(symbol, sip_timestamp, size, price) as (
    select * from values
        ('NTAP', 1640995200000000000, 100, 1.234),
        ('NTAP', 1640995200000000001, 10, 77.7),
        ('NTAP', 1640995200000000002, 100, 1.111)
)
SELECT 
    wa.*
FROM (
    SELECT *
        ,FLOOR(T.sip_timestamp / 3600000000000) as bucket
        ,(T.sip_timestamp % 3600000000000)::float as bucket_time
    FROM trades AS T
    WHERE 
        T.symbol = 'NTAP' AND 
        T.sip_timestamp >= 1640995200000000000 AND
        T.sip_timestamp < 1672531200000000000
    ) AS T
cross join TABLE(WindowAggregator(t.bucket::text, T.bucket_time, T.size::float, T.price::float) over (partition by t.bucket)) as wa;

gives:

BUCKET SUM AVG FIRST LAST MAX MIN
455832 210 2.623524268 1.234 1.111 77.7 1.111
  • Related