I'm trying to run a query which gets the OHLC statistics from stock trades. To do this, I've created the following query:
SELECT
SUM(T.size),
SUM(T.size) / SUM(T.price),
FIRST_VALUE(T.price),
LAST_VALUE(T.price),
MAX(T.price),
MIN(T.price),
FLOOR(T.sip_timestamp / 3600000000000)
FROM (
SELECT *
FROM trades AS T
WHERE
T.symbol = 'NTAP' AND
T.sip_timestamp >= 1640995200000000000 AND
T.sip_timestamp < 1672531200000000000
ORDER BY T.sip_timestamp) AS T
GROUP BY FLOOR(T.sip_timestamp / 3600000000000)
This is supposed to work by filtering the trades data to a timestamp window, grouping it by a time bucket (equal to one hour) and then calculating the statistics based on this. The volume, weighted value, high, low and opening time values are easy but I'm having trouble generating the open and closing prices. I'm using the FIRST_VALUE
and LAST_VALUE
windowed functions but I'm not sure how to make these work in a grouping context. Does anyone know how to get the proper result here?
CodePudding user response:
So FIRST_VALUE and LAST_VALUE are a window frame functions not aggregate functions, thus do not operate in the domain of a GROUP BY
So what you want to do is define the scope of the FIRST_VALUE via a partition and order by clause, and then in the GROUP BY use an ANY_VALUE
thus:
with trades(symbol, sip_timestamp, size, price) as (
select * from values
('NTAP', 1640995200000000000, 100, 1.234),
('NTAP', 1640995200000000001, 100, 1.111)
)
SELECT
bucket,
SUM(T.size),
SUM(T.size) / SUM(T.price),
ANY_VALUE(b_fv) as bucket_first_value,
ANY_VALUE(b_lv) as bucket_last_value,
MAX(T.price) as max_price,
MIN(T.price) as min_price
FROM (
SELECT *
,FLOOR(T.sip_timestamp / 3600000000000) as bucket
,FIRST_VALUE(T.price) over (partition by bucket order by T.sip_timestamp) as b_fv
,LAST_VALUE(T.price) over (partition by bucket order by T.sip_timestamp) as b_lv
FROM trades AS T
WHERE
T.symbol = 'NTAP' AND
T.sip_timestamp >= 1640995200000000000 AND
T.sip_timestamp < 1672531200000000000
) AS T
GROUP BY bucket
now given you had a sub-select I put the FIRST_VALUES/LAST_VALUE there as nested window function don't play nice together.
I also put the bucket
in that layer also.
BUCKET | SUM(T.SIZE) | SUM(T.SIZE) / SUM(T.PRICE) | BUCKET_FIRST_VALUE | BUCKET_LAST_VALUE | MAX_PRICE | MIN_PRICE |
---|---|---|---|---|---|---|
455,832 | 200 | 85.287846 | 1.234 | 1.111 | 1.234 | 1.111 |
if you have many values you can use ROW_NUMBER and IFF to have a single value, and then use a MAX, so this gives the same results:
SELECT
bucket,
SUM(T.size),
SUM(T.size) / SUM(T.price),
max(b_fv) as bucket_first_value,
max(b_lv) as bucket_last_value,
MAX(T.price) as max_price,
MIN(T.price) as min_price
FROM (
SELECT *
,FLOOR(T.sip_timestamp / 3600000000000) as bucket
,IFF(row_number() over (partition by bucket order by T.sip_timestamp) = 1 , T.price, null) as b_fv
,IFF(row_number() over (partition by bucket order by T.sip_timestamp desc) = 1 , T.price, null) as b_lv
FROM trades AS T
WHERE
T.symbol = 'NTAP' AND
T.sip_timestamp >= 1640995200000000000 AND
T.sip_timestamp < 1672531200000000000
) AS T
GROUP BY bucket
now if you have many things you want with the same order you can split those apart like:
SELECT
bucket,
SUM(T.size),
SUM(T.size) / SUM(T.price),
max(b_fv) as bucket_first_value,
max(b_lv) as bucket_last_value,
max(b_fv_2) as bucket_first_value_2,
max(b_lv_2) as bucket_last_value_2,
MAX(T.price) as max_price,
MIN(T.price) as min_price
FROM (
SELECT *
,FLOOR(T.sip_timestamp / 3600000000000) as bucket
,row_number() over (partition by bucket order by T.sip_timestamp) as rw_f
,row_number() over (partition by bucket order by T.sip_timestamp desc) as rw_d
,IFF(rw_f = 1 , T.price, null) as b_fv
,IFF(rw_f = 1 , T.price, null) as b_fv_2
,IFF(rw_d = 1 , T.price, null) as b_lv
,IFF(rw_d = 1 , T.price, null) as b_lv_2
FROM trades AS T
WHERE
T.symbol = 'NTAP' AND
T.sip_timestamp >= 1640995200000000000 AND
T.sip_timestamp < 1672531200000000000
) AS T
GROUP BY bucket
Aggregating Function:
CREATE OR REPLACE FUNCTION WindowAggregator(bucket string, bucket_time float, size float, price float)
RETURNS TABLE (bucket string, sum float, avg float, first float, last float, max float, min float)
LANGUAGE JAVASCRIPT
AS '{
initialize: function (argumentInfo, context) {
this.ccount = 0;
this.size_sum = 0;
this.price_sum = 0;
this.price_min = Number.POSITIVE_INFINITY;
this.price_max = Number.NEGATIVE_INFINITY;
this.price_sum = 0;
this.last_key = Number.NEGATIVE_INFINITY;
this.last_val = 0;
this.first_key = Number.POSITIVE_INFINITY;
this.first_val = 0;
},
processRow: function get_params(row, rowWriter, context){
this.bucket = row.BUCKET;
if(row.PRICE < this.price_min ) {
this.price_min = row.PRICE;
}
if(row.PRICE > this.price_max ) {
this.price_max = row.PRICE;
}
if(row.PRICE < this.price_min ) {
this.price_min = row.PRICE;
}
if(row.BUCKET_TIME > this.last_key ) {
this.last_key = row.BUCKET_TIME;
this.last_val = row.PRICE;
}
if(row.BUCKET_TIME < this.first_key ) {
this.first_key = row.BUCKET_TIME;
this.first_val = row.PRICE;
}
this.price_sum = row.PRICE;
this.size_sum = row.SIZE;
},
finalize: function (rowWriter, context) {
rowWriter.writeRow({
BUCKET: this.bucket,
SUM: this.size_sum,
AVG: this.size_sum/this.price_sum,
FIRST: this.first_val,
LAST: this.last_val,
MAX: this.price_max,
MIN: this.price_min});
},
}';
with trades(symbol, sip_timestamp, size, price) as (
select * from values
('NTAP', 1640995200000000000, 100, 1.234),
('NTAP', 1640995200000000001, 10, 77.7),
('NTAP', 1640995200000000002, 100, 1.111)
)
SELECT
wa.*
FROM (
SELECT *
,FLOOR(T.sip_timestamp / 3600000000000) as bucket
,(T.sip_timestamp % 3600000000000)::float as bucket_time
FROM trades AS T
WHERE
T.symbol = 'NTAP' AND
T.sip_timestamp >= 1640995200000000000 AND
T.sip_timestamp < 1672531200000000000
) AS T
cross join TABLE(WindowAggregator(t.bucket::text, T.bucket_time, T.size::float, T.price::float) over (partition by t.bucket)) as wa;
gives:
BUCKET | SUM | AVG | FIRST | LAST | MAX | MIN |
---|---|---|---|---|---|---|
455832 | 210 | 2.623524268 | 1.234 | 1.111 | 77.7 | 1.111 |