Given two arrays like a = [10, 20, 30], and b = [9, 21, 32], how can I construct an array that consists of the minimum or maximum element based on index in snowflake, i.e. the desired output for minimum is [9,20,30] and for the maximum is [10,21,32]?
I looked at snowflake's array functions and didn't find a function that does this.
CodePudding user response:
Using numbers table/[]
to access elements and ARRAY_AGG
to build new arrays:
WITH cte AS (
SELECT ARRAY_CONSTRUCT(10, 20, 30) AS a, ARRAY_CONSTRUCT(9, 21, 32) AS b
), numbers AS (
SELECT ROW_NUMBER() OVER(ORDER BY seq4())-1 AS IND
FROM TABLE(GENERATOR(ROWCOUNT => 10001))
)
SELECT a,b
,ARRAY_AGG(LEAST(a[ind], b[ind])) WITHIN GROUP(ORDER BY n.ind) AS min_array
,ARRAY_AGG(GREATEST(a[ind], b[ind])) WITHIN GROUP(ORDER BY n.ind) AS max_array
FROM cte
JOIN numbers n
ON n.ind < GREATEST(ARRAY_SIZE(a), ARRAY_SIZE(b))
GROUP BY a,b;
Output:
CodePudding user response:
If the arrays are always the same size (and reusing Lukasz great data cte):
WITH cte AS (
SELECT ARRAY_CONSTRUCT(10, 20, 30) AS a, ARRAY_CONSTRUCT(9, 21, 32) AS b
)
SELECT a,b
,ARRAY_AGG(LEAST(a[n.index], b[n.index])) WITHIN GROUP(ORDER BY n.index) AS min_array
,ARRAY_AGG(GREATEST(a[n.index], b[n.index])) WITHIN GROUP(ORDER BY n.index) AS max_array
FROM cte
,table(flatten(a)) n
GROUP BY 1,2;
gives:
A | B | MIN_ARRAY | MAX_ARRAY |
---|---|---|---|
[ 10, 20, 30 ] | [ 9, 21, 32 ] | [ 9, 20, 30 ] | [ 10, 21, 32 ] |
And if you have uneven lists:
WITH cte AS (
SELECT ARRAY_CONSTRUCT(10, 20, 30) AS a, ARRAY_CONSTRUCT(9, 21, 32) AS b
union all
SELECT ARRAY_CONSTRUCT(10, 20, 30) AS a, ARRAY_CONSTRUCT(9, 21, 32, 45) AS b
)
SELECT a,b
,ARRAY_AGG(LEAST(a[n.index], b[n.index])) WITHIN GROUP(ORDER BY n.index) AS min_array
,ARRAY_AGG(GREATEST(a[n.index], b[n.index])) WITHIN GROUP(ORDER BY n.index) AS max_array
FROM cte
,table(flatten(iff(array_size(a)>=array_size(b), a, b))) n
GROUP BY 1,2;
A | B | MIN_ARRAY | MAX_ARRAY |
---|---|---|---|
[ 10, 20, 30 ] | [ 9, 21, 32 ] | [ 9, 20, 30 ] | [ 10, 21, 32 ] |
[ 10, 20, 30 ] | [ 9, 21, 32, 45 ] | [ 9, 20, 30 ] | [ 10, 21, 32 ] |
will pick the largest, but given the NULL from the smaller list will cause LEAST/GREATEST to return NULL and ARRAY_AGG drops nulls, you don't even need to size compare, unless you want to NVL/COALESCE that values to safe values for nulls.
SELECT 1 as a, null as b, least(a,b);
gives:
A | B | LEAST(A,B) |
---|---|---|
1 | null | null |
like so:
SELECT a,b
,ARRAY_AGG(LEAST(nvl(a[n.index],10000), nvl(b[n.index],10000))) WITHIN GROUP(ORDER BY n.index) AS min_array
,ARRAY_AGG(GREATEST(nvl(a[n.index],0), nvl(b[n.index],0))) WITHIN GROUP(ORDER BY n.index) AS max_array
FROM cte
,table(flatten(iff(array_size(a)>=array_size(b), a, b))) n
GROUP BY 1,2;
A | B | MIN_ARRAY | MAX_ARRAY |
---|---|---|---|
[ 10, 20, 30 ] | [ 9, 21, 32 ] | [ 9, 20, 30 ] | [ 10, 21, 32 ] |
[ 10, 20, 30 ] | [ 9, 21, 32, 45 ] | [ 9, 20, 30, 45 ] | [ 10, 21, 32, 45 ] |