I am trying to make the below query fragments to contain less repeating data and to look more elegant. Could you please advise?
SELECT
(select gender from jsonb_to_recordset(audience) as x(gender text[]) where x.gender is not null) as gender,
(select age from jsonb_to_recordset(audience) as x(age text[]) where x.age is not null) as age,
(select ethnicity from jsonb_to_recordset(audience) as x(ethnicity text[]) where x.ethnicity is not null) as ethnicity,
(select continent from jsonb_to_recordset(params) as x(continent text[]) where x.continent is not null) as continent,
(select country from jsonb_to_recordset(params) as x(country text[]) where x.country is not null) as country,
(select city from jsonb_to_recordset(params) as x(city text[]) where x.city is not null) as city
FROM user_data
Is it possible to use the array or CTE returning ['gender', 'age', 'ethnicity', 'continent', 'country', 'city'] and to simulate some loop to avoid repeating the above 6 selects?
SELECT jsonb_build_array(
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.gender) INTERSECT SELECT UNNEST(dpu2.gender)), COALESCE(dpu1.gender, ARRAY[]::text[]), COALESCE(dpu2.gender, ARRAY[]::text[])),
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.age) INTERSECT SELECT UNNEST(dpu2.age)), COALESCE(dpu1.age, ARRAY[]::text[]), COALESCE(dpu2.age, ARRAY[]::text[])),
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.ethnicity) INTERSECT SELECT UNNEST(dpu2.ethnicity)), COALESCE(dpu1.ethnicity, ARRAY[]::text[]), COALESCE(dpu2.ethnicity, ARRAY[]::text[])),
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.continent) INTERSECT SELECT UNNEST(dpu2.continent)), COALESCE(dpu1.continent, ARRAY[]::text[]), COALESCE(dpu2.continent, ARRAY[]::text[])),
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.country) INTERSECT SELECT UNNEST(dpu2.country)), COALESCE(dpu1.country, ARRAY[]::text[]), COALESCE(dpu2.country, ARRAY[]::text[])),
jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.city) INTERSECT SELECT UNNEST(dpu2.city)), COALESCE(dpu1.city, ARRAY[]::text[]), COALESCE(dpu2.city, ARRAY[]::text[]))
) as values
FROM data_per_user dpu1 CROSS JOIN data_per_user dpu2
WHERE dpu1.user_uuid <> dpu2.user_uuid
Here is the same story - i find intersecting array elements and have to repeat the same conversion 6 times. Is there more elegant approach?
SELECT
jsonb_build_array(
ARRAY[jsonb_array_length(values->0->0), jsonb_array_length(values->0->1), jsonb_array_length(values->0->2)],
ARRAY[jsonb_array_length(values->1->0), jsonb_array_length(values->1->1), jsonb_array_length(values->1->2)],
ARRAY[jsonb_array_length(values->2->0), jsonb_array_length(values->2->1), jsonb_array_length(values->2->2)],
ARRAY[jsonb_array_length(values->3->0), jsonb_array_length(values->3->1), jsonb_array_length(values->3->2)],
ARRAY[jsonb_array_length(values->4->0), jsonb_array_length(values->4->1), jsonb_array_length(values->4->2)],
ARRAY[jsonb_array_length(values->5->0), jsonb_array_length(values->5->1), jsonb_array_length(values->5->2)]
) as scores
FROM matched_users
And here i'm trying to count number of array elements in 2D array - same story. It looks like too many redundant repeating data. Please advise any ways of optimizing those queries.
I'm not asking for the exact solutions, just the ideas would be appreciated.
CodePudding user response:
For your first query, if your columns audience
and params
are of type jsonb
, then you can directly use the json functions to extract some data. In particular you can read carefully the jsonpath chapters in the manual 8.14.7. jsonpath Type and 9.16.2. The SQL/JSON Path Language.
This is the equivalent json version of your query :
SELECT
jsonb_path_query(audience, '$[*] ? (@.gender <> null)')->>'gender' :: text[] AS gender
, jsonb_path_query(audience, '$[*] ? (@.age <> null)')->>'age' :: text[] AS age
, jsonb_path_query(audience, '$[*] ? (@.ethnicity <> null)')->>'ethnicity' :: text[] AS ethnicity
, jsonb_path_query(params, '$[*] ? (@.continent <> null)')->>'continent' :: text[] AS continent
, jsonb_path_query(params, '$[*] ? (@.country <> null)')->>'country' :: text[] AS country
, jsonb_path_query(params, '$[*] ? (@.city <> null)')->>'city' :: text[] AS city
FROM user_data
and which should be simplified as follow because the resulting data will automatically be set to NULL
when their corresponding json
value is null
in the json columns audience
or params
:
SELECT
aud->>'gender' :: text[] AS gender
, aud->>'age' :: text[] AS age
, aud->>'ethnicity' :: text[] AS ethnicity
, par->>'continent' :: text[] AS continent
, par->>'country' :: text[] AS country
, par->>'city' :: text[] AS city
FROM user_data
CROSS JOIN LATERAL jsonb_path_query(audience, '$[*]) AS aud
CROSS JOIN LATERAL jsonb_path_query(params, '$[*]) AS par
Your third query
SELECT
jsonb_build_array(
ARRAY[jsonb_array_length(values->0->0), jsonb_array_length(values->0->1), jsonb_array_length(values->0->2)],
ARRAY[jsonb_array_length(values->1->0), jsonb_array_length(values->1->1), jsonb_array_length(values->1->2)],
ARRAY[jsonb_array_length(values->2->0), jsonb_array_length(values->2->1), jsonb_array_length(values->2->2)],
ARRAY[jsonb_array_length(values->3->0), jsonb_array_length(values->3->1), jsonb_array_length(values->3->2)],
ARRAY[jsonb_array_length(values->4->0), jsonb_array_length(values->4->1), jsonb_array_length(values->4->2)],
ARRAY[jsonb_array_length(values->5->0), jsonb_array_length(values->5->1), jsonb_array_length(values->5->2)]
) as scores
FROM matched_users
could be replaced by
SELECT jsonb_array_agg(c.arr ORDER BY c.a) AS scores
FROM
(SELECT a, jsonb_array_agg(jsonb_array_length(values#>array[a :: text, b :: text]) AS arr
FROM matched_users
CROSS JOIN generate_series(0,5) AS a
CROSS JOIN generate_series(0,2) AS b
GROUP BY a
ORDER BY b
) AS c