How to remove repeating redundant data from PostgreSQL queries-CodePudding

I am trying to make the below query fragments to contain less repeating data and to look more elegant. Could you please advise?

SELECT 
 (select gender from jsonb_to_recordset(audience) as x(gender text[]) where x.gender is not null) as gender,
 (select age from jsonb_to_recordset(audience) as x(age text[]) where x.age is not null) as age,
 (select ethnicity from jsonb_to_recordset(audience) as x(ethnicity text[]) where x.ethnicity is not null) as ethnicity,
 (select continent from jsonb_to_recordset(params) as x(continent text[]) where x.continent is not null) as continent,
 (select country from jsonb_to_recordset(params) as x(country text[]) where x.country is not null) as country,
 (select city from jsonb_to_recordset(params) as x(city text[]) where x.city is not null) as city
FROM user_data

Is it possible to use the array or CTE returning ['gender', 'age', 'ethnicity', 'continent', 'country', 'city'] and to simulate some loop to avoid repeating the above 6 selects?

SELECT jsonb_build_array(
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.gender) INTERSECT SELECT UNNEST(dpu2.gender)), COALESCE(dpu1.gender, ARRAY[]::text[]), COALESCE(dpu2.gender, ARRAY[]::text[])),
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.age) INTERSECT SELECT UNNEST(dpu2.age)), COALESCE(dpu1.age, ARRAY[]::text[]), COALESCE(dpu2.age, ARRAY[]::text[])),
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.ethnicity) INTERSECT SELECT UNNEST(dpu2.ethnicity)), COALESCE(dpu1.ethnicity, ARRAY[]::text[]), COALESCE(dpu2.ethnicity, ARRAY[]::text[])),
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.continent) INTERSECT SELECT UNNEST(dpu2.continent)), COALESCE(dpu1.continent, ARRAY[]::text[]), COALESCE(dpu2.continent, ARRAY[]::text[])),
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.country) INTERSECT SELECT UNNEST(dpu2.country)), COALESCE(dpu1.country, ARRAY[]::text[]), COALESCE(dpu2.country, ARRAY[]::text[])),
    jsonb_build_array(ARRAY(SELECT UNNEST(dpu1.city) INTERSECT SELECT UNNEST(dpu2.city)), COALESCE(dpu1.city, ARRAY[]::text[]), COALESCE(dpu2.city, ARRAY[]::text[]))
) as values
FROM data_per_user dpu1 CROSS JOIN data_per_user dpu2
WHERE dpu1.user_uuid <> dpu2.user_uuid

Here is the same story - i find intersecting array elements and have to repeat the same conversion 6 times. Is there more elegant approach?

SELECT
jsonb_build_array(
    ARRAY[jsonb_array_length(values->0->0), jsonb_array_length(values->0->1), jsonb_array_length(values->0->2)],
    ARRAY[jsonb_array_length(values->1->0), jsonb_array_length(values->1->1), jsonb_array_length(values->1->2)],
    ARRAY[jsonb_array_length(values->2->0), jsonb_array_length(values->2->1), jsonb_array_length(values->2->2)],
    ARRAY[jsonb_array_length(values->3->0), jsonb_array_length(values->3->1), jsonb_array_length(values->3->2)],
    ARRAY[jsonb_array_length(values->4->0), jsonb_array_length(values->4->1), jsonb_array_length(values->4->2)],
    ARRAY[jsonb_array_length(values->5->0), jsonb_array_length(values->5->1), jsonb_array_length(values->5->2)]
) as scores
FROM matched_users

And here i'm trying to count number of array elements in 2D array - same story. It looks like too many redundant repeating data. Please advise any ways of optimizing those queries.

I'm not asking for the exact solutions, just the ideas would be appreciated.

CodePudding user response：

For your first query, if your columns audience and params are of type jsonb, then you can directly use the json functions to extract some data. In particular you can read carefully the jsonpath chapters in the manual 8.14.7. jsonpath Type and 9.16.2. The SQL/JSON Path Language.

This is the equivalent json version of your query :

SELECT 
  jsonb_path_query(audience, '$[*] ? (@.gender <> null)')->>'gender' :: text[] AS gender
, jsonb_path_query(audience, '$[*] ? (@.age <> null)')->>'age' :: text[] AS age
, jsonb_path_query(audience, '$[*] ? (@.ethnicity <> null)')->>'ethnicity' :: text[] AS ethnicity
, jsonb_path_query(params, '$[*] ? (@.continent <> null)')->>'continent' :: text[] AS continent
, jsonb_path_query(params, '$[*] ? (@.country <> null)')->>'country' :: text[] AS country
, jsonb_path_query(params, '$[*] ? (@.city <> null)')->>'city' :: text[] AS city
FROM user_data

and which should be simplified as follow because the resulting data will automatically be set to NULL when their corresponding json value is null in the json columns audience or params :

SELECT 
  aud->>'gender' :: text[] AS gender
, aud->>'age' :: text[] AS age
, aud->>'ethnicity' :: text[] AS ethnicity
, par->>'continent' :: text[] AS continent
, par->>'country' :: text[] AS country
, par->>'city' :: text[] AS city
FROM user_data
CROSS JOIN LATERAL jsonb_path_query(audience, '$[*]) AS aud
CROSS JOIN LATERAL jsonb_path_query(params, '$[*]) AS par

Your third query

SELECT
jsonb_build_array(
    ARRAY[jsonb_array_length(values->0->0), jsonb_array_length(values->0->1), jsonb_array_length(values->0->2)],
    ARRAY[jsonb_array_length(values->1->0), jsonb_array_length(values->1->1), jsonb_array_length(values->1->2)],
    ARRAY[jsonb_array_length(values->2->0), jsonb_array_length(values->2->1), jsonb_array_length(values->2->2)],
    ARRAY[jsonb_array_length(values->3->0), jsonb_array_length(values->3->1), jsonb_array_length(values->3->2)],
    ARRAY[jsonb_array_length(values->4->0), jsonb_array_length(values->4->1), jsonb_array_length(values->4->2)],
    ARRAY[jsonb_array_length(values->5->0), jsonb_array_length(values->5->1), jsonb_array_length(values->5->2)]
) as scores
FROM matched_users

could be replaced by

SELECT jsonb_array_agg(c.arr ORDER BY c.a) AS scores
FROM 
   (SELECT a, jsonb_array_agg(jsonb_array_length(values#>array[a :: text, b :: text]) AS arr
      FROM matched_users
     CROSS JOIN generate_series(0,5) AS a
     CROSS JOIN generate_series(0,2) AS b
     GROUP BY a
     ORDER BY b
   ) AS c