I have data that looks like this:
WITH raw_string AS (
SELECT '{
"484934206380132":["__time__":"1657740233","__qtype__":"message"]
,"492645072399511":{"__time__":"1657740228","__qtype__":"text"}
,"386574019969974":{"__time__":"1657740228","__qtype__":"text"}
,"1043183892945050":{"__time__":"1657740228","subquestions":"[\"Kick-off call\",\"Creative presentation\",\"Design Sessions\",\"Final deliverables (assets)\"]","__qtype__":"ratingmatrix"}
,"3184511478430353":{"__time__":"1657740228","subquestions":"[\"Making ads for Meta platforms\",\"Producing video content\",\"Applying mobile creative best practices\"]","__qtype__":"ratingmatrix"}
,"1115358562640462":{"__time__":"1657740228","__qtype__":"radio"}
,"392599788967548":{"__time__":"1657740228","__qtype__":"dropdown"}
,"318278303702075":{"__time__":"1657740228","translation":"07132022","__qtype__":"numeric_field"},"2629866570491655":{"__time__":"1657740228","__qtype__":"checkbox"}
,"481522393704422":{"__time__":"1657740228","translation":"Total Bet","__qtype__":"text"}
,"1091749101368816":{"__time__":"1657740228","translation":"The collaborative work","__qtype__":"text"}
,"503986774424936":{"__time__":"1657740228","__qtype__":"text"}
}' AS question_context
)
SELECT * FROM raw_string
I'd like to extract "subquestions" into separate rows (in an sql table): -- Making ads for Meta platforms -- Producing video content -- Applying mobile creatives best...
I have a tried a few functions json_extract_scalar, and Map functions etc, but I didn't succeed.
Thanks in advance for your help.
So the thing I was doing, which is a bit convoluted and leads to an incorrect solution is this:
WITH raw_string AS (
SELECT *
FROM
(VALUES ('{
"484934206380132":["__time__":"1657740233","__qtype__":"message"]
,"492645072399511":{"__time__":"1657740228","__qtype__":"text"}
,"386574019969974":{"__time__":"1657740228","__qtype__":"text"}
,"1043183892945050":{"__time__":"1657740228","subquestions":"[\"Kick-off call\",\"Creative presentation\",\"Design Sessions\",\"Final deliverables (assets)\"]","__qtype__":"ratingmatrix"}
,"3184511478430353":{"__time__":"1657740228","subquestions":"[\"Making ads for Meta platforms\",\"Producing video content\",\"Applying mobile creative best practices\"]","__qtype__":"ratingmatrix"}
,"1115358562640462":{"__time__":"1657740228","__qtype__":"radio"}
,"392599788967548":{"__time__":"1657740228","__qtype__":"dropdown"}
,"318278303702075":{"__time__":"1657740228","translation":"07132022","__qtype__":"numeric_field"},"2629866570491655":{"__time__":"1657740228","__qtype__":"checkbox"}
,"481522393704422":{"__time__":"1657740228","translation":"Total Bet","__qtype__":"text"}
,"1091749101368816":{"__time__":"1657740228","translation":"The collaborative work","__qtype__":"text"}
,"503986774424936":{"__time__":"1657740228","__qtype__":"text"}
}')) AS question_cntxt(questions_context)
)
SELECT * FROM (
SELECT * FROM (
SELECT * FROM raw_string
CROSS JOIN UNNEST(questions_context) AS t(qn_id, sub_qn)
)
CROSS JOIN UNNEST(sub_qn) as t(category, _values)
)
CROSS JOIN UNNEST(SPLIT(_values, ',')) AS t(_levels)
I come close to what am trying to do, but I end up with a lot more rows than I need. I think I'm getting closer though.
The thing am really trying to do is put together a datasets of questions and answers. I want to pair them up together nicely.
The data looks like this::
SELECT *
FROM
(VALUES ('{
"484934206380132":["__time__":"1657740233","__qtype__":"message"]
,"492645072399511":{"__time__":"1657740228","__qtype__":"text"}
,"386574019969974":{"__time__":"1657740228","__qtype__":"text"}
,"1043183892945050":{"__time__":"1657740228","subquestions":"[\"Kick-off call\",\"Creative presentation\",\"Design Sessions\",\"Final deliverables (assets)\"]","__qtype__":"ratingmatrix"}
,"3184511478430353":{"__time__":"1657740228","subquestions":"[\"Making ads for Meta platforms\",\"Producing video content\",\"Applying mobile creative best practices\"]","__qtype__":"ratingmatrix"}
,"1115358562640462":{"__time__":"1657740228","__qtype__":"radio"}
,"392599788967548":{"__time__":"1657740228","__qtype__":"dropdown"}
,"318278303702075":{"__time__":"1657740228","translation":"07132022","__qtype__":"numeric_field"},"2629866570491655":{"__time__":"1657740228","__qtype__":"checkbox"}
,"481522393704422":{"__time__":"1657740228","translation":"Total Bet","__qtype__":"text"}
,"1091749101368816":{"__time__":"1657740228","translation":"The collaborative work","__qtype__":"text"}
,"503986774424936":{"__time__":"1657740228","__qtype__":"text"}
}',
'{"1043183892945050":["Very satisfied","Very satisfied","Very satisfied","Very satisfied"]
,"1091749101368816":["All participants are engaging and open to feedback. The agency is ready to accept suggestions and proactively suggest ideas as well."]
,"1115358562640462":["5 - Very Satisfied"],"2629866570491655":["Yes",null,null]
,"318278303702075":["07122022"]
,"3184511478430353":["Somewhat confident","Somewhat confident","Somewhat confident"]
,"386574019969974":["NA"],"392599788967548":["Asia Pacific (APAC)"],"481522393704422":["Genero"]
}'
)) AS question_cntxt(questions, answers)
)
SELECT * FROM raw_string
I want to create a table where in the end I have a question:
|qn_id | category. |Answer |
|----------------|---------------------|----------------|
|1043183892945050|Kick-off call. | Very Satisfied.|
|1043183892945050|Creative presentation| Very satisfied |
|1043183892945050|Design sessions. | Very satisfied |
|1043183892945050|Final deri. | very sat. |
I was trying to take a shot at it one column at a time
CodePudding user response:
After fixing the json (the first property for some reason contains mailformed array, not another json object), you can do next:
WITH raw_string AS (
SELECT '{
"484934206380132":{"__time__":"1657740233","__qtype__":"message"}
,"492645072399511":{"__time__":"1657740228","__qtype__":"text"}
,"386574019969974":{"__time__":"1657740228","__qtype__":"text"}
,"1043183892945050":{"__time__":"1657740228","subquestions":"[\"Kick-off call\",\"Creative presentation\",\"Design Sessions\",\"Final deliverables (assets)\"]","__qtype__":"ratingmatrix"}
,"3184511478430353":{"__time__":"1657740228","subquestions":"[\"Making ads for Meta platforms\",\"Producing video content\",\"Applying mobile creative best practices\"]","__qtype__":"ratingmatrix"}
,"1115358562640462":{"__time__":"1657740228","__qtype__":"radio"}
,"392599788967548":{"__time__":"1657740228","__qtype__":"dropdown"}
,"318278303702075":{"__time__":"1657740228","translation":"07132022","__qtype__":"numeric_field"},"2629866570491655":{"__time__":"1657740228","__qtype__":"checkbox"}
,"481522393704422":{"__time__":"1657740228","translation":"Total Bet","__qtype__":"text"}
,"1091749101368816":{"__time__":"1657740228","translation":"The collaborative work","__qtype__":"text"}
,"503986774424936":{"__time__":"1657740228","__qtype__":"text"}
}' AS question_context
)
SELECT subquestion
FROM (
SELECT map_values( -- get values which is in turn array of maps
map_filter( -- get only records with subquestions
cast(json_parse(question_context) as map(varchar, map(varchar, varchar))),
(k, v)->element_at(v, 'subquestions') is not null
)
) mp_arr
FROM raw_string
) p
, unnest(mp_arr) as t(mp) -- unnest array of maps
, unnest(cast(json_parse(mp['subquestions']) as array(varchar))) as tt(subquestion) -- 'subquestions' are double encoded so need to decode them
Output:
subquestion |
---|
Kick-off call |
Creative presentation |
Design Sessions |
Final deliverables (assets) |
Making ads for Meta platforms |
Producing video content |
Applying mobile creative best practices |