I have column content in my database that is a type of STRING with JSON structure:
{
"id": "12",
"Name": "Ana",
"lastName": "Johnes",
"data": [{
"id": "2",
"value": "mr"
}, {
"id": "3",
"value": "Ana"
}
]
}
Schema of myTable:
Content (STRING)
I want to hash and update the 'value' inside the data. I tried using REPLACE function like this but it didn't replace the array "data" with hashed "value":
UPDATE `myTable` set content=
REPLACE(content, ARRAY_TO_STRING(ARRAY(
SELECT
data
FROM
UNNEST(json_extract_array(content,
"$.data"))AS data), ","),ARRAY_TO_STRING(ARRAY(
SELECT
REPLACE(data, "\""||json_value(data,
"$.value")|| "\"", '"'||`hashField`(json_extract_scalar(data,
'$.value'))||'"' )
FROM
UNNEST(json_extract_array(content,
"$.data")) AS data), ","))
WHERE true
Expected output in column content should look like this:
{
"id": "12",
"Name": "Ana",
"lastName": "Johnes",
"data": [{
"id": "2",
"value": "1256f52fe6125"
}, {
"id": "3",
"value": "6712gf7e1fe76"
}
]
}
CodePudding user response:
Consider query below:
with sample_data as (
select '{"id": "12","Name": "Ana","lastName": "Johnes","data": [{"id": "2","value": "mr"}, {"id": "3","value": "Ana" }] }' as json_string
),
hash_value as (
select
struct(
json_extract_scalar(json_string,'$.id') as id,
json_extract_scalar(json_string,'$.Name') as Name,
json_extract_scalar(json_string,'$.lastName') as lastName,
array(
select as struct
json_extract_scalar(arr_data, '$.id') as id,
sha1(json_extract_scalar(arr_data, '$.value')) as value
from unnest(json_query_array(json_string,'$.data')) as arr_data) as data
) as json_struct
from sample_data
)
select
to_json_string(json_struct) as json_string
from hash_value
Output:
UPDATE (Add UPDATE statement)
NOTE: UPDATE
needs a WHERE statement to determine what data point to update. Since you did not present any unique column, I added a column id so I can use this a reference for the UPDATE
statement.
update `your-project.your_dataset.your_table`
set json_string = new_json_string
from (
with hash_value as (
select
struct(
json_extract_scalar(json_string,'$.id') as id,
json_extract_scalar(json_string,'$.Name') as Name,
json_extract_scalar(json_string,'$.lastName') as lastName,
array(
select as struct
json_extract_scalar(arr_data, '$.id') as id,
sha1(json_extract_scalar(arr_data, '$.value')) as value
from unnest(json_query_array(json_string,'$.data')) as arr_data) as data
) as json_struct
from `your-project.your_dataset.your_table`
)
select
to_json_string(json_struct) as new_json_string
from hash_value
)
where id = 111
CodePudding user response:
The hash function can be SHA1(input), it doesn't really matter, what is important is that I update 'value'
Based on above statement, I assume you just really want to mask real values, the replacement value really do not matter - if so consider below naïve and super simple approach
select regexp_replace(Content, r'"value":\s*"([^"] )"', '"value": "XXX')
from your_table
if applied to sample data in your question - output (of that column) is
And if you need to update that field in the table - you can simply use
update your_table
set Content = regexp_replace(Content, r'"value":\s*"([^"] )"', '"value": "XXX')
where true