I have many similar JSON objects of different sizes. I need to go through all keys with a certain name and get their values.
For example, I have 3 JSON objects:
[
{
"name": "temp",
"url": "http://temp.com",
"content": {
"content": "temp",
"url": "https://temp1.com"
}
},
{
"name": "temp",
"url": "http://temp.com",
"content": [
{
"content": "temp"
},
{
"url": "https://temp1.com"
}
]
},
{
"name": "temp",
"content": {
"content": "temp"
}
}
]
So I need to extract the values of all "url" keys (this key is optional and may be missing from a JSON object).
the target output:
http://temp.com
https://temp1.com
http://temp.com
https://temp1.com
CodePudding user response:
Load it with the `json` module and you will get a normal list of dictionaries. You can then use `.keys()` to get all the keys of a dictionary and check whether `'url'` is among them.
You also need recursion to reach the keys in nested lists/dictionaries.
Minimal working code with your example data:
# Sample input from the question: a JSON array of three objects, kept inline
# as a string so the example can be parsed with json.loads() without a file.
text = """[
{
"name": "temp",
"url": "http://temp.com",
"content": {
"content": "temp",
"url": "https://temp1.com"
}
},
{
"name": "temp",
"url": "http://temp.com",
"content": [
{
"content": "temp"
},
{
"url": "https://temp1.com"
}
]
},
{
"name": "temp",
"content": {
"content": "temp"
}
}
]"""
import json
def get_url(data, key):
    """Yield every value stored under ``key`` anywhere inside ``data``.

    ``data`` is walked recursively: lists are searched element by element,
    dictionaries are checked for ``key`` and then all of their values are
    searched in turn. Anything that is neither a list nor a dict (strings,
    numbers, ``None``, ...) yields nothing.

    Matches are produced depth-first, in document order. A matched value
    that is itself a list/dict is yielded and then also searched, so keys
    nested below a match are found as well.
    """
    if isinstance(data, list):
        # A list has no keys of its own; look inside each element.
        for item in data:
            yield from get_url(item, key)
    elif isinstance(data, dict):
        # Report this dictionary's own match first ...
        if key in data:
            yield data[key]
        # ... then descend into every value, since any of them may be a
        # nested list/dict containing further matches.
        for item in data.values():
            yield from get_url(item, key)
# --- main ---
# To read the JSON from a file instead of the inline string:
#     with open('data.json') as fh:
#         data = json.load(fh)
data = json.loads(text)
# get_url() is a generator; unpack it into a list so it can be printed.
results = [*get_url(data, 'url')]
print(results)
Result:
['http://temp.com', 'https://temp1.com', 'http://temp.com', 'https://temp1.com']
If you also want to get the path to each element:
def get_url(data, key, path=''):
    """Yield ``(value, path)`` for every occurrence of ``key`` inside ``data``.

    ``data`` is walked recursively through nested lists and dictionaries.
    ``path`` is the accessor string built up so far (empty for the root);
    list steps are appended as ``[index]`` and dictionary steps as
    ``["name"]``, so a result path looks like ``[1]["content"][1]["url"]``.

    Values that are neither a list nor a dict yield nothing. Matches are
    produced depth-first, in document order.
    """
    if isinstance(data, list):
        # Extend the path with the element's position in the list.
        for number, item in enumerate(data):
            yield from get_url(item, key, f'{path}[{number}]')
    elif isinstance(data, dict):
        if key in data:
            yield (data[key], f'{path}["{key}"]')
        # Descend into every value, extending the path with its key name.
        for name, item in data.items():
            yield from get_url(item, key, f'{path}["{name}"]')
Result:
[
('http://temp.com', '[0]["url"]'),
('https://temp1.com', '[0]["content"]["url"]'),
('http://temp.com', '[1]["url"]'),
('https://temp1.com', '[1]["content"][1]["url"]')
]
CodePudding user response:
I think a recursive generator is probably the way to go about this:
def iter_values(key, list_or_dict):
    """Yield each value stored under ``key`` in a nested list/dict structure.

    Lists are searched element by element; dictionaries are checked for
    ``key`` and then their other values are searched. Only nested lists and
    dicts are descended into. The value found under ``key`` itself is
    yielded but not searched any further, even if it is a container.
    Input that is neither a list nor a dict yields nothing.
    """
    if isinstance(list_or_dict, list):
        # We're inside a list, so yield from each element that can
        # contain keys (scalars cannot).
        for obj in list_or_dict:
            if isinstance(obj, (list, dict)):
                yield from iter_values(key, obj)
    elif isinstance(list_or_dict, dict):
        # We're dealing with a dict, so yield its own match if it has one.
        if key in list_or_dict:
            yield list_or_dict[key]
        # There may be a nested object under a different key (e.g. a
        # "content" entry that has its own "url"), so search those as well.
        for k, v in list_or_dict.items():
            if k != key and isinstance(v, (list, dict)):
                # The key must be passed down on every recursive call.
                yield from iter_values(key, v)
If you don't know how generators work, you can turn it into a list by doing
list(iter_values("url", data))