I am using recursion to find the full path (breadcrumb) to a specific value inside a nested JSON document. For example, in the JSON below I am trying to find the full path to each src element. Note that there are 2 src elements with the same value: my current code works fine when the src values differ, but when two src keys share the same value, the result is not what I expect.
Current JSON:
{
"imagepanel": {
"image": [
{
"scaled_image": {
"classes": "w-full",
"aspect_ratios": "frame sm:4:3 xmed:4:3",
"art_directions": [
{
"alt": "River waves",
"src": "path/to/file/53339c03d67e6ee5-lesson-3.jpg",
"type": "jpg",
"media": "(min-width:900.1px)",
"sizes": "50vw",
"intrinsicwidth": "1411",
"intrinsicheight": "1000"
},
{
"alt": "River waves",
"src": "path/to/file/53339c03d67e6ee5-lesson-3.jpg",
"type": "jpg",
"media": "(max-width:900px)",
"sizes": "100vw",
"intrinsicwidth": "0",
"intrinsicheight": "0"
}
]
}
}
],
"title": "<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>",
"cta_label": "SEE THE LIST",
"left_image": true
}
}
Current Code:
import json
import pprint
def breadcrumb_finder(json_dict_or_list, value):
    """Return the path to the first occurrence of ``value`` in a JSON structure.

    The structure is searched depth-first. Dict keys are added to the path
    unchanged, list positions are added as strings, and the matched value
    itself is the last element of the path.

    Args:
        json_dict_or_list: A decoded JSON value (dict, list, or scalar).
        value: The value to look for, compared with ``==``.

    Returns:
        A list such as ``['key', '0', 'src', value]`` describing the first
        match, or ``None`` when ``value`` does not occur.

    Note:
        The search stops at the first match, so calling this twice with a
        value that occurs twice yields the same path both times.
    """
    if json_dict_or_list == value:
        return [json_dict_or_list]
    if isinstance(json_dict_or_list, dict):
        for key, child_node in json_dict_or_list.items():
            child = breadcrumb_finder(child_node, value)
            if child:
                # Prepend this level's key to the path found below it.
                return [key] + child
    elif isinstance(json_dict_or_list, list):
        for index, item in enumerate(json_dict_or_list):
            child = breadcrumb_finder(item, value)
            if child:
                # List positions are reported as strings.
                return [str(index)] + child
    return None
# Raw JSON document to search (kept as a raw string so the \" escapes survive).
data = r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}'''
data = json.loads(data)
# The same src value is listed twice: one lookup per occurrence in the JSON.
all_src = ['path/to/file/53339c03d67e6ee5-lesson-3.jpg', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
for src in all_src:
    # Look up and print the path for each requested src value.
    nested_path = breadcrumb_finder(data, src)
    print(nested_path)
Current Output:
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
(Note the list index after 'art_directions': it is '0' in both paths.)
Expected Output:
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '1', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
(Note the list index after 'art_directions': it should be '1' in the second path.)
CodePudding user response:
After a little bit debugging I notice that the problem lies in this piece of code:
elif isinstance(json_dict_or_list, list):
lst = json_dict_or_list
for i in range(len(lst)):
child = breadcrumb_finder(lst[i], value)
if child:
return [str(i)] child
Since you call return as soon as child is not empty, every element after lst[i] is ignored once that condition is satisfied.
Therefore I changed your code a little bit by using backtracking:
import json
import pprint
# Module-level accumulator that the driver code passes in as ``result``.
results = []


def breadcrumb_finder(json_dict_or_list, value, path, result):
    """Collect the path to every occurrence of ``value`` in a JSON structure.

    The structure is walked depth-first with backtracking: each dict key or
    list index is pushed onto ``path`` before descending and popped again
    afterwards, so the search continues past the first match.

    Args:
        json_dict_or_list: A decoded JSON value (dict, list, or scalar).
        value: The value to look for, compared with ``==``.
        path: Keys and indices leading to ``json_dict_or_list``; pass ``[]``
            for the root. It is mutated during the walk and restored to its
            initial contents before the call returns.
        result: List that receives one path per match. Each path ends with
            the matched value; list positions are stored as integers.

    Returns:
        None. Matches are reported through ``result``.
    """
    if json_dict_or_list == value:
        # Store a fresh list: ``path`` itself keeps changing during the walk.
        result.append(path + [json_dict_or_list])
    elif isinstance(json_dict_or_list, dict):
        for key, child_node in json_dict_or_list.items():
            path.append(key)
            breadcrumb_finder(child_node, value, path, result)
            path.pop()
    elif isinstance(json_dict_or_list, list):
        for index, item in enumerate(json_dict_or_list):
            path.append(index)
            breadcrumb_finder(item, value, path, result)
            path.pop()
# Raw JSON document to search (kept as a raw string so the \" escapes survive).
data = r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}'''
data = json.loads(data)
# Each distinct value is listed once: a single search reports every occurrence.
all_src = ['path/to/file/53339c03d67e6ee5-lesson-3.jpg']
for src in all_src:
    # Start each search with an empty path; matches accumulate in ``results``.
    breadcrumb_finder(data, src, [], results)
# Print once, after all searches have added their paths.
print(results)
This will ensure that the list will be iterated all the way. And the result:
[['imagepanel', 'image', 0, 'scaled_image', 'art_directions', 0, 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg'], ['imagepanel', 'image', 0, 'scaled_image', 'art_directions', 1, 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']]
Edit: I renamed the global variable to results so that it is not confused with the local result parameter.
CodePudding user response:
import json
import pprint
def breadcrumb_finder(json_dict_or_list, value):
    """Return a path to ``value``, skipping list positions already reported.

    Works like a depth-first first-match search, except in the list branch:
    when the path found below a list element starts with the key ``"src"``
    and that element's index (as a string) is already recorded in the
    module-level list ``found_srcs``, the element is skipped and the search
    moves on to the next one. Every list index that is returned is appended
    to ``found_srcs``.

    Args:
        json_dict_or_list: A decoded JSON value (dict, list, or scalar).
        value: The value to look for, compared with ``==``.

    Returns:
        A list of dict keys and stringified list indices ending with the
        matched value, or ``None`` when no (remaining) match is found.

    Note:
        ``found_srcs`` must exist at module level before a list match is
        processed. It records bare indices only, not full paths, so the
        same index in a different list is also treated as already found.
    """
    if json_dict_or_list == value:
        return [json_dict_or_list]
    if isinstance(json_dict_or_list, dict):
        for key, child_node in json_dict_or_list.items():
            child = breadcrumb_finder(child_node, value)
            if child:
                return [key] + child
    elif isinstance(json_dict_or_list, list):
        for index, item in enumerate(json_dict_or_list):
            child = breadcrumb_finder(item, value)
            if not child:
                continue
            position = str(index)
            # Only direct "src" hits are de-duplicated; other levels always pass.
            if child[0] != "src" or position not in found_srcs:
                found_srcs.append(position)
                return [position] + child
    return None
# Raw JSON document to search (kept as a raw string so the \" escapes survive).
data = r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}'''
data = json.loads(data)
all_src = ['path/to/file/53339c03d67e6ee5-lesson-3.jpg', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
# Shared state read and updated by breadcrumb_finder across the calls below.
found_srcs = []
for src in all_src:
    # Each call skips the list indices recorded by the previous calls.
    nested_path = breadcrumb_finder(data, src)
    print(nested_path)
By storing the list indices at which a src has already been found, you can skip over them on the next call. The easiest way I found is to check just before prepending the list index: if that index was already recorded, skip that iteration entirely, as in the code above.
Output:
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '1', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']