Home > Mobile >  Nested dictionary path find by value
Nested dictionary path find by value

Time:10-13

I am using recursion to find the full path to a specific value in a nested JSON document. For example, in the JSON below I am trying to find the full path to each src element. Note that there are two src elements with the same value: my current code works fine when the src values differ, but when two src keys have the same value the result is not what I expect.

Current JSON:

{
  "imagepanel": {
    "image": [
      {
        "scaled_image": {
          "classes": "w-full",
          "aspect_ratios": "frame sm:4:3 xmed:4:3",
          "art_directions": [
            {
              "alt": "River waves",
              "src": "path/to/file/53339c03d67e6ee5-lesson-3.jpg",
              "type": "jpg",
              "media": "(min-width:900.1px)",
              "sizes": "50vw",
              "intrinsicwidth": "1411",
              "intrinsicheight": "1000"
            },
            {
              "alt": "River waves",
              "src": "path/to/file/53339c03d67e6ee5-lesson-3.jpg",
              "type": "jpg",
              "media": "(max-width:900px)",
              "sizes": "100vw",
              "intrinsicwidth": "0",
              "intrinsicheight": "0"
            }
          ]
        }
      }
    ],
    "title": "<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>",
    "cta_label": "SEE THE LIST",
    "left_image": true
  }
}

Current Code:

import json
import pprint
def breadcrumb_finder(json_dict_or_list, value):
    """Return the path to the first occurrence of ``value`` in a nested structure.

    The structure is walked depth-first. Dict keys are added to the path
    unchanged, list positions are added as strings, and the matched value
    itself is the last element of the path.

    Only the first match is reported, so searching repeatedly for a value
    that occurs several times always yields the same path.

    Returns ``None`` when ``value`` does not occur anywhere.
    """
    if json_dict_or_list == value:
        return [json_dict_or_list]
    if isinstance(json_dict_or_list, dict):
        for k, v in json_dict_or_list.items():
            child = breadcrumb_finder(v, value)
            if child:
                return [k] + child
    elif isinstance(json_dict_or_list, list):
        for i, item in enumerate(json_dict_or_list):
            child = breadcrumb_finder(item, value)
            if child:
                # Indices are stringified so the path is homogeneous.
                return [str(i)] + child
    return None

# Sample document: both entries of "art_directions" carry the same "src" value.
data = json.loads(r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}''')

# The same src is looked up twice; each lookup prints the path it found.
all_src = [
    'path/to/file/53339c03d67e6ee5-lesson-3.jpg',
    'path/to/file/53339c03d67e6ee5-lesson-3.jpg',
]
for src in all_src:
    nested_path = breadcrumb_finder(data, src)
    print(nested_path)

Current Output:

['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']

                                                               ^^^ note index here

Expected Output:

['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '1', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']

                                                               ^^^ note index here

CodePudding user response:

After a bit of debugging, I noticed that the problem lies in this piece of code:

elif isinstance(json_dict_or_list, list):
    lst = json_dict_or_list
    for i in range(len(lst)):
        child = breadcrumb_finder(lst[i], value)
        if child:
            return [str(i)] + child

Since you return as soon as child is non-empty, every element after lst[i] is ignored once the first match has been found.

Therefore I changed your code a little bit by using backtracking:

import json
import pprint
results = []
def breadcrumb_finder(json_dict_or_list, value, path=None, result=None):
    """Collect the path to every occurrence of ``value`` in a nested structure.

    The structure is walked depth-first with backtracking: each dict key or
    list index is pushed onto ``path`` before descending and popped again
    afterwards, so ``path`` is left unchanged when the call returns.

    Args:
        json_dict_or_list: The nested dict/list structure (or a leaf value).
        value: The value to search for.
        path: Keys and indices leading to ``json_dict_or_list``. Defaults to
            an empty path.
        result: List that receives one path per match. Defaults to a new
            list.

    Returns:
        ``result``. Each path holds dict keys, integer list indices and,
        as its last element, the matched value.
    """
    if path is None:
        path = []
    if result is None:
        result = []

    if json_dict_or_list == value:
        # Store a snapshot: ``path`` keeps being mutated by the callers.
        result.append(path + [json_dict_or_list])
    elif isinstance(json_dict_or_list, dict):
        for k, v in json_dict_or_list.items():
            path.append(k)
            breadcrumb_finder(v, value, path, result)
            path.pop()
    elif isinstance(json_dict_or_list, list):
        for i, item in enumerate(json_dict_or_list):
            path.append(i)
            breadcrumb_finder(item, value, path, result)
            path.pop()
    return result

# Sample document: both entries of "art_directions" carry the same "src" value.
data = json.loads(r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}''')

# One search per distinct src is enough: every matching path is appended
# to the module-level ``results`` list.
all_src = [
    'path/to/file/53339c03d67e6ee5-lesson-3.jpg',
]
for src in all_src:
    breadcrumb_finder(data, src, [], results)
    print(results)

This will ensure that the list will be iterated all the way. And the result:

[['imagepanel', 'image', 0, 'scaled_image', 'art_directions', 0, 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg'], ['imagepanel', 'image', 0, 'scaled_image', 'art_directions', 1, 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']]

Edit: I updated the name of the global variable to results so that it is not confused with the local variable result.

CodePudding user response:

import json
import pprint
def breadcrumb_finder(json_dict_or_list, value):
    """Return a path to ``value``, skipping list positions reported earlier.

    Works like a first-match depth-first search, except that a list element
    whose match is a direct ``"src"`` entry is skipped when its index is
    already recorded in the module-level list ``found_srcs``. Repeated calls
    therefore walk through successive elements holding the same src value.

    ``found_srcs`` must exist at module level before the first call; it is
    appended to every time a path is returned through a list.

    NOTE: only the bare index is recorded, not the list it belongs to, so
    equal indices in different lists are not told apart.

    Returns ``None`` when no (remaining) match is found.
    """
    if json_dict_or_list == value:
        return [json_dict_or_list]
    if isinstance(json_dict_or_list, dict):
        for k, v in json_dict_or_list.items():
            child = breadcrumb_finder(v, value)
            if child:
                return [k] + child
    elif isinstance(json_dict_or_list, list):
        for i, item in enumerate(json_dict_or_list):
            child = breadcrumb_finder(item, value)
            if child:
                # Skip a direct "src" hit whose index was already reported.
                if child[0] != "src" or str(i) not in found_srcs:
                    found_srcs.append(str(i))
                    return [str(i)] + child
    return None

# Sample document: both entries of "art_directions" carry the same "src" value.
data = json.loads(r'''{"imagepanel":{"image":[{"scaled_image":{"classes":"w-full","aspect_ratios":"frame sm:4:3 xmed:4:3","art_directions":[{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(min-width:900.1px)","sizes":"50vw","intrinsicwidth":"1411","intrinsicheight":"1000"},{"alt":"River waves","src":"path/to/file/53339c03d67e6ee5-lesson-3.jpg","type":"jpg","media":"(max-width:900px)","sizes":"100vw","intrinsicwidth":"0","intrinsicheight":"0"}]}}],"title":"<p><span class=\"drop-cap\" drop-cap=\"true\">WHAT TO WATCH</span></p>","cta_label":"SEE THE LIST","left_image":true}}''')

all_src = [
    'path/to/file/53339c03d67e6ee5-lesson-3.jpg',
    'path/to/file/53339c03d67e6ee5-lesson-3.jpg',
]
# List indices already reported; breadcrumb_finder reads and extends this list.
found_srcs = []
for src in all_src:
    nested_path = breadcrumb_finder(data, src)
    print(nested_path)

By storing the list indices of the srcs that were already found, you can skip over them. The easiest way I found is to check, just before adding the index to the path, whether it has already been recorded and, if so, to skip that iteration entirely, as in the code above.

Output:

['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '0', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
['imagepanel', 'image', '0', 'scaled_image', 'art_directions', '1', 'src', 'path/to/file/53339c03d67e6ee5-lesson-3.jpg']
  • Related