Home > front end >  Get key hierarchy from a nested dict of other lists/dicts in Python
Get key hierarchy from a nested dict of other lists/dicts in Python

Time:01-11

I have an input dict like so:

# Sample data: a mostly flat dict of strings. 'cc' holds a list of two dicts
# that share the same keys, and 'soos' holds a list containing one string.
# NOTE: the name `input` shadows Python's built-in input() function.
input={'boo': 'its', 'soo': 'your', 'roo': 'choice', 'qoo': 'this', 'fizz': 'is', 'buzz': 'very', 'yoyo': 'rambling', 'wazzw': 'lorem', 'bnn': 'ipsum', 'cc': [{'boo': 'fill', 'soo': 'ing', 'roo': 'in', 'qoo': 'the', 'fizz': 'words', 'buzz': 'here', 'yoyo': 'we', 'wazzw': 'go', 'nummm': 2, 'bsdfff': 3, 'hgdjgkk': 4, 'opu': 1, 'mnb': True}, {'boo': 'again', 'soo': 'loop', 'roo': 'de', 'qoo': 'loop', 'fizz': 'wowzers', 'buzz': 'try', 'yoyo': 'again', 'wazzw': 'how', 'nummm': 1, 'bsdfff': 7, 'hgdjgkk': 0, 'opu': 1, 'mnb': True}], 'soos': ['ya'], 'tyu': 'doin', 'dddd3': 'today'}

Using only Python's built-in libraries, how can I get the dot-separated hierarchy of each key? I.e.:

# Desired result: every top-level key in its original order; the keys found
# inside the dicts of the 'cc' list appear once each as 'cc.<key>', right
# after 'cc' itself.
expected_output=['boo','soo','roo','qoo','fizz','buzz','yoyo','wazzw','bnn','cc','cc.boo','cc.soo','cc.roo','cc.qoo','cc.fizz','cc.buzz','cc.yoyo','cc.wazzw','cc.nummm','cc.bsdfff','cc.hgdjgkk','cc.opu','cc.mnb','soos','tyu','dddd3']

My first attempt does not handle lists:

def getKeys(object, prev_key=None, keys=None):
    """Return the dot-separated paths of all leaf values in a nested dict.

    Only dicts are descended into. Any other value (including a list) is
    treated as a leaf, so keys of dicts stored inside lists are not reported.

    Args:
        object: The value to inspect. (The name shadows the built-in but is
            kept so existing keyword callers keep working.)
        prev_key: Path accumulated so far, or None at the top level.
        keys: Optional list that receives ``prev_key`` when ``object`` is a
            leaf. A fresh list is created when omitted.

    Returns:
        For a dict, a new list with one path per leaf beneath it. For a
        leaf, ``keys`` with ``prev_key`` appended.
    """
    # A literal [] default would be shared between calls and keep growing.
    if keys is None:
        keys = []
    if not isinstance(object, dict):
        keys.append(prev_key)
        return keys
    new_keys = []
    for k, v in object.items():
        new_key = k if prev_key is None else "{}.{}".format(prev_key, k)
        new_keys.extend(getKeys(v, new_key))
    return new_keys

CodePudding user response:

To deal with a sub-list, you can iteratively check if each sub-item is a dict, and if it is, recursively append the key paths of the sub-dict to the current key:

def get_keys(d):
    """Return the dot-separated key paths of the leaf entries in ``d``.

    Dicts are followed whether they are stored directly as a value or as an
    item of a list; the paths found inside are prefixed with the owning key.
    The owning key itself is only reported when it holds something that is
    not a non-empty dict: a scalar, a non-dict list item, or an empty
    container.

    Paths are not de-duplicated: a list of several dicts sharing the same
    keys reports those paths once per dict, and a list with several non-dict
    items reports its key once per item.

    Args:
        d: The dict to inspect.

    Returns:
        A list of path strings (or bare keys at the top level) in traversal
        order.
    """
    keys = []
    for key, value in d.items():
        if isinstance(value, dict):
            # A directly nested dict is walked like a one-item list of dicts.
            children = [value]
        elif isinstance(value, list):
            children = value
        else:
            keys.append(key)
            continue

        if not children:
            # An empty list still means the key exists; do not drop it.
            keys.append(key)
            continue

        for child in children:
            sub_paths = get_keys(child) if isinstance(child, dict) else []
            if sub_paths:
                keys.extend(f'{key}.{path}' for path in sub_paths)
            else:
                # Non-dict item, or a dict with nothing inside: report the
                # key itself so it does not vanish from the result.
                keys.append(key)
    return keys

so that given your sample input, get_keys(input) would return:

['boo', 'soo', 'roo', 'qoo', 'fizz', 'buzz', 'yoyo', 'wazzw', 'bnn', 'cc.boo', 'cc.soo', 'cc.roo', 'cc.qoo', 'cc.fizz', 'cc.buzz', 'cc.yoyo', 'cc.wazzw', 'cc.nummm', 'cc.bsdfff', 'cc.hgdjgkk', 'cc.opu', 'cc.mnb', 'cc.boo', 'cc.soo', 'cc.roo', 'cc.qoo', 'cc.fizz', 'cc.buzz', 'cc.yoyo', 'cc.wazzw', 'cc.nummm', 'cc.bsdfff', 'cc.hgdjgkk', 'cc.opu', 'cc.mnb', 'soos', 'tyu', 'dddd3']

Demo: https://replit.com/@blhsing/OpenGoldenIntegrationtesting

CodePudding user response:

Using a recursive generator:

def hierarchy(d, prefix=None):
    """Yield the dot-separated path of every key in a nested dict/list structure.

    Each dict key is yielded (prefixed with the path of its parents) before
    anything nested beneath it. Lists add no path segment of their own: the
    items of a list, including items of lists nested inside lists, are
    searched under the path of the key that holds the list. Every distinct
    path is yielded once, in the order it is first encountered, so sibling
    dicts in a list that repeat the same keys do not produce duplicates while
    keys that only occur in a later sibling are still reported.

    Args:
        d: The structure to walk. Anything that is not a dict or list
            yields nothing.
        prefix: Path to prepend to every key, or None (or '') for none.

    Yields:
        The bare key for un-prefixed top-level keys, otherwise a
        ``'parent.child'`` string.
    """
    emitted = set()
    for path in _iter_key_paths(d, prefix):
        # De-duplicate on the full path rather than on a sibling's top-level
        # key, so differing nested keys of later siblings are not lost.
        if path not in emitted:
            emitted.add(path)
            yield path


def _iter_key_paths(node, prefix):
    """Yield every key path below ``node`` in traversal order, duplicates included."""
    if isinstance(node, dict):
        for key, value in node.items():
            # Compare against None/'' explicitly so a falsy key such as 0
            # still acts as a prefix for the keys nested under it.
            if prefix is None or prefix == '':
                path = key
            else:
                path = f'{prefix}.{key}'
            yield path
            yield from _iter_key_paths(value, path)
    elif isinstance(node, list):
        for item in node:
            yield from _iter_key_paths(item, prefix)
                
# Materialise the generator so the paths can be compared as a list.
# NOTE(review): `inpt` is presumably the question's sample dict (named
# `input` there) - confirm before running.
out = [*hierarchy(inpt)]

# validation
assert out == expected_output

Output:

['boo', 'soo', 'roo', 'qoo', 'fizz', 'buzz', 'yoyo', 'wazzw', 'bnn',
 'cc', 'cc.boo', 'cc.soo', 'cc.roo', 'cc.qoo', 'cc.fizz', 'cc.buzz',
 'cc.yoyo', 'cc.wazzw', 'cc.nummm', 'cc.bsdfff', 'cc.hgdjgkk', 'cc.opu', 'cc.mnb',
 'soos', 'tyu', 'dddd3']

Different example:

# Deeper nesting: dict values are followed level by level. Inside the 'l4'
# list, the inner list ['abc'] contributes no paths, while the dict next to
# it contributes 'l5'.
list(hierarchy({'l1': {'l2': {'l3': 'test', 'l4': [['abc'], {'l5': 'def'}]}}}))
# ['l1', 'l1.l2', 'l1.l2.l3', 'l1.l2.l4', 'l1.l2.l4.l5']

CodePudding user response:

Modification of mozway's answer; https://www.mycompiler.io/view/6LB7k4TVOuj

# Includes $ for root node, and [] where access is through an array

def hierarchy(struct, path=None):
    """Collect the paths of all nodes in a nested dict/list structure.

    The root is written as '$', a dict key appends '.<key>', and stepping
    into a list appends '[]'.

    Args:
        struct: The value to walk.
        path: Path of ``struct`` itself; a falsy value means the root.

    Returns:
        A set of path strings when ``struct`` is a dict or a list. For any
        other value, a one-element list holding ``path`` unchanged.
    """
    if isinstance(struct, dict):
        here = path or '$'
        found = {here}
        for key, obj in struct.items():
            found.update(hierarchy(obj, f'{here}.{key}'))
        return found

    if isinstance(struct, list):
        here = f'{path}[]' if path else '$[]'
        found = {here}
        for obj in struct:
            # List items share the list's own '[]' path; no index is added.
            found.update(hierarchy(obj, here))
        return found

    return [path]

Or...

from itertools import chain

# Excludes those $ and [] markers

def hierarchy2(d):
    """Collect the dot-separated paths of all keys in a nested dict/list structure.

    Each dict key contributes itself plus '<key>.<sub>' for every path found
    in its value. A list contributes the truthy paths found in its items and
    adds no path segment of its own. Any other value contributes nothing.

    Args:
        d: The value to walk.

    Returns:
        A set of paths (empty for values that are neither dict nor list).
    """
    if isinstance(d, dict):
        paths = set()
        for k, v in d.items():
            paths.add(k)
            for sub in hierarchy2(v):
                # A falsy sub-path collapses to the key itself.
                paths.add(f'{k}.{sub}' if sub else k)
        return paths

    if isinstance(d, list):
        paths = set()
        for item in d:
            paths.update(sub for sub in hierarchy2(item) if sub)
        return paths

    return set()
  • Related