Fastest way to convert two-dimensional list to a nested dictionary-CodePudding

I have gone through a number of answers on StackOverflow, but none of them quite match my use-case. I have converted the closest answer that used tuples to code that uses dicts and got it to work:

def get_matching_parent_entry_recursive(entries_to_traverse, parent_id):
    for entry in entries_to_traverse:
        if entry["id"] == parent_id:
            return entry
        if len(entry["items"]) > 0:
            matching_entry = get_matching_parent_entry_recursive(entry["items"], parent_id)
            if matching_entry:
                return matching_entry
    return None


def get_content_tree(categories):
    simplified_entries = []
    unmatched_entries = categories

    while len(unmatched_entries) > 0:

        i = 0
        while i < len(unmatched_entries):
            if unmatched_entries[i]["parent"] == 0:  # we are dealing with a top-level entry
                simplified_entries.append(
                    {
                        "id": str(unmatched_entries[i]["id"]),
                        "label": unmatched_entries[i]["label"],
                        "items": []
                    }
                )
                del unmatched_entries[i]
            else:
                # try to find parent entry in the final simplified tree
                parent_entry = get_matching_parent_entry_recursive(
                    simplified_entries,
                    str(unmatched_entries[i]["parent"])
                )
                if parent_entry:
                    parent_entry["items"].append({
                        "id": str(unmatched_entries[i]["id"]),
                        "label": unmatched_entries[i]["label"],
                        "items": []
                    })
                    del unmatched_entries[i]
                else:
                    i  = 1
    return simplified_entries


categories = [
    {"id": 1, "label": 'Lamps', "parent": 0},
    {"id": 2, "label": 'Lamp 1', "parent": 1},
    {"id": 3, "label": 'Lamp 2', "parent": 1},
    {"id": 4, "label": 'Shirts', "parent": 0},
    {"id": 5, "label": 'Formal shirts', "parent": 4},
    {"id": 7, "label": 'T-Shirts', "parent": 9},
    {"id": 9, "label": 'Informal shirts', "parent": 4},
]

print(get_content_tree(categories))

However, I can see that this algo has to go through every single node to find a matching parent and it is bad for performace. Is there a faster way to get the same result?

EDIT: Replit link

EDIT 2: Ids are unique and hashable, but not presented in order that ensures that the parent is created before the child references it. I.e. all shirts can be moved to a newly created category, which would have ID 9, yet IDs of the T-Shirt will remain 7.

CodePudding user response：

Assuming ids are unique (and hashable), you could store reference to every dictionary under separate keys:

categories = [
    {"id": 1, "label": "Lamps", "parent": 0},
    {"id": 2, "label": "Lamp 1", "parent": 1},
    {"id": 3, "label": "Lamp 2", "parent": 1},
    {"id": 4, "label": "Shirts", "parent": 0},
    {"id": 5, "label": "Formal shirts", "parent": 4},
    {"id": 6, "label": "Informal shirts", "parent": 4},
    {"id": 7, "label": "T-Shirts", "parent": 6},
]

root = {0: {"items": []}}
for c in categories:
    raw = {"id": c["id"], "items": [], "label": c["label"]}
    root[c["id"]] = raw
    root[c["parent"]]["items"].append(raw)

print(root[0]["items"])

Edit:

It won't work if categories aren't topological sorted. In such case we could perform that sort (the best solution... in theory) or do small improvement, which in real case should be faster and much more elegant:

from collections import deque

categories = deque(
    [
        {"id": 1, "label": "Lamps", "parent": 0},
        {"id": 2, "label": "Lamp 1", "parent": 1},
        {"id": 3, "label": "Lamp 2", "parent": 1},
        {"id": 4, "label": "Shirts", "parent": 0},
        {"id": 5, "label": "Formal shirts", "parent": 4},
        {"id": 7, "label": "T-Shirts", "parent": 9},
        {"id": 9, "label": "Informal shirts", "parent": 4},
    ]
)

root = {0: {"items": []}}
while categories:
    c = categories.popleft()
    raw = {"id": c["id"], "items": [], "label": c["label"]}
    try:
        # order is important here, as we don't want to create reference in root dictionary
        root[c["parent"]]["items"].append(raw)         
        root[c["id"]] = raw
    except KeyError:
        # We didn't process parent category yet, lets back to that later
        categories.append(c)

print(root[0]["items"])