I have gone through a number of answers on StackOverflow, but none of them quite match my use-case. I have converted the closest answer that used tuples to code that uses dicts and got it to work:
def get_matching_parent_entry_recursive(entries_to_traverse, parent_id):
for entry in entries_to_traverse:
if entry["id"] == parent_id:
return entry
if len(entry["items"]) > 0:
matching_entry = get_matching_parent_entry_recursive(entry["items"], parent_id)
if matching_entry:
return matching_entry
return None
def get_content_tree(categories):
simplified_entries = []
unmatched_entries = categories
while len(unmatched_entries) > 0:
i = 0
while i < len(unmatched_entries):
if unmatched_entries[i]["parent"] == 0: # we are dealing with a top-level entry
simplified_entries.append(
{
"id": str(unmatched_entries[i]["id"]),
"label": unmatched_entries[i]["label"],
"items": []
}
)
del unmatched_entries[i]
else:
# try to find parent entry in the final simplified tree
parent_entry = get_matching_parent_entry_recursive(
simplified_entries,
str(unmatched_entries[i]["parent"])
)
if parent_entry:
parent_entry["items"].append({
"id": str(unmatched_entries[i]["id"]),
"label": unmatched_entries[i]["label"],
"items": []
})
del unmatched_entries[i]
else:
i = 1
return simplified_entries
categories = [
{"id": 1, "label": 'Lamps', "parent": 0},
{"id": 2, "label": 'Lamp 1', "parent": 1},
{"id": 3, "label": 'Lamp 2', "parent": 1},
{"id": 4, "label": 'Shirts', "parent": 0},
{"id": 5, "label": 'Formal shirts', "parent": 4},
{"id": 7, "label": 'T-Shirts', "parent": 9},
{"id": 9, "label": 'Informal shirts', "parent": 4},
]
print(get_content_tree(categories))
However, I can see that this algo has to go through every single node to find a matching parent and it is bad for performace. Is there a faster way to get the same result?
EDIT: Replit link
EDIT 2: Ids are unique and hashable, but not presented in order that ensures that the parent is created before the child references it. I.e. all shirts can be moved to a newly created category, which would have ID 9, yet IDs of the T-Shirt will remain 7.
CodePudding user response:
Assuming ids are unique (and hashable), you could store reference to every dictionary under separate keys:
categories = [
{"id": 1, "label": "Lamps", "parent": 0},
{"id": 2, "label": "Lamp 1", "parent": 1},
{"id": 3, "label": "Lamp 2", "parent": 1},
{"id": 4, "label": "Shirts", "parent": 0},
{"id": 5, "label": "Formal shirts", "parent": 4},
{"id": 6, "label": "Informal shirts", "parent": 4},
{"id": 7, "label": "T-Shirts", "parent": 6},
]
root = {0: {"items": []}}
for c in categories:
raw = {"id": c["id"], "items": [], "label": c["label"]}
root[c["id"]] = raw
root[c["parent"]]["items"].append(raw)
print(root[0]["items"])
Edit:
It won't work if categories aren't topological sorted. In such case we could perform that sort (the best solution... in theory) or do small improvement, which in real case should be faster and much more elegant:
from collections import deque
categories = deque(
[
{"id": 1, "label": "Lamps", "parent": 0},
{"id": 2, "label": "Lamp 1", "parent": 1},
{"id": 3, "label": "Lamp 2", "parent": 1},
{"id": 4, "label": "Shirts", "parent": 0},
{"id": 5, "label": "Formal shirts", "parent": 4},
{"id": 7, "label": "T-Shirts", "parent": 9},
{"id": 9, "label": "Informal shirts", "parent": 4},
]
)
root = {0: {"items": []}}
while categories:
c = categories.popleft()
raw = {"id": c["id"], "items": [], "label": c["label"]}
try:
# order is important here, as we don't want to create reference in root dictionary
root[c["parent"]]["items"].append(raw)
root[c["id"]] = raw
except KeyError:
# We didn't process parent category yet, lets back to that later
categories.append(c)
print(root[0]["items"])