I have nested dictionaries, which are representations of XML that I have parsed using xmltodict.
Now I want to give the possibility to extract certain sub-structures and remove keys which contain '@'.
{'rpc': {'@xmlns': 'urn:1.0',
'@message-id': '4',
'edit-config': {'target': {'running': None},
'config': {'test': {'@xmlns': 'urn:2.0',
'common': {'abc': 'forward',
'bbc': {'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}}}}}}}
I have defined a function where the user should be able to provide an input to choose which tag they want to extract the substructure from, while also removing any key with '@' in it.
def process_xml_dict(d,clean_d,start_after_tag = None,reached_tag=False):
if start_after_tag == None:
for k, v in d.items():
if isinstance(v, dict):
process_xml_dict(v,clean_d)
else:
if '@' not in k:
clean_d[k] = v
else:
for k,v in d.items():
if isinstance(v, dict):
if k == start_after_tag:
reached_tag = True
process_xml_dict(v,clean_d,start_after_tag,reached_tag)
else:
if '@' not in k and reached_tag:
clean_d[k] = v
But it does not work
clean_d = dict()
process_xml_dict(d,clean_d)
print(clean_d)
Should output
{'rpc': {
'edit-config': {'target': {'running': None},
'config': {'test': {
'common': {'abc': 'forward',
'bbc': {'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}}}}}}}
But now it outputs
{
'running': None,
'abc': 'forward',
'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}
And if I input
clean_d = dict()
process_xml_dict(d,clean_d,start_after_tag='config')
print(clean_d)
It should output
{'test': {'common': {'abc': 'forward',
'bbc': {'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}}}}
but now it outputs
{'abc': 'forward',
'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}
What am I doing wrong? And how would I modify my function to output expected output?
Thankful for any input.
CodePudding user response:
def clean(dictionary):
return {key:value for key, value in dictionary.items() if '@' not in key}
def clean_recursively(dictionary):
dictionary = clean(dictionary)
return {key: (clean_recursively(value)
if isinstance(value, dict) else value)
for key, value in dictionary.items()}
Does this achieve the result? :) You can write it without dictionary comprehension, I just think dict-comp looks fine in this particular case because it avoids deep nesting and looks 'clearer' to me.
CodePudding user response:
This will work for you
orig_dict = {'rpc': {'@xmlns': 'urn:1.0',
'@message-id': '4',
'edit-config': {'target': {'running': None},
'config': {'test': {'@xmlns': 'urn:2.0',
'common': {'abc': 'forward',
'bbc': {'geo-model': 'texas',
'remote': [{'id': '288',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '318',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'},
{'distributed-system-id': '348',
'transport': 'tcp',
'port': '8',
'ipv4-address': '0.0.0.0'}]}}}}}}}
def get_filter_dict(dict_):
res = {}
if isinstance(dict_, dict):
for k,v in dict_.items():
if isinstance(v, dict):
res[k] = get_filter_dict(v)
else:
if not k.startswith("@"):
res[k] = v
return res
def get_dict_start_after_tag(dict_, res=[None], start_after_tag=None):
if start_after_tag:
for k, v in dict_.items():
if k == start_after_tag:
res[0] = get_filter_dict(v)
return res[0]
if isinstance(v, dict):
_ = get_dict_start_after_tag(v, res, start_after_tag)
else:
res[0] = get_filter_dict(dict_)
return res
res = [None]
get_dict_start_after_tag(orig_dict, res)
# {'rpc': {'edit-config': {'target': {'running': None}, 'config': {'test': {'common': {'abc': 'forward', 'bbc': {'geo-model': 'texas', 'remote': [{'id': '288', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}, {'distributed-system-id': '318', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}, {'distributed-system-id': '348', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}]}}}}}}}
get_dict_start_after_tag(orig_dict, res, "config")
# {'test': {'common': {'abc': 'forward', 'bbc': {'geo-model': 'texas', 'remote': [{'id': '288', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}, {'distributed-system-id': '318', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}, {'distributed-system-id': '348', 'transport': 'tcp', 'port': '8', 'ipv4-address': '0.0.0.0'}]}}}}
print(res[0])