Home > Software engineering >  python merge list of dicts into 1 based on common key
python merge list of dicts into 1 based on common key

Time:01-27

I have this list of dicts:

# Sample input: one dict per workspace; every entry carries the same 'org_id'.
# Elements of a list literal must be separated by commas.
data = [
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-nonprod', 'ws_id': 'ws-CTvV7QysPeY4Gt1Q', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-prod', 'ws_id': 'ws-s4inidN9aDxELE4a', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-preprod', 'ws_id': 'ws-fvyKv7m4FRYf8v5o', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 's3_dlp-getd_sherlock-prod', 'ws_id': 'ws-XpzzptzGHL2YNjsL', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 's3_dlp-getd_sherlock-nonprod', 'ws_id': 'ws-dksk8nnXTjzLWmRn', 'current_run': 'run-osSNuCtt5ULHPBus'},
]

I need to have this result:

    result= {'AGO-cbgo', 'ws': [
            {'ws_name': 'finops_enricher-nonprod', 'ws_id': 'ws-CTvV7QysPeY4Gt1Q', 'current_run': None},
            {'ws_name': 'finops_enricher-preprod', 'ws_id': 'ws-fvyKv7m4FRYf8v5o', 'current_run': None},
            {'ws_name': 's3_dlp-getd_sherlock-prod', 'ws_id': 'ws-XpzzptzGHL2YNjsL', 'current_run': None},
            {'ws_name': 's3_dlp-getd_sherlock-nonprod', 'ws_id': 'ws-dksk8nnXTjzLWmRn', 'current_run': 'run-osSNuCtt5ULHPBus'}
            ]
        }

Any idea how to achieve this? I played around with collections and defaultdict, but without success.

CodePudding user response:

This should do the trick, without mutating the original data:

from pprint import pprint

data = [
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-nonprod', 'ws_id': 'ws-CTvV7QysPeY4Gt1Q', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-prod', 'ws_id': 'ws-s4inidN9aDxELE4a', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 'finops_enricher-preprod', 'ws_id': 'ws-fvyKv7m4FRYf8v5o', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 's3_dlp-getd_sherlock-prod', 'ws_id': 'ws-XpzzptzGHL2YNjsL', 'current_run': None},
    {'org_id': 'AGO-cbgo', 'ws_name': 's3_dlp-getd_sherlock-nonprod', 'ws_id': 'ws-dksk8nnXTjzLWmRn', 'current_run': 'run-osSNuCtt5ULHPBus'},
]

# Maps each org id to the list of its workspace dicts (without 'org_id').
new_data = {}

for record in data:
    org = record['org_id']
    # Build a fresh dict instead of editing ``record`` so ``data`` stays untouched.
    workspace = {key: value for key, value in record.items() if key != 'org_id'}
    # Create the bucket for this org the first time it is seen.
    if org not in new_data:
        new_data[org] = []
    new_data[org].append(workspace)

pprint(new_data)

Result:

{'AGO-cbgo': [{'current_run': None,
               'ws_id': 'ws-CTvV7QysPeY4Gt1Q',
               'ws_name': 'finops_enricher-nonprod'},
              {'current_run': None,
               'ws_id': 'ws-s4inidN9aDxELE4a',
               'ws_name': 'finops_enricher-prod'},
              {'current_run': None,
               'ws_id': 'ws-fvyKv7m4FRYf8v5o',
               'ws_name': 'finops_enricher-preprod'},
              {'current_run': None,
               'ws_id': 'ws-XpzzptzGHL2YNjsL',
               'ws_name': 's3_dlp-getd_sherlock-prod'},
              {'current_run': 'run-osSNuCtt5ULHPBus',
               'ws_id': 'ws-dksk8nnXTjzLWmRn',
               'ws_name': 's3_dlp-getd_sherlock-nonprod'}]}

CodePudding user response:

Using defaultdict from collections: having 'ws' as a key and its value as a list containing sub-dicts with the different attributes

from collections import defaultdict

# Missing keys (here 'ws') start out as empty lists; 'org_id' is assigned
# explicitly as the set of distinct org ids found in ``data``.
res = defaultdict(list)
res["org_id"] = {item["org_id"] for item in data}
for item in data:
    # Copy every field except 'org_id' into a new dict.
    stripped = {}
    for key, value in item.items():
        if key != "org_id":
            stripped[key] = value
    res["ws"].append(stripped)

# Convert back to a plain dict before printing.
new_dict = dict(res)
print(new_dict)

You could also arrive at the same output without defaultdict, using the following approaches:
Using zip()

# Pair each org id with a copy of the entry that omits 'org_id'. Reading the key
# (rather than popping it) leaves the dicts in ``data`` unmodified, so the other
# snippets can still be run on the same ``data`` afterwards.
pairs = [
    (item["org_id"], {key: value for key, value in item.items() if key != "org_id"})
    for item in data
]
# zip(*pairs) transposes the pairs into (org ids, workspaces); with no pairs
# there is nothing to unpack, so fall back to two empty tuples.
org_id, ws = zip(*pairs) if pairs else ((), ())
new_dict = {"org_id": set(org_id), "ws": list(ws)}
print(new_dict)

Or initializing dictionary:

# Start from the final shape: a set of org ids and a list of workspace dicts.
new_dict = {"org_id": set(), "ws": []}
for entry in data:
    new_dict["org_id"].add(entry["org_id"])
    # Keep every field of the entry except 'org_id'.
    trimmed = {}
    for key, value in entry.items():
        if key != "org_id":
            trimmed[key] = value
    new_dict["ws"].append(trimmed)

print(new_dict)

Note: Your expected output is not a valid dictionary. Below is a valid version, which I suspect is the output you are asking for.


{'org_id': {'AGO-cbgo'}, 'ws': [
    {'ws_name': 'finops_enricher-nonprod', 'ws_id': 'ws-CTvV7QysPeY4Gt1Q', 'current_run': None}, 
    {'ws_name': 'finops_enricher-prod', 'ws_id': 'ws-s4inidN9aDxELE4a', 'current_run': None}, 
    {'ws_name': 'finops_enricher-preprod', 'ws_id': 'ws-fvyKv7m4FRYf8v5o', 'current_run': None}, 
    {'ws_name': 's3_dlp-getd_sherlock-prod', 'ws_id': 'ws-XpzzptzGHL2YNjsL', 'current_run': None}, 
    {'ws_name': 's3_dlp-getd_sherlock-nonprod', 'ws_id': 'ws-dksk8nnXTjzLWmRn', 'current_run': 'run-osSNuCtt5ULHPBus'}
    ]}

Printing the output with the pprint module will produce the format below:

from pprint import pprint

# Use pprint (not print): it wraps the nested structure over several lines and
# sorts dict keys, which is the layout shown below.
pprint(new_dict)

{'org_id': {'AGO-cbgo'},
 'ws': [{'current_run': None,
         'ws_id': 'ws-CTvV7QysPeY4Gt1Q',
         'ws_name': 'finops_enricher-nonprod'},
        {'current_run': None,
         'ws_id': 'ws-s4inidN9aDxELE4a',
         'ws_name': 'finops_enricher-prod'},
        {'current_run': None,
         'ws_id': 'ws-fvyKv7m4FRYf8v5o',
         'ws_name': 'finops_enricher-preprod'},
        {'current_run': None,
         'ws_id': 'ws-XpzzptzGHL2YNjsL',
         'ws_name': 's3_dlp-getd_sherlock-prod'},
        {'current_run': 'run-osSNuCtt5ULHPBus',
         'ws_id': 'ws-dksk8nnXTjzLWmRn',
         'ws_name': 's3_dlp-getd_sherlock-nonprod'}]}
  • Related