Python group by multiple keys in a dict-CodePudding

I have a list of lists of dicts that I want to group by multiple keys.

So far I have tried sorting each dict's items and collecting them with setdefault():

data = [
    [],
    [
        {'value': 8, 'bot': 'DB', 'month': 9, 'year': 2020},
        {'value': 79, 'bot': 'DB', 'month': 10, 'year': 2020},
        {'value': 126, 'bot': 'DB', 'month': 8, 'year': 2021},
    ],
    [],
    [
        {'value': 222, 'bot': 'GEMBOT', 'month': 11, 'year': 2020},
        {'value': 623, 'bot': 'GEMBOT', 'month': 4, 'year': 2021},
        {'value': 628, 'bot': 'GEMBOT', 'month': 9, 'year': 2021},
    ],
    [
        {'value': 0, 'bot': 'GEMBOT', 'month': 4, 'year': 2021},
    ],
    [
        {'value': 703, 'bot': 'DB', 'month': 11, 'year': 2020},
        {'value': 1081, 'bot': 'DB', 'month': 3, 'year': 2021},
        {'value': 1335, 'bot': 'DB', 'month': 10, 'year': 2020},
        {'value': 1920, 'bot': 'DB', 'month': 4, 'year': 2021},
        {'value': 2132, 'bot': 'DB', 'month': 1, 'year': 2021},
        {'value': 2383, 'bot': 'DB', 'month': 2, 'year': 2021},
    ],
]

# Maps every field value found in the data to the names of the fields holding it.
output_dict = {}

# Empty groups produce no iterations, so they need no special handling.
for group in data:
    for record in group:
        # Visit the field names of each record in sorted order.
        for field in sorted(record):
            output_dict.setdefault(record[field], []).append(field)

print(output_dict)
    
{'DB': ['bot', 'bot', 'bot', 'bot', 'bot', 'bot', 'bot', 'bot', 'bot'], 9: ['month', 'month', 'month'], 8: ['value'], 2020: ['year', 'year', 'year', 'year', 'year'], 10: ['month', 'month'], 79: ['value'], 126: ['value'], 2021: ['year', 'year', 'year', 'year', 'year', 'year', 'year', 'year'], 'GEMBOT': ['bot', 'bot', 'bot', 'bot'], 11: ['month', 'month'], 222: ['value'], 4: ['month', 'month', 'month'], 623: ['value'], 628: ['value'], 0: ['value'], 703: ['value'], 3: ['month'], 1081: ['value'], 1335: ['value'], 1920: ['value'], 1: ['month'], 2132: ['value'], 2: ['month'], 2383: ['value']}

But I want the output like this.

[{ "bot": "DB",
   "date": "Sept 20",
   "value": 134
},{"bot": "DB",
   "date": "Oct 20",
   "value": 79
}.. So on ]

Is there an efficient way to flatten this list ?

Thanks in advance

CodePudding user response：

Maybe try:

from pprint import pprint
import datetime

# Flat list of {"bot", "date", "value"} records, one per input record.
output_dict = []

for group in data:  # empty groups simply contribute nothing
    for record in group:
        # Convert the month number to its abbreviated name, e.g. 9 -> "Sep".
        month = datetime.datetime.strptime(str(record["month"]), "%m").strftime("%b")
        # Read each field by name so the result does not depend on the order
        # in which the record's keys happen to be visited.
        output_dict.append({
            "bot": record["bot"],
            "date": month + " " + str(record["year"]),
            "value": record["value"],
        })

pprint(output_dict)

The final results are shown as follows:

[{'bot': 'DB', 'date': 'Sep 2020', 'value': 8},
 {'bot': 'DB', 'date': 'Oct 2020', 'value': 79},
 {'bot': 'DB', 'date': 'Aug 2021', 'value': 126},
 {'bot': 'GEMBOT', 'date': 'Nov 2020', 'value': 222},
 {'bot': 'GEMBOT', 'date': 'Apr 2021', 'value': 623},
 {'bot': 'GEMBOT', 'date': 'Sep 2021', 'value': 628},
 {'bot': 'GEMBOT', 'date': 'Apr 2021', 'value': 0},
 {'bot': 'DB', 'date': 'Nov 2020', 'value': 703},
 {'bot': 'DB', 'date': 'Mar 2021', 'value': 1081},
 {'bot': 'DB', 'date': 'Oct 2020', 'value': 1335},
 {'bot': 'DB', 'date': 'Apr 2021', 'value': 1920},
 {'bot': 'DB', 'date': 'Jan 2021', 'value': 2132},
 {'bot': 'DB', 'date': 'Feb 2021', 'value': 2383}]

CodePudding user response：

Two things will make this easier to answer. The first is a list comprehension that will promote sub-items:

# Flatten one level: collect the items of every sub-list of `data` (empty ones add nothing) into one list.
data_reshaped = [cell for row in data for cell in row]

this will take your original data and flatten it a bit to:

[
    {'value': 8, 'bot': 'DB', 'month': 9, 'year': 2020},
    {'value': 79, 'bot': 'DB', 'month': 10, 'year': 2020},
    {'value': 126, 'bot': 'DB', 'month': 8, 'year': 2021},
    {'value': 222, 'bot': 'GEMBOT', 'month': 11, 'year': 2020},
    {'value': 623, 'bot': 'GEMBOT', 'month': 4, 'year': 2021},
    {'value': 628, 'bot': 'GEMBOT', 'month': 9, 'year': 2021},
    {'value': 0, 'bot': 'GEMBOT', 'month': 4, 'year': 2021},
    {'value': 703, 'bot': 'DB', 'month': 11, 'year': 2020},
    {'value': 1081, 'bot': 'DB', 'month': 3, 'year': 2021},
    {'value': 1335, 'bot': 'DB', 'month': 10, 'year': 2020},
    {'value': 1920, 'bot': 'DB', 'month': 4, 'year': 2021},
    {'value': 2132, 'bot': 'DB', 'month': 1, 'year': 2021},
    {'value': 2383, 'bot': 'DB', 'month': 2, 'year': 2021}
]

Now we can iterate over that using a compound key and setdefault() to aggregate the results. Note: if you would rather use collections.defaultdict(), as I do, you can swap it in for setdefault().

results = {}
for cell in data_reshaped:
    # One bucket per (bot, year, month) combination.
    key = f"{cell['bot']}_{cell['year']}_{cell['month']}"
    # The first time a key is seen, store a zero-valued copy of the record.
    # Starting from 0 lets the += below handle every cell the same way, and
    # copying leaves the original input records untouched.
    bucket = results.setdefault(key, {**cell, "value": 0})
    bucket["value"] += cell["value"]

This should allow you to:

# Print each aggregated record on its own line.
for result in results.values():
    print(result)

Giving:

{'value': 8, 'bot': 'DB', 'month': 9, 'year': 2020}
{'value': 1414, 'bot': 'DB', 'month': 10, 'year': 2020}
{'value': 126, 'bot': 'DB', 'month': 8, 'year': 2021}
{'value': 222, 'bot': 'GEMBOT', 'month': 11, 'year': 2020}
{'value': 623, 'bot': 'GEMBOT', 'month': 4, 'year': 2021}
{'value': 628, 'bot': 'GEMBOT', 'month': 9, 'year': 2021}
{'value': 703, 'bot': 'DB', 'month': 11, 'year': 2020}
{'value': 1081, 'bot': 'DB', 'month': 3, 'year': 2021}
{'value': 1920, 'bot': 'DB', 'month': 4, 'year': 2021}
{'value': 2132, 'bot': 'DB', 'month': 1, 'year': 2021}
{'value': 2383, 'bot': 'DB', 'month': 2, 'year': 2021}

Full solution:

data = [
    [],
    [
        {'value': 8, 'bot': 'DB', 'month': 9, 'year': 2020},
        {'value': 79, 'bot': 'DB', 'month': 10, 'year': 2020},
        {'value': 126, 'bot': 'DB', 'month': 8, 'year': 2021}
    ],
    [],
    [
        {'value': 222, 'bot': 'GEMBOT', 'month': 11, 'year': 2020},
        {'value': 623, 'bot': 'GEMBOT', 'month': 4, 'year': 2021},
        {'value': 628, 'bot': 'GEMBOT', 'month': 9, 'year': 2021}
    ],
    [
        {'value': 0, 'bot': 'GEMBOT', 'month': 4, 'year': 2021}
    ],
    [
        {'value': 703, 'bot': 'DB', 'month': 11, 'year': 2020},
        {'value': 1081, 'bot': 'DB', 'month': 3, 'year': 2021},
        {'value': 1335, 'bot': 'DB', 'month': 10, 'year': 2020},
        {'value': 1920, 'bot': 'DB', 'month': 4, 'year': 2021},
        {'value': 2132, 'bot': 'DB', 'month': 1, 'year': 2021},
        {'value': 2383, 'bot': 'DB', 'month': 2, 'year': 2021}
    ]
]

# Flatten one level: every record from every (possibly empty) sub-list.
data_reshaped = [cell for row in data for cell in row]

# Sum "value" per (bot, year, month) combination.
results = {}
for cell in data_reshaped:
    key = f"{cell['bot']}_{cell['year']}_{cell['month']}"
    # The first time a key is seen, store a zero-valued copy of the record.
    # Starting from 0 lets the += below handle every cell the same way, and
    # copying leaves the records inside `data` untouched.
    bucket = results.setdefault(key, {**cell, "value": 0})
    bucket["value"] += cell["value"]

# Print each aggregated record on its own line.
for result in results.values():
    print(result)

Again Giving:

{'value': 8, 'bot': 'DB', 'month': 9, 'year': 2020}
{'value': 1414, 'bot': 'DB', 'month': 10, 'year': 2020}
{'value': 126, 'bot': 'DB', 'month': 8, 'year': 2021}
{'value': 222, 'bot': 'GEMBOT', 'month': 11, 'year': 2020}
{'value': 623, 'bot': 'GEMBOT', 'month': 4, 'year': 2021}
{'value': 628, 'bot': 'GEMBOT', 'month': 9, 'year': 2021}
{'value': 703, 'bot': 'DB', 'month': 11, 'year': 2020}
{'value': 1081, 'bot': 'DB', 'month': 3, 'year': 2021}
{'value': 1920, 'bot': 'DB', 'month': 4, 'year': 2021}
{'value': 2132, 'bot': 'DB', 'month': 1, 'year': 2021}
{'value': 2383, 'bot': 'DB', 'month': 2, 'year': 2021}

I will leave it to you to figure out casting the two date fields to some other presentation as that seems out of context with the question at hand.

CodePudding user response：

Maybe try:

# Gather the items of every sub-list into one flat list; empty sub-lists add nothing.
output = []
for group in data:
    output.extend(group)

And then, if you want to sort it, you can use sorted_output = sorted(output, key=lambda k: k['bot']) to sort it by bot, for example. If you want to sort it by date, create a value that expresses the date in months (for example k['year'] * 12 + k['month']) and sort on that.

CodePudding user response：

I can't comment because I don't have enough points.

You have some empty lists in your data set, why? Remove those.

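You can use itertools.groupby. Note that it only groups consecutive items, so sort the flattened list by the same key first. Your key function could be lambda d: (d.get('bot'), d.get('year'), d.get('month')), or lambda d: (d.get('bot'), d.get('date')) once you have built a combined date field.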

https://www.google.com/amp/s/www.geeksforgeeks.org/itertools-groupby-in-python/amp/

I would highly encourage using the pandas library here, if possible.