I have a list of dicts in which the value of the key 'city' can repeat. I need to sort this list from the most frequently occurring city to the least frequently occurring one.
My list of dicts:
# Sample records; the same 'city' value can appear in several rows.
data = [
    {'city': 'Zp', 'p': 8},
    {'city': 'Kyiv', 'p': 2},
    {'city': 'Lviv', 'p': 7},
    {'city': 'Kyiv', 'p': 3},
    {'city': 'Kyiv', 'p': 4},
    {'city': 'Brd', 'p': 1},
    {'city': 'Kyiv', 'p': 5},
    {'city': 'Zp', 'p': 9},
    {'city': 'Lviv', 'p': 6},
]
I tried to sort by how many times each 'city' value occurs, but the list came back in the same order:
# NOTE: `data` holds dicts, so data.count(<city string>) never matches an
# element and returns 0 for every row. All sort keys are therefore equal, and
# because sorted() is stable the list comes back in its original order.
data = sorted(data, key=lambda x: data.count(x['city']))
# >> data
# [
# {'city': 'Zp', 'p': 8},
# {'city': 'Kyiv', 'p': 2},
# {'city': 'Lviv', 'p': 7},
# {'city': 'Kyiv', 'p': 3},
# {'city': 'Kyiv', 'p': 4},
# {'city': 'Brd', 'p': 1},
# {'city': 'Kyiv', 'p': 5},
# {'city': 'Zp', 'p': 9},
# {'city': 'Lviv', 'p': 6},
# ]
This gives me almost the result I need, but there are 2 objects with 'Lviv' and 2 with 'Zp', and they are interleaved in the result (I also think this approach uses too many resources):
# NOTE: the sort key is only the per-city count, so cities with the same
# count ('Zp' and 'Lviv', 2 each) compare equal and keep their original
# relative order, which is why their rows come out interleaved.
# The list comprehension also rescans all of `data` once per element.
data = sorted(
data,
key=lambda x: len(
[i for i in data if i['city'] == x['city']]
),
reverse=True
)
# >> data
# [
# {'city': 'Kyiv', 'p': 2},
# {'city': 'Kyiv', 'p': 3},
# {'city': 'Kyiv', 'p': 4},
# {'city': 'Kyiv', 'p': 5},
# {'city': 'Zp', 'p': 8},
# {'city': 'Lviv', 'p': 7},
# {'city': 'Zp', 'p': 9},
# {'city': 'Lviv', 'p': 6},
# {'city': 'Brd', 'p': 1},
#]
What I want to get:
# >> data
# [
# {'city': 'Kyiv', 'p': 2},
# {'city': 'Kyiv', 'p': 3},
# {'city': 'Kyiv', 'p': 4},
# {'city': 'Kyiv', 'p': 5},
# {'city': 'Lviv', 'p': 6},
# {'city': 'Lviv', 'p': 7},
# {'city': 'Zp', 'p': 8},
# {'city': 'Zp', 'p': 9},
# {'city': 'Brd', 'p': 1},
# ]
CodePudding user response:
from collections import Counter
from pprint import pprint
data = [
    {'city': 'Zp', 'p': 8},
    {'city': 'Kyiv', 'p': 2},
    {'city': 'Lviv', 'p': 7},
    {'city': 'Kyiv', 'p': 3},
    {'city': 'Kyiv', 'p': 4},
    {'city': 'Brd', 'p': 1},
    {'city': 'Kyiv', 'p': 5},
    {'city': 'Zp', 'p': 9},
    {'city': 'Lviv', 'p': 6},
]

# Find the number of occurrences of each city (one pass over the data).
cities = [d['city'] for d in data]
c = Counter(cities)

# Sort by 1) frequency of the city, most frequent first,
#         2) name of the city, A-Z, so equally frequent cities stay grouped,
#         3) 'p', ascending, within each city.
# The count is negated instead of passing reverse=True: reverse would also
# flip the tie-breakers (putting 'Zp' before 'Lviv'), whereas the requested
# output lists tied cities alphabetically with 'p' ascending.
sorted_data = sorted(
    data,
    key=lambda d: (-c[d['city']], d['city'], d['p']),
)
pprint(sorted_data)
CodePudding user response:
You need to sort by two keys here:
- the number of records for each city in `data` (you need to extract the list of city names only for this)
- the city name, for tie-breaking
>>> sorted(
data,
key=lambda x: (list(y['city'] for y in data).count(x['city']), x['city']),
reverse=True
)
[
{'city': 'Kyiv', 'p': 2},
{'city': 'Kyiv', 'p': 3},
{'city': 'Kyiv', 'p': 4},
{'city': 'Kyiv', 'p': 5},
{'city': 'Zp', 'p': 8},
{'city': 'Zp', 'p': 9},
{'city': 'Lviv', 'p': 7},
{'city': 'Lviv', 'p': 6},
{'city': 'Brd', 'p': 1}
]