With a Python list of dictionaries which always contains start
and end
dates, how would you sort the list based on the "combined" start
and end
dates?
What would be the simplest (most Pythonic) way to obtain the end result with the following criteria - from top to bottom:
- Sort by
end_date
first and then bystart_date
. end_date
(most recent first).- If there are two objects with the same
end_date
, then the lateststart_date
comes first. - If
start_date
andend_date
are the same then keep as is in order.
import datetime
blah = [
{"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
{"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
{"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
{"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
{"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
{"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
{"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
{"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
{"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
{"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
{"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
{"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
{"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]
# Do something here...and return `result`.
result = [
{"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
{"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
{"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
{"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
{"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
{"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
{"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
{"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
{"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
{"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
{"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
{"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
{"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]
CodePudding user response:
What would be the simplest (most Pythonic) way to obtain the end result ...
The simplest (most Pythonic) way I can think of would be using pandas.
Demo:
import datetime
import pandas as pd
blah = [
{"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
{"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
{"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
{"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
{"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
{"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
{"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
{"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
{"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
{"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
{"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
{"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
{"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]
df = pd.DataFrame(blah)
result = df.sort_values(['end_date', 'start_date'], ascending=(False, False), na_position='first').to_dict('records')
for e in result:
print(e)
Output:
{'id': 1, 'start_date': datetime.date(2021, 5, 1), 'end_date': None}
{'id': 2, 'start_date': datetime.date(2013, 2, 1), 'end_date': None}
{'id': 4, 'start_date': datetime.date(2016, 5, 1), 'end_date': datetime.date(2019, 6, 1)}
{'id': 3, 'start_date': datetime.date(2017, 1, 1), 'end_date': datetime.date(2018, 1, 1)}
{'id': 5, 'start_date': datetime.date(2012, 1, 1), 'end_date': datetime.date(2015, 1, 1)}
{'id': 6, 'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2011, 1, 1)}
{'id': 8, 'start_date': datetime.date(2005, 1, 15), 'end_date': datetime.date(2010, 1, 15)}
{'id': 7, 'start_date': datetime.date(2006, 1, 1), 'end_date': datetime.date(2008, 1, 1)}
{'id': 10, 'start_date': datetime.date(2002, 1, 1), 'end_date': datetime.date(2006, 1, 1)}
{'id': 11, 'start_date': datetime.date(2002, 1, 1), 'end_date': datetime.date(2006, 1, 1)}
{'id': 13, 'start_date': datetime.date(2001, 1, 15), 'end_date': datetime.date(2003, 1, 15)}
{'id': 12, 'start_date': datetime.date(2001, 2, 1), 'end_date': datetime.date(2003, 1, 1)}
{'id': 9, 'start_date': datetime.date(2002, 1, 15), 'end_date': datetime.date(2002, 1, 15)}
{'id': 14, 'start_date': datetime.date(1998, 1, 1), 'end_date': datetime.date(2001, 1, 1)}
{'id': 15, 'start_date': datetime.date(1997, 1, 15), 'end_date': datetime.date(1997, 1, 15)}
CodePudding user response:
To order wrt end_date
I introduced a "fake date" to make the data consistent. This choice is arbitrary but should avoid conflicts with other values.
In order to make sense the question 3. I modify the start date for 2006, see comment.
blah = [
{"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
{"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
{"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
{"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
{"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
{"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
{"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
{"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
{"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
{"id": 10, "start_date": datetime.date(2002, 1, 2), "end_date": datetime.date(2006, 1, 1)}, # <---- modified start_date!
{"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
{"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
{"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
{"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
{"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]
Here the code.
# 1
print(list(sorted(blah, key=lambda p: p['start_date'])))
print(list(sorted(blah, reverse=True, key=lambda p: p['start_date']))) # reverse, A
print(list(reversed(sorted(blah, key=lambda p: p['start_date'])))) # reverse, B
# 2
fake_date = datetime.date(2999, 9, 9)
print(list(reversed(sorted(blah, key=lambda p: p['end_date'] if p['end_date'] is not None else fake_date))))
# 3
import itertools as it
grp_by_end_dates = it.groupby(sorted(blah, key=lambda p: p['end_date'] if p['end_date'] is not None else fake_date), key=lambda p: p['end_date'])
print(list((sorted(list(i), key=lambda p: p['start_date']) for _, i in grp_by_end_dates)))