Home > front end >  Comparing a combined start and end date to determine ordering
Comparing a combined start and end date to determine ordering

Time:10-20

With a Python list of dictionaries which always contains start and end dates, how would you sort the list based on the "combined" start and end dates?

What would be the simplest (most Pythonic) way to obtain the end result with the following criteria - from top to bottom:

  1. Sort by end_date first and then by start_date.
  2. end_date (most recent first).
  3. If there are two objects with the same end_date, then the latest start_date comes first.
  4. If start_date and end_date are the same then keep as is in order.
import datetime


blah = [
    {"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
    {"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
    {"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
    {"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
    {"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
    {"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
    {"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
    {"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
    {"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
    {"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
    {"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
    {"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
    {"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]

# Do something here...and return `result`.

result = [
    {"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
    {"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
    {"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
    {"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
    {"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
    {"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
    {"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
    {"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
    {"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
    {"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
    {"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
    {"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
    {"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]

CodePudding user response:

What would be the simplest (most Pythonic) way to obtain the end result ...

The simplest (most Pythonic) way I can think of would be using pandas.

Demo:

import datetime
import pandas as pd

blah = [
    {"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
    {"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
    {"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
    {"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
    {"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
    {"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
    {"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
    {"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
    {"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
    {"id": 10, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)},
    {"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
    {"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
    {"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
    {"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]

df = pd.DataFrame(blah)

result = df.sort_values(['end_date', 'start_date'], ascending=(False, False), na_position='first').to_dict('records')

for e in result:
    print(e)

Output:

{'id': 1, 'start_date': datetime.date(2021, 5, 1), 'end_date': None}
{'id': 2, 'start_date': datetime.date(2013, 2, 1), 'end_date': None}
{'id': 4, 'start_date': datetime.date(2016, 5, 1), 'end_date': datetime.date(2019, 6, 1)}
{'id': 3, 'start_date': datetime.date(2017, 1, 1), 'end_date': datetime.date(2018, 1, 1)}
{'id': 5, 'start_date': datetime.date(2012, 1, 1), 'end_date': datetime.date(2015, 1, 1)}
{'id': 6, 'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2011, 1, 1)}
{'id': 8, 'start_date': datetime.date(2005, 1, 15), 'end_date': datetime.date(2010, 1, 15)}
{'id': 7, 'start_date': datetime.date(2006, 1, 1), 'end_date': datetime.date(2008, 1, 1)}
{'id': 10, 'start_date': datetime.date(2002, 1, 1), 'end_date': datetime.date(2006, 1, 1)}
{'id': 11, 'start_date': datetime.date(2002, 1, 1), 'end_date': datetime.date(2006, 1, 1)}
{'id': 13, 'start_date': datetime.date(2001, 1, 15), 'end_date': datetime.date(2003, 1, 15)}
{'id': 12, 'start_date': datetime.date(2001, 2, 1), 'end_date': datetime.date(2003, 1, 1)}
{'id': 9, 'start_date': datetime.date(2002, 1, 15), 'end_date': datetime.date(2002, 1, 15)}
{'id': 14, 'start_date': datetime.date(1998, 1, 1), 'end_date': datetime.date(2001, 1, 1)}
{'id': 15, 'start_date': datetime.date(1997, 1, 15), 'end_date': datetime.date(1997, 1, 15)}

CodePudding user response:

To order wrt end_date I introduced a "fake date" to make the data consistent. This choice is arbitrary but should avoid conflicts with other values.

In order to make sense the question 3. I modify the start date for 2006, see comment.

blah = [
    {"id": 1, "start_date": datetime.date(2021, 5, 1), "end_date": None},
    {"id": 2, "start_date": datetime.date(2013, 2, 1), "end_date": None},
    {"id": 3, "start_date": datetime.date(2017, 1, 1), "end_date": datetime.date(2018, 1, 1)},
    {"id": 4, "start_date": datetime.date(2016, 5, 1), "end_date": datetime.date(2019, 6, 1)},
    {"id": 5, "start_date": datetime.date(2012, 1, 1), "end_date": datetime.date(2015, 1, 1)},
    {"id": 6, "start_date": datetime.date(2008, 1, 1), "end_date": datetime.date(2011, 1, 1)},
    {"id": 7, "start_date": datetime.date(2006, 1, 1), "end_date": datetime.date(2008, 1, 1)},
    {"id": 8, "start_date": datetime.date(2005, 1, 15), "end_date": datetime.date(2010, 1, 15)},
    {"id": 9, "start_date": datetime.date(2002, 1, 15), "end_date": datetime.date(2002, 1, 15)},
    {"id": 10, "start_date": datetime.date(2002, 1, 2), "end_date": datetime.date(2006, 1, 1)}, # <---- modified start_date!
    {"id": 11, "start_date": datetime.date(2002, 1, 1), "end_date": datetime.date(2006, 1, 1)}, 
    {"id": 12, "start_date": datetime.date(2001, 2, 1), "end_date": datetime.date(2003, 1, 1)},
    {"id": 13, "start_date": datetime.date(2001, 1, 15), "end_date": datetime.date(2003, 1, 15)},
    {"id": 14, "start_date": datetime.date(1998, 1, 1), "end_date": datetime.date(2001, 1, 1)},
    {"id": 15, "start_date": datetime.date(1997, 1, 15), "end_date": datetime.date(1997, 1, 15)}
]

Here the code.

# 1
print(list(sorted(blah, key=lambda p: p['start_date'])))

print(list(sorted(blah, reverse=True, key=lambda p: p['start_date']))) # reverse, A
print(list(reversed(sorted(blah, key=lambda p: p['start_date'])))) # reverse, B

# 2
fake_date = datetime.date(2999, 9, 9)

print(list(reversed(sorted(blah, key=lambda p: p['end_date'] if p['end_date'] is not None else fake_date))))


# 3
import itertools as it

grp_by_end_dates = it.groupby(sorted(blah, key=lambda p: p['end_date'] if p['end_date'] is not None else fake_date), key=lambda p: p['end_date'])
print(list((sorted(list(i), key=lambda p: p['start_date']) for _, i in grp_by_end_dates)))
  • Related