import pandas as pd
data=pd.DataFrame({'vehicle':['car','car','car','car','car','car','bus','bus','bus','bus','bus','bus','car','car','car','car','car','car','bus','bus','bus','bus','bus','bus'],
'expecteddate':['2/24/2022','2/24/2022','3/15/2022','3/15/2022','4/20/2022','4/20/2022','2/24/2022','2/24/2022','3/15/2022','3/15/2022','4/20/2022','4/20/2022','2/24/2022','2/24/2022','3/15/2022','3/15/2022','4/20/2022','4/20/2022','2/24/2022','2/24/2022','3/15/2022','3/15/2022','4/20/2022','4/20/2022'],'range':[240,240,240,240,240,240,300,300,300,300,300,300,240,240,240,240,240,240,300,300,300,300,300,300],'color':['blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red','blue','red'],'discount':[70,80,90,60,40,50,120,110,130,140,80,90,60,40,50,30,70,45,130,100,140,120,80,90],'price1':[60,40,90,50,40,50,100,100,130,140,80,90,50,40,90,60,40,50,30,40,130,70,70,40],'date':['2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/18/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022','2/17/2022']})
print(data)
My dataframe looks like
vehicle expecteddate range color discount price1 date
0 car 2/24/2022 240 blue 70 60 2/18/2022
1 car 2/24/2022 240 red 80 40 2/18/2022
2 car 3/15/2022 240 blue 90 90 2/18/2022
3 car 3/15/2022 240 red 60 50 2/18/2022
4 car 4/20/2022 240 blue 40 40 2/18/2022
5 car 4/20/2022 240 red 50 50 2/18/2022
6 bus 2/24/2022 300 blue 120 100 2/18/2022
7 bus 2/24/2022 300 red 110 100 2/18/2022
8 bus 3/15/2022 300 blue 130 130 2/18/2022
9 bus 3/15/2022 300 red 140 140 2/18/2022
10 bus 4/20/2022 300 blue 80 80 2/18/2022
11 bus 4/20/2022 300 red 90 90 2/18/2022
12 car 2/24/2022 240 blue 60 50 2/17/2022
13 car 2/24/2022 240 red 40 40 2/17/2022
14 car 3/15/2022 240 blue 50 90 2/17/2022
15 car 3/15/2022 240 red 30 60 2/17/2022
16 car 4/20/2022 240 blue 70 40 2/17/2022
17 car 4/20/2022 240 red 45 50 2/17/2022
18 bus 2/24/2022 300 blue 130 30 2/17/2022
19 bus 2/24/2022 300 red 100 40 2/17/2022
20 bus 3/15/2022 300 blue 140 130 2/17/2022
21 bus 3/15/2022 300 red 120 70 2/17/2022
22 bus 4/20/2022 300 blue 80 70 2/17/2022
23 bus 4/20/2022 300 red 90 40 2/17/2022
from this dataframe we have two vehicles ,three expecteddates ,range,two colors ,discount and date. We have filter rows based on condition like, if latest date discount=latest date price1 and latest date price1=previous date price1, if condition matches print such rows regarding latest date
this should filtered based on vehicle,expecteddate,range,color and date
Output looks like
Vehicle expecteddate range color discount price1 date
Car 3/15/2022 240 blue 90 90 2/18/2022
Car 3/15/2022 240 blue 40 40 2/18/2022
Car 4/20/2022 240 red 50 50 2/18/2022
Bus 3/15/2022 300 blue 130 130 2/18/2022
vehicles are not limited to two like car and bus ,it has many vehicles and data is not always have equal rows in vehicle and range columns and date is not limited to two dates
CodePudding user response:
First of all, we threat our date columns as such:
df["date"] = pd.to_datetime(df["date"])
Then, we can iterate each group (notice we do not group by date, because we need them for our conditions):
output_rows = list() # We'll store here the relevant rows
for _, group_df in df.groupby(["vehicle", "expecteddate", "range", "color"]):
# For each group, we find the row of the latest dates using `.nlargest(2)`
group_df = group_df.nlargest(2, "date")[["discount", "price1"]]
# Now we can check both conditions and retrieve a boolean (True or False)
condition_1 = (group_df.iloc[0, 0] == group_df.iloc[0, 1])
condition_2 = (group_df.iloc[0, 1] == group_df.iloc[-1, 1])
# If both conditions are True, we store the date in our output list
if condition_1 and condition_2:
output_rows.append(latest_date_row)
# Finally, we create a dataframe with the relevant rows
output_df = pd.DataFrame(output_rows)
This is the output I got for your sample date:
vehicle expecteddate range color discount price1 date
8 bus 2022-03-15 300 blue 130 130 2022-02-18
2 car 2022-03-15 240 blue 90 90 2022-02-18
4 car 2022-04-20 240 blue 40 40 2022-02-18
5 car 2022-04-20 240 red 50 50 2022-02-18
CodePudding user response:
You can filter like this:
data.loc[data['discount']==data['price1']]