I'm working creating a data time column from a data that has "anual_quarters" as the column names and values for each data point of each quarter.
I was thinking of creating a custom function and using .apply to return a data I could use. But I can't seem to write a function the iterates over each row/col of the data using iterrows() and iteritems()
Heres the last thing I tried.
from datetime import datetime
def get_recent_orders(merged_data):
for index, row in merged_data.iteritems():
if row['q4_sales_2021'] > 0:
return datetime(2021, 10, 1)
elif row['q3_sales_2021'] > 0:
return datetime(2021, 7, 1)
elif row['q2_sales_2021'] > 0:
return datetime(2021, 4, 1)
elif row['q1_sales_2021'] > 0:
return datetime(2021, 1, 1)
elif row['q4_sales_2020'] > 0:
return datetime(2020, 10, 1)
elif row['q3_sales_2020'] > 0:
return datetime(2020, 7, 1)
elif row['q2_sales_2020'] > 0:
return datetime(2020, 4, 1)
elif row['q1_sales_2020'] > 0:
return datetime(2020, 1, 1)
else:
return None
merged_data['last_order'] = merged_data.apply(get_recent_orders, axis=0)
CodePudding user response:
Use numpy.select
:
import numpy as np
conditions = [merged_data['q4_sales_2021'].gt(0),
merged_data['q3_sales_2021'].gt(0),
merged_data['q2_sales_2021'].gt(0),
merged_data['q1_sales_2021'].gt(0),
merged_data['q4_sales_2020'].gt(0),
merged_data['q3_sales_2020'].gt(0),
merged_data['q2_sales_2020'].gt(0),
merged_data['q1_sales_2020'].gt(0)]
choices = [datetime(2021, 10, 1),
datetime(2021, 7, 1),
datetime(2021, 4, 1),
datetime(2021, 1, 1),
datetime(2020, 10, 1),
datetime(2020, 7, 1),
datetime(2020, 4, 1),
datetime(2020, 1, 1)]
merged_data["last_order"] = np.select(conditions, choices, None)
CodePudding user response:
With np.select
:
condlist = [df['q4_sales_2021'] > 0,
df['q3_sales_2021'] > 0,
df['q2_sales_2021'] > 0,
df['q1_sales_2021'] > 0,
df['q4_sales_2020'] > 0,
df['q3_sales_2020'] > 0,
df['q2_sales_2020'] > 0,
df['q1_sales_2020'] > 0]
choicelist = [datetime(2021, 10, 1),
datetime(2021, 7, 1),
datetime(2021, 4, 1),
datetime(2021, 1, 1),
datetime(2020, 10, 1),
datetime(2020, 7, 1),
datetime(2020, 4, 1),
datetime(2020, 1, 1)]
default = pd.NAT
merged_data['last_order'] = np.select(condlist, choicelist, default)
CodePudding user response:
You could do this instead:
df = pd.DataFrame(data={"annual_quarters": ["q4_sales_2021", "q3_sales_2021", "q2_sales_2021", "q1_sales_2021",
"q4_sales_2020", "q3_sales_2020", "q2_sales_2020", "q1_sales_2020"]})
import datetime as dt
def get_recent_orders(quarter):
month_list = [1, 4, 7, 10]
month = month_list[int(str(quarter)[1])-1]
year = int(str(quarter)[-4:])
return dt.date(year, month, 1)
df["last_order"] = df["annual_quarters"].apply(get_recent_orders)
As the month will be Jan, Apr, Jul or Oct depending on Q1, 2, 3 or 4, you can return a datetime.date
by finding the month value from the second string character, the year from the last 4 characters, and day = 1. This way, no need to iterate over rows at all.