data = {'machine_id': [1000,1000,3000,2000,3000,1000,1000,3000,2000,3000,1000,1000,30000,2000,3000],
'item_id': [100,100,100,200,300,100,100,100,200,300,100,100,100,200,300],
'Date': ['2022-03-01','2022-03-01','2022-03-01','2022-03-01','2022-03-01',
'2022-04-01','2022-04-01','2022-04-01','2022-04-01','2022-04-01',
'2022-05-01','2022-05-01','2022-05-01','2022-05-01','2022-05-01'],
'Sales_qty':[5,6,5,7,8,1,3,7,9,6,4,7,8,9,4]}
df = pd.DataFrame(data)
df['Date']=pd.to_datetime(df['Date'])
df.set_index('Date',inplace=True)
df['total']=df.groupby(['Date'])['Sales_qty'].transform('sum')
df['sales_avg']=df['Sales_qty']/df['total']
in output column i want last month sales avg.
|Date| Last_month_sale|
0 2022-03-01 NaN
1 2022-03-01 NaN
2 2022-03-01 NaN
3 2022-03-01 NaN
4 2022-03-01 NaN
5 2022-04-01 0.038462
6 2022-04-01 0.115385
7 2022-04-01 0.269231
8 2022-04-01 0.346154
9 2022-04-01 0.230769
10 2022-05-01 0.125000
11 2022-05-01 0.218750
12 2022-05-01 0.250000
13 2022-05-01 0.281250
14 2022-05-01 0.125000
Need code get the output. For March month the sales avg will be nan because we dont have feb month data. For april month we have last month data that is of march and for may month we have april month data.
CodePudding user response:
I am using my approach from this question: Can .apply use information from other groups?.
grouped = df.groupby(['Date'])['sales_avg']
dates = list(grouped.groups.keys())
iterator_dates = iter(dates)
def f(ser):
if ser.name == dates[0]:
return np.nan
prev_date = next(iterator_dates)
prev_group = grouped.get_group(prev_date)
prev_group.index = ser.index
return prev_group
df['prev_avg'] = grouped.transform(f)
print(df)
machine_id item_id Sales_qty total sales_avg prev_avg
Date
2022-03-01 1000 100 5 31 0.161290 NaN
2022-03-01 1000 100 6 31 0.193548 NaN
2022-03-01 3000 100 5 31 0.161290 NaN
2022-03-01 2000 200 7 31 0.225806 NaN
2022-03-01 3000 300 8 31 0.258065 NaN
2022-04-01 1000 100 1 26 0.038462 0.161290
2022-04-01 1000 100 3 26 0.115385 0.193548
2022-04-01 3000 100 7 26 0.269231 0.161290
2022-04-01 2000 200 9 26 0.346154 0.225806
2022-04-01 3000 300 6 26 0.230769 0.258065
2022-05-01 1000 100 4 32 0.125000 0.038462
2022-05-01 1000 100 7 32 0.218750 0.115385
2022-05-01 30000 100 8 32 0.250000 0.269231
2022-05-01 2000 200 9 32 0.281250 0.346154
2022-05-01 3000 300 4 32 0.125000 0.230769
CodePudding user response:
You can first agg the groupby Date as list and then explode. But before that capture the index which is the first group's index so that later you can fill that with np.nan
df2 = df.groupby('Date')['sales_avg'].agg(list).shift().to_frame()
na_index = df2['sales_avg'].isna().index[0]
df2 = df2.bfill().explode('sales_avg')
df2.loc[na_index] = np.nan
df['sales_avg'] = df2['sales_avg']
print(df):
machine_id item_id Sales_qty total sales_avg
Date
2022-03-01 1000 100 5 31 NaN
2022-03-01 1000 100 6 31 NaN
2022-03-01 3000 100 5 31 NaN
2022-03-01 2000 200 7 31 NaN
2022-03-01 3000 300 8 31 NaN
2022-04-01 1000 100 1 26 0.16129
2022-04-01 1000 100 3 26 0.193548
2022-04-01 3000 100 7 26 0.16129
2022-04-01 2000 200 9 26 0.225806
2022-04-01 3000 300 6 26 0.258065
2022-05-01 1000 100 4 32 0.038462
2022-05-01 1000 100 7 32 0.115385
2022-05-01 30000 100 8 32 0.269231
2022-05-01 2000 200 9 32 0.346154
2022-05-01 3000 300 4 32 0.230769