import pandas as pd
import numpy as np
df = pd.DataFrame.from_dict({'date':[1,2,3,4,5,6,7,8,9,10] ,'open':[4,5,3,4,5,6,7,8,9,10],'close':[4,5,6,7,8,1,2,9,10,11],'stock':['A']*5 ['B']*5})
df['flag'] = np.select([df['close']>df['open'],df['close']<df['open']],['up','down'],default='flat')
df
date | open | close | stock | flag | |
---|---|---|---|---|---|
0 | 1 | 4 | 4 | A | flat |
1 | 2 | 5 | 5 | A | flat |
2 | 3 | 3 | 6 | A | up |
3 | 4 | 4 | 7 | A | up |
4 | 5 | 5 | 8 | A | up |
5 | 6 | 6 | 1 | B | down |
6 | 7 | 7 | 2 | B | down |
7 | 8 | 8 | 9 | B | up |
8 | 9 | 9 | 10 | B | up |
9 | 10 | 10 | 11 | B | up |
I tried the following. None of them works. They all give me "No numeric types to aggregate" error
# flag if previous 3 days (t-2,t-1, and t) are all increase for each stock
df['3days_up'] = df.groupby('stock')['flag'].rolling(3).apply(lambda x: 'Yes' if all(x['flag']=='up') else 'No')
df['3days_up'] = df.groupby('stock')[['flag']].rolling(3).apply(lambda x: 'Yes' if all(x['flag']=='up') else 'No')
df['3days_up'] = df.groupby('stock').rolling(3).apply(lambda x: 'Yes' if all(x['flag']=='up') else 'No')
Expected output:
date | open | close | stock | flag | 3days_up | |
---|---|---|---|---|---|---|
0 | 1 | 4 | 4 | A | flat | No |
1 | 2 | 5 | 5 | A | flat | No |
2 | 3 | 3 | 6 | A | up | No |
3 | 4 | 4 | 7 | A | up | No |
4 | 5 | 5 | 8 | A | up | Yes |
5 | 6 | 6 | 1 | B | down | No |
6 | 7 | 7 | 2 | B | down | No |
7 | 8 | 8 | 9 | B | up | No |
8 | 9 | 9 | 10 | B | up | No |
9 | 10 | 10 | 11 | B | up | Yes |
CodePudding user response:
Convert up
value to True
and others to False
as starting point:
df['3days_up'] = np.where(df.assign(is_up=df['flag'] == 'up')
.groupby('stock').rolling(3)['is_up']
.sum() >= 3, 'Yes', 'No')
print(df)
# Output
date open close stock flag 3days_up
0 1 4 4 A flat No
1 2 5 5 A flat No
2 3 3 6 A up No
3 4 4 7 A up No
4 5 5 8 A up Yes
5 6 6 1 B down No
6 7 7 2 B down No
7 8 8 9 B up No
8 9 9 10 B up No
9 10 10 11 B up Yes