Let's start with a simple DataFrame:
df = pd.DataFrame({"a":[100,100,105,110,100,106,120,110,105,70,90, 100]})
df:
a
0 100
1 100
2 105
3 110
4 100
5 106
6 120
7 110
8 105
9 70
10 90
11 100
Now, I want to calculate the returns on a 7-day rolling basis. So I apply the following:
df['delta_rol_a_last_first'] = np.nan
for i in range(7,len(df)):
df['delta_rol_a_last_first'].iloc[i] = (df['a'].iloc[i] - df['a'].iloc[i-7])/df['a'].iloc[i-6]
df.dropna(inplace=True)
df:
a delta_rol_a_last_first
7 110 0.100000
8 105 0.047619
9 70 -0.318182
10 90 -0.200000
11 100 0.000000
Now I just want the negative returns, apply quantiles to them and I want to add identities to the rows as follows:
df_quant = df['delta_rol_a_last_first'][df['delta_rol_a_last_first'] <0].quantile([0.01,0.03,0.05,0.1])
df_quant.index.names = ['quantile']
df_quant=df_quant.to_frame()
df_quant['Type'] = 'pct'
df_quant['timeframe'] = 'weekly'
df_quant:
delta_rol_a_last_first Type timeframe
quantile
0.01 -0.317000 pct weekly
0.03 -0.314636 pct weekly
0.05 -0.312273 pct weekly
0.10 -0.306364 pct weekly
So that works perfectly.
Now imagine I want to do the same but more dynamically. So consider a DataFrame with multiple columns as follows:
data = [[99330,12,122],[1123,1230,1287],[123,101,812739],[1143,1230123,252],[234,342,4546],[2445,3453,3457],[7897,8657,5675],[46,5675,453],[76,484,3735],[363,93,4568],[385,568,367],[458,846,4847],[574,45747,658468],[57457,46534,4675]]
df1 = pd.DataFrame(data, index=['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
'2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
'2022-01-09', '2022-01-10', '2022-01-11', '2022-01-12',
'2022-01-13', '2022-01-14'],
columns=['col_A', 'col_B', 'col_C'])
df1.index = pd.to_datetime(df1.index)
df1:
col_A col_B col_C
2022-01-01 99330 12 122
2022-01-02 1123 1230 1287
2022-01-03 123 101 812739
2022-01-04 1143 1230123 252
2022-01-05 234 342 4546
2022-01-06 2445 3453 3457
2022-01-07 7897 8657 5675
2022-01-08 46 5675 453
2022-01-09 76 484 3735
2022-01-10 363 93 4568
2022-01-11 385 568 367
2022-01-12 458 846 4847
2022-01-13 574 45747 658468
2022-01-14 57457 46534 4675
I will create a dictionary for the periods over which I want to calculate my rolling returns:
periodicity_dict = {'1D':'daily', '1W':'weekly'}
Now I want to create the same DataFrame as df_quant
above. So my DataFrame should look something like this:
col_A_rolling col_B_rolling col_C_rolling Type timeframe
quantile
0.01 -0.317000 -0.234 -0.0443 pct weekly
0.03 -0.314636 -0.022 ... pct weekly
0.05 ... ... ... ...
0.10 ... ...
0.01 ... ...
0.03 ... ...
0.05 ... ...
0.10 -0.306364 -.530023 pct daily
(NOTE: the numbers in this DataFrame are hypothetical)
EDIT:
My attempt is this:
periodicity_dict = {'1D':'daily', '1W':'weekly'}
df_columns = df1.columns
for key in periodicity_dict:
for col in df_columns:
df1[col '_rolling']= np.nan
for i in pd.date_range(start=df1[col].first_valid_index(), end=df1[col].last_valid_index(), freq=key):
df1[col '_rolling'].iloc[i] = (df1[col].iloc[i] - df1[col].iloc[i-key])/df1[col].iloc[i-key]
What is the best way to do this? Any help would be appreciated.
CodePudding user response:
I didn't test all code but first part can be replaced by DataFrame.roling
df = pd.DataFrame({"a":[100,100,105,110,100,106,120,110,105,70,90, 100]})
# ---
def convert(data):
return (data.iloc[-1] - data.iloc[0])/data.iloc[1]
df[['delta_rol_a_last_first']] = df.rolling(8).apply(convert)
# ---
print(df)
or using lambda
df[['delta_rol_a_last_first']] = df.rolling(8).apply(lambda data: ((data.iloc[-1] - data.iloc[0])/data.iloc[1]))
The same for many columns:
import pandas as pd
data = [
[99330,12,122], [1123,1230,1287], [123,101,812739], [1143,1230123,252],
[234,342,4546], [2445,3453,3457], [7897,8657,5675], [46,5675,453],
[76,484,3735], [363,93,4568], [385,568,367], [458,846,4847],
[574,45747,658468], [57457,46534,4675]
]
df = pd.DataFrame(
data,
index=['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
'2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
'2022-01-09', '2022-01-10', '2022-01-11', '2022-01-12',
'2022-01-13', '2022-01-14'],
columns=['col_A', 'col_B', 'col_C']
)
df.index = pd.to_datetime(df.index)
# ---
def convert(data):
return (data.iloc[-1] - data.iloc[0])/data.iloc[1]
#df[['col_A_weekly', 'col_B_weekly', 'col_C_weekly']] = df.rolling(8).apply(convert)
new_columns = [name '_weekly' for name in df.columns]
df[new_columns] = df.rolling(8).apply(convert)
# ---
print(df)
Result:
col_A col_B col_C col_A_weekly col_B_weekly col_C_weekly
2022-01-01 99330 12 122 NaN NaN NaN
2022-01-02 1123 1230 1287 NaN NaN NaN
2022-01-03 123 101 812739 NaN NaN NaN
2022-01-04 1143 1230123 252 NaN NaN NaN
2022-01-05 234 342 4546 NaN NaN NaN
2022-01-06 2445 3453 3457 NaN NaN NaN
2022-01-07 7897 8657 5675 NaN NaN NaN
2022-01-08 46 5675 453 -88.409617 4.604065 0.257187
2022-01-09 76 484 3735 -8.512195 -7.386139 0.003012
2022-01-10 363 93 4568 0.209974 -0.000007 -3207.027778
2022-01-11 385 568 367 -3.239316 -3595.190058 0.025297
2022-01-12 458 846 4847 0.091616 0.145960 0.087070
2022-01-13 574 45747 658468 -0.236925 4.885526 115.420441
2022-01-14 57457 46534 4675 1077.391304 6.674361 -2.207506
EDIT:
Using two ranges daily
and weekly
old_columns = df.columns
new_columns = [name '_weekly' for name in old_columns]
df[new_columns] = df[old_columns].rolling(8).apply(convert)
new_columns = [name '_daily' for name in old_columns]
df[new_columns] = df[old_columns].rolling(2).apply(convert)
or using loop:
old_columns = df.columns
for days, suffix in ((1, 'daily'), (7, 'weekly')):
new_columns = [name '_' suffix for name in old_columns]
df[new_columns] = df[old_columns].rolling(days 1).apply(convert)
or
for days, suffix in ((1, 'daily'), (7, 'weekly')):
for name in old_columns:
new_name = name '_' suffix
df[new_name] = df[name].rolling(days 1).apply(convert)
Result:
col_A col_B col_C col_A_weekly col_B_weekly col_C_weekly col_A_daily col_B_daily col_C_daily
2022-01-01 99330 12 122 NaN NaN NaN NaN NaN NaN
2022-01-02 1123 1230 1287 NaN NaN NaN -87.450579 0.990244 0.905206
2022-01-03 123 101 812739 NaN NaN NaN -8.130081 -11.178218 0.998416
2022-01-04 1143 1230123 252 NaN NaN NaN 0.892388 0.999918 -3224.154762
2022-01-05 234 342 4546 NaN NaN NaN -3.884615 -3595.850877 0.944567
2022-01-06 2445 3453 3457 NaN NaN NaN 0.904294 0.900956 -0.315013
2022-01-07 7897 8657 5675 NaN NaN NaN 0.690389 0.601132 0.390837
2022-01-08 46 5675 453 -88.409617 4.604065 0.257187 -170.673913 -0.525463 -11.527594
2022-01-09 76 484 3735 -8.512195 -7.386139 0.003012 0.394737 -10.725207 0.878715
2022-01-10 363 93 4568 0.209974 -0.000007 -3207.027778 0.790634 -4.204301 0.182356
2022-01-11 385 568 367 -3.239316 -3595.190058 0.025297 0.057143 0.836268 -11.446866
2022-01-12 458 846 4847 0.091616 0.145960 0.087070 0.159389 0.328605 0.924283
2022-01-13 574 45747 658468 -0.236925 4.885526 115.420441 0.202091 0.981507 0.992639
2022-01-14 57457 46534 4675 1077.391304 6.674361 -2.207506 0.990010 0.016912 -139.848770
EDIT:
Quantile:
finall_df = pd.DataFrame()
for days, suffix in ((1, 'daily'), (7, 'weekly')):
df_quant = pd.DataFrame()
for name in old_columns:
new_name = name '_' suffix
df_quant[name] = df[new_name][df[new_name]<0].quantile([0.01,0.03,0.05,0.1])
df_quant.index.names = ['quantile']
df_quant['Type'] = 'pct'
df_quant['timeframe'] = suffix
print(df_quant.to_string())
#finall_df = finall_df.append(df_quant)
finall_df = pd.concat([finall_df,df_quant])
print(finall_df)
Result:
col_A col_B col_C Type timeframe
quantile
0.01 -168.177213 -3452.463971 -3100.782522 pct daily
0.03 -163.183813 -3165.690158 -2854.038043 pct daily
0.05 -158.190413 -2878.916345 -2607.293564 pct daily
0.10 -145.706913 -2161.981813 -1990.432365 pct daily
0.01 -86.012694 -3523.433980 -3174.979575 pct weekly
0.03 -81.218849 -3379.921823 -3110.883170 pct weekly
0.05 -76.425004 -3236.409666 -3046.786764 pct weekly
0.10 -64.440391 -2877.629275 -2886.545751 pct weekly