I have a DataFrame as follows:
data = [[np.nan,12,122],[np.nan,1230,1287],[123,101,812739],[1143,12301230,252],[234,342,4546],[2445,3453,3457],[7897,8657,5675], [46,5675,453],[76,484,3735], [363,93,4568], [385,568,367], [458,846,4847], [574,45747,658468], [57457,46534,4675]]
df1 = pd.DataFrame(data, index=['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
'2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
'2022-01-09', '2022-01-10', '2022-01-11', '2022-01-12',
'2022-01-13', '2022-01-14'],
columns=['col_A', 'col_B', 'col_C'])
df1.index = pd.to_datetime(df1.index)
periodicity_dict = {'daily':'1D', 'weekly':'1W'}
df_columns = df1.columns
for key, value in periodicity_dict.items():
for col in df_columns:
df1[col '_rolling_' key]= np.nan
for i in df1[col].index[df1[col].index >= df1[col].first_valid_index() pd.Timedelta(value)]:
df1[col '_rolling_' key].loc[i] = (df1[col].loc[i] - df1[col].loc[i-pd.Timedelta(value)])/df1[col].loc[i-pd.Timedelta(value)]
df1 = df1.filter(regex='rolling')
I'd like to get something of the following form:
col_A_rolling col_B_rolling col_C_rolling Type timeframe
quantile
0.01 -0.317000 -0.234 -0.0443 pct weekly
0.03 -0.314636 -0.022 ... pct weekly
0.05 ... ... ... ...
0.10 ... ...
0.01 ... ...
0.03 ... ...
0.05 ... ...
0.10 -0.306364 -.530023 pct daily
(NOTE: the numbers in this DataFrame are hypothetical)
How can this be done using .stack()
and .strp()
functions? If possible.
CodePudding user response:
Here's what I came up with. There might be a more clever way; I'm not sure.
Also, I don't know what the Type
column is for...
df2 = (
df1
.rename_axis('date')
.rename_axis('col', axis=1)
.stack()
.rename('value')
.reset_index()
)
df2['timeframe'] = df2['col'].map(lambda s: s.split('_')[-1])
df2['col'] = df2['col'].map(lambda s: '_'.join(s.split('_')[:-1]))
def get_quantiles(df, timeframe):
return (
df.query('timeframe == @timeframe')
.pivot(None, 'col', 'value')
.quantile([0.01, 0.03, 0.05, 0.1])
.rename_axis('quantile')
.rename_axis(None, axis=1)
.assign(Type='pct', timeframe=timeframe)
)
result = pd.concat((get_quantiles(df2, 'weekly'),
get_quantiles(df2, 'daily')))
Result:
col_A_rolling col_B_rolling col_C_rolling Type timeframe
quantile
0.01 -0.761152 -0.976347 -0.945289 pct weekly
0.03 -0.752987 -0.929133 -0.847109 pct weekly
0.05 -0.744822 -0.881919 -0.748929 pct weekly
0.10 -0.724408 -0.763884 -0.503479 pct weekly
0.01 -0.974285 -0.990122 -0.998875 pct daily
0.03 -0.934505 -0.970421 -0.997246 pct daily
0.05 -0.894725 -0.950721 -0.995616 pct daily
0.10 -0.795276 -0.917252 -0.978355 pct daily