I'm having difficulty transforming weekly data that rolls over every 52 weeks for annual regressions.
Here is what I'm trying to do but I get the error "Error interpreting categorical data: all items must be hashable FDX ~ SP50". I believe this is because I'm trying to use the pandas rolling function.
df = pd.read_excel("dataset\Special_Proj.xlsx")
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
def rollingwindow():
tickers = ['FDX', 'BRK.A', 'MSFT', 'NVDA', 'INTC', 'AMD', 'JPM', 'T', 'AAPL', 'AMZN', 'GS']
year = ['Year 2', 'Year 3', 'Year 4', 'Year 5', 'Year 6', 'Year 7', 'Year 8', 'Year 9', 'Year 10',
'Year 11', 'Year 12', 'Year 13','Year 14','Year 15','Year 16','Year 17','Year 18','Year 19','Year 20' ]
moving_window = df[(df['Date'] >= '2000-12-29')].rolling(52)
for t in tickers:
for y in year:
model = smf.ols(f'{t} ~ SP50', data=moving_window).fit()
print(model.summary(yname="Status", xname=['Intercept', f'{t} Beta'],
title=f'Regression {y}'))
print()
rollingwindow()
Here is the df head output to show what the data frame looks like. It is weekly stock prices from 1999-2021.
Date SP50 FDX BRK MSFT NVDA INTC AMD JPM T AAPL AMZN GS
0 1999-12-31 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00 NaN
1 2000-01-07 98.11 116.03 100.71 95.45 89.21 99.62 112.31 93.64 90.51 96.78 91.38 87.66 NaN
2 2000-01-14 99.72 113.74 93.05 96.15 93.61 125.21 139.52 95.09 86.03 97.69 84.40 90.91 NaN
3 2000-01-21 98.10 101.98 94.30 88.87 95.34 118.98 131.32 93.89 88.21 108.27 81.53 90.98 NaN
4 2000-01-28 92.58 94.20 93.23 84.15 79.63 114.20 121.81 98.71 80.51 98.84 81.03 92.37 NaN
CodePudding user response:
Here is your function with a revised nested loop over years and tickers:
df = pd.read_excel("dataset\Special_Proj.xlsx")
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
def rolling_reg(df):
tickers = ['FDX', 'BRK.A', 'MSFT', 'NVDA', 'INTC', 'AMD', 'JPM', 'T', 'AAPL', 'AMZN', 'GS']
for y in range(2001, 2022): # 2001 through 2021
df_for_one_year = df[df.Date.dt.year == y]
for t in tickers:
model = smf.ols(f'{t} ~ SP50', data=df_for_one_year).fit()
print(model.summary(yname="Status", xname=['Intercept', f'{t} Beta'],
title=f'Regression {y}'))
print()
rolling_reg(df)