this is my sample data of US inflation rates
With the code below I am trying to transpose the data
new_inflation_df = pd.DataFrame(columns=['year','quarter','inflation_rate'])
for rows in inflation_df.iterrows():
for q in range(1,5):
new_inflation_df.at[rows q-1, 'year'] = inflation_df.at[rows, 'YEAR']
if q == 1:
new_inflation_df.at[rows q-1, 'quarter'] = 1
new_inflation_df.at[rows q-1, 'inflation_rate'] = inflation_df.at[rows, "MAR"]
elif q == 2:
new_inflation_df.at[rows q-1, 'quarter'] = 2
new_inflation_df.at[rows q-1, 'inflation_rate'] = inflation_df.at[rows, "JUN"]
elif q == 3:
new_inflation_df.at[rows q-1, 'quarter'] = 3
new_inflation_df.at[rows q-1, 'inflation_rate'] = inflation_df.at[rows, "SEP"]
elif q == 4:
new_inflation_df.at[rows q-1, 'quarter'] = 4
new_inflation_df.at[rows q-1, 'inflation_rate'] = inflation_df.at[rows, "DEC"]
This is my expected output of the new_inflation_df:
but I am getting this typeerror: can anyone assist?
TypeError: '(0, YEAR 1914.0 JAN 2.0 FEB 1.0 MAR 1 APR 0.0 MAY 2.1 JUN
1.0 JUL 1.0 AUG 3.0 SEP 2.0 OCT 1.0 NOV 1.0 DEC 1.0 AVE 1.0 Quarter 4 Name: 0, dtype: object)' is an invalid key
CodePudding user response:
I created my own data. Someone with more experience with Pandas then I do will write the code below with more finesse.
import pandas as pd
data=[
[1914, 1, 1, 1, 10, 12, 11, 1, 1, 1, 10, 12, 11],
[1916, 5, 5, 5.1, 15, 15, 20, 5, 5, 5.1, 15, 15, 20],
[1918, 19.7, 17.5, 16.7, 1, 2, 3, 19.7, 17.5, 16.7, 1, 2, 3]
]
cols = ["YEAR", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
inflation_df = pd.DataFrame(data, columns = cols )
# create new df
new_inflation_df = pd.DataFrame()
counter = 0
for index, rows in inflation_df.iterrows():
for q in range(1,5):
counter = 1
new_inflation_df.loc[counter,'year'] = inflation_df.at[index, 'YEAR']
if q == 1:
new_inflation_df.at[counter,'quarter'] = q
new_inflation_df.at[counter,'inflation_rate'] = inflation_df.loc[ index, ['JAN', 'FEB', 'MAR']].mean()
elif q == 2:
new_inflation_df.at[counter,'quarter'] = q
new_inflation_df.at[counter,'inflation_rate'] = inflation_df.loc[ index, ['APR', 'MAY', 'JUN']].mean()
elif q == 3:
new_inflation_df.at[counter,'quarter'] = q
new_inflation_df.at[counter,'inflation_rate'] = inflation_df.loc[ index, ['JUL', 'AUG', 'SEP']].mean()
elif q == 4:
new_inflation_df.at[counter,'quarter'] = q
new_inflation_df.at[counter,'inflation_rate'] = inflation_df.loc[ index, ['OCT', 'NOV', 'DEC']].mean()
#convert year and quarter to integers
new_inflation_df['year'] = new_inflation_df['year'].astype('int')
new_inflation_df['quarter'] = new_inflation_df['quarter'].astype('int')
print(new_inflation_df)
Output:
year quarter inflation_rate
1 1914 1 1.000000
2 1914 2 11.000000
3 1914 3 1.000000
4 1914 4 11.000000
5 1916 1 5.033333
6 1916 2 16.666667
7 1916 3 5.033333
8 1916 4 16.666667
9 1918 1 17.966667
10 1918 2 2.000000
11 1918 3 17.966667
12 1918 4 2.000000