I have a excel in below format:
I want to convert the rows into columns as below:
How can I do this transformation using Python? Thank you!
CodePudding user response:
import pandas as pd
df = pd.read_excel('your_excel_file.xlsx')
df = df.pivot(index='Data', columns='Station', values='Average')
CodePudding user response:
from what i understand this would be the solution you need.
import pandas as pd
df = pd.DataFrame({'Station': ["Parque Dom Pedro II","Parque Dom Pedro II",
"Parque Dom Pedro II","Parque Dom Pedro II","Parque Dom Pedro II","Parque Dom Pedro II"],
'Pollutant':['MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5'],
'Data': ['05/07/2016 00:00','05/07/2016 01:00',
'05/07/2016 02:00','05/07/2016 03:00','05/07/2016 04:00','05/07/2016 04:00'],
'Average':['75,21','71,04','58,47','48,28','38,39','29,64']})
row = df['Average'].values.tolist()
col = df['Station'].values.tolist()
data = {'Data': df['Data'].values.tolist(), "Parque Dom Pedro II": []}
for x in range(0, len(col)):
if (df['Station'][x] == "Parque Dom Pedro II"):
data["Parque Dom Pedro II"].append(row[x])
nw_df = pd.DataFrame(data)
print("FIRST DATAFRAME:\n" str(df) "\n\nSECOND DATAFRAME:\n" str(nw_df))
If your table has more than one station, there is this other way to do it
import pandas as pd
def condition(df, len_col, col):
data = []
for x in range(0, len_col):
if (df['Station'][x] == col):
data.append(row[x])
else:
data.append('')
return data
df = pd.DataFrame({'Station': ["Parque Dom Pedro II","Parque Dom Pedro II",
"Parque Dom Pedro II","Parque Dom Pedro II","Parque Dom Pedro II","Another Station",
"Another Station","Another Station","Another Station","Another Station"],
'Pollutant':['MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5','MP2.5'],
'Data': ['05/07/2016 00:00','05/07/2016 01:00',
'05/07/2016 02:00','05/07/2016 03:00','05/07/2016 04:00','05/07/2016 05:00',
'05/07/2016 06:00','05/07/2016 06:30','05/07/2016 07:00','05/07/2016 08:00'],
'Average':['75,21','71,04','58,47','48,28','38,39','29,64','29,64','29,64','29,64','29,64']})
row = df['Average'].values.tolist()
col = df['Station'].values.tolist()
data = {'Data': df['Data'].values.tolist(), "Parque Dom Pedro II": []}
temp_col = col[0]
data[temp_col] = condition(df, len(col), temp_col)
for x in range(0, len(col)):
if (temp_col != col[x]):
temp_col = col[x]
data[temp_col] = condition(df, len(col), temp_col)
nw_df = pd.DataFrame(data)
print("FIRST DATAFRAME:\n" str(df) "\n\nSECOND DATAFRAME:\n" str(nw_df))