I want to use PANDAS to save the processed data in the original format into another xlsx\csv file.
for,example,I have 1 xlsx file
num1,num2
1, 4
2, 5
3, 6
80, 120
if I use series1.to_excel('004.xlsx',index=False)
, the result in the 004.xlsx file is
num2
4
5
6
3.75
The num1 column data has not saved to the file.And if I use series1.to_csv('004.csv',mode='a',index=False)
, the result in the 004.xlsx file is
num1
1
2
3
2.5
num2
4
5
6
3.75
I want the output format saved in the xlsx\csv file to be something like this
num1,num2
1, 4
2, 5
3, 6
2.5, 3.75
COULD YOU HELP ME?Here is all my code:
import numpy as np
import pandas as pd
df_excel = pd.read_excel('002.xlsx')
df = df_excel
for col in df.columns:
df_current_col = df[col]
series1 = pd.Series(df_current_col)
s = (series1.quantile(0.75)-series1.quantile(0.25))
iqr = 3 * s
val_low = series1.quantile(0.25) - iqr*0.5
val_up = series1.quantile(0.25) iqr*0.5
outlier = series1[(series1 > val_up)|(series1 < val_low)]
normal_val = series1[(series1 < val_up)|(series1 > val_low)]
outlier = np.array(outlier)
sum = 0
normal_val = np.array(normal_val)
for i in range(len(normal_val)):
sum =normal_val[i]
mean = sum / (len(normal_val))
if outlier is None:
print('NONE')
else:
for i ,val in enumerate(series1):
series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
########
#series1.to_excel('004.xlsx',index=False)
series1.to_csv('004.csv',mode='a',index=False)
CodePudding user response:
import numpy as np
import pandas as pd
df_excel = pd.read_excel('002.xlsx')
df = df_excel
def transformer(series1):
s = (series1.quantile(0.75)-series1.quantile(0.25))
iqr = 3 * s
val_low = series1.quantile(0.25) - iqr*0.5
val_up = series1.quantile(0.25) iqr*0.5
outlier = series1[(series1 > val_up)|(series1 < val_low)]
normal_val = series1[(series1 < val_up)|(series1 > val_low)]
outlier = np.array(outlier)
sum = 0
normal_val = np.array(normal_val)
for i in range(len(normal_val)):
sum =normal_val[i]
mean = sum / (len(normal_val))
if outlier is None:
print('NONE')
else:
for i ,val in enumerate(series1):
series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
return series1
########
df = df.apply(transformer , axis=1)
#df.to_excel('004.xlsx',index=False)
df.to_csv('004.csv',mode='w',index=False)
CodePudding user response:
You are trying to append multiple 1D list to a CSV file. Instead you should be trying to append one 2D list.
import numpy as np
import pandas as pd
import csv
df_excel = pd.read_excel('002.xlsx')
df = df_excel
Storeage = []
for col in df.columns:
df_current_col = df[col]
series1 = pd.Series(df_current_col)
s = (series1.quantile(0.75)-series1.quantile(0.25))
iqr = 3 * s
val_low = series1.quantile(0.25) - iqr*0.5
val_up = series1.quantile(0.25) iqr*0.5
outlier = series1[(series1 > val_up)|(series1 < val_low)]
normal_val = series1[(series1 < val_up)|(series1 > val_low)]
outlier = np.array(outlier)
sum = 0
normal_val = np.array(normal_val)
for i in range(len(normal_val)):
sum =normal_val[i]
mean = sum / (len(normal_val))
if outlier is None:
print('NONE')
else:
for i ,val in enumerate(series1):
series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
########
#series1.to_excel('004.xlsx',index=False)
Storeage.append(series1.tolist()) #turning List on side so it looks correct
Storeage = np.array(Storeage).T.tolist()
with open('Output.csv', 'w') as f:
writer = csv.writer(f, delimiter=",") #appending each row of the 2D list to a csv file
writer.writerows(Storeage)