Home > OS >  Pandas save data based on column in xlsx\csv file
Pandas save data based on column in xlsx\csv file

Time:09-12

I want to use PANDAS to save the processed data in the original format into another xlsx\csv file.

for,example,I have 1 xlsx file

num1,num2
1,   4
2,   5
3,   6
80,  120

if I use series1.to_excel('004.xlsx',index=False) , the result in the 004.xlsx file is

num2
4
5
6
3.75

The num1 column data has not saved to the file.And if I use series1.to_csv('004.csv',mode='a',index=False), the result in the 004.xlsx file is

num1
1
2
3
2.5
num2
4
5
6
3.75

I want the output format saved in the xlsx\csv file to be something like this

num1,num2
1,   4
2,   5
3,   6
2.5, 3.75

COULD YOU HELP ME?Here is all my code:

import numpy as np
import pandas as pd

df_excel = pd.read_excel('002.xlsx')
df = df_excel
for col in df.columns:
    df_current_col = df[col]
    series1 = pd.Series(df_current_col)
    s = (series1.quantile(0.75)-series1.quantile(0.25))
    iqr = 3 * s
    val_low = series1.quantile(0.25) - iqr*0.5
    val_up = series1.quantile(0.25)   iqr*0.5
    outlier = series1[(series1 > val_up)|(series1 < val_low)]
    normal_val = series1[(series1 < val_up)|(series1 > val_low)]
    outlier = np.array(outlier)
    sum = 0
    normal_val = np.array(normal_val)
    for i in range(len(normal_val)):
        sum =normal_val[i]
    mean = sum / (len(normal_val))
    if outlier is  None:
        print('NONE')
    else:
        for i ,val in enumerate(series1):
            series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
        ########
        #series1.to_excel('004.xlsx',index=False)
        series1.to_csv('004.csv',mode='a',index=False)

CodePudding user response:

import numpy as np
import pandas as pd

df_excel = pd.read_excel('002.xlsx')
df = df_excel
def transformer(series1):
    s = (series1.quantile(0.75)-series1.quantile(0.25))
    iqr = 3 * s
    val_low = series1.quantile(0.25) - iqr*0.5
    val_up = series1.quantile(0.25)   iqr*0.5
    outlier = series1[(series1 > val_up)|(series1 < val_low)]
    normal_val = series1[(series1 < val_up)|(series1 > val_low)]
    outlier = np.array(outlier)
    sum = 0
    normal_val = np.array(normal_val)
    for i in range(len(normal_val)):
        sum =normal_val[i]
    mean = sum / (len(normal_val))
    if outlier is  None:
        print('NONE')
    else:
        for i ,val in enumerate(series1):
            series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
        return series1
        ########
df = df.apply(transformer , axis=1)
#df.to_excel('004.xlsx',index=False)
df.to_csv('004.csv',mode='w',index=False)

CodePudding user response:

You are trying to append multiple 1D list to a CSV file. Instead you should be trying to append one 2D list.

import numpy as np
import pandas as pd
import csv
df_excel = pd.read_excel('002.xlsx')
df = df_excel
Storeage = []
for col in df.columns:
    df_current_col = df[col]
    series1 = pd.Series(df_current_col)
    s = (series1.quantile(0.75)-series1.quantile(0.25))
    iqr = 3 * s
    val_low = series1.quantile(0.25) - iqr*0.5
    val_up = series1.quantile(0.25)   iqr*0.5
    outlier = series1[(series1 > val_up)|(series1 < val_low)]
    normal_val = series1[(series1 < val_up)|(series1 > val_low)]
    outlier = np.array(outlier)
    sum = 0
    normal_val = np.array(normal_val)
    for i in range(len(normal_val)):
        sum =normal_val[i]
    mean = sum / (len(normal_val))
    if outlier is  None:
        print('NONE')
    else:
        for i ,val in enumerate(series1):
            series1[i] = float(np.where((((val < val_low) | (val > val_up))), mean, val))
        ########
        #series1.to_excel('004.xlsx',index=False)
        Storeage.append(series1.tolist()) #turning List on side so it looks correct
Storeage = np.array(Storeage).T.tolist()
with open('Output.csv', 'w') as f: 
    writer = csv.writer(f, delimiter=",") #appending each row of the 2D list to a csv file
    writer.writerows(Storeage)


 
  • Related