I have created a simple function for cleaning dataframe, but it is not creating it. Why?
The data frame that I would like to have is df_cleaned when I run the function.
def cleaning_data(data_set):
df_cleaned = []
for index in data_set.columns[4:]:
data_set['new'] = 1
df_clean_all = pd.DataFrame(data_set['new'])
df_clean_all[index] = data_set[index][data_set[index].between(data_set[index].quantile(0.05),
data_set[index].quantile(0.95))]
df_clean_all = df_clean_all.drop('new', 1)
df_clean_all = df_clean_all.fillna(df_clean_all.mean())
df_cleaned.append(df_clean_all)
df_cleaned = pd.concat(df_cleaned, axis = 1)
cleaning_data(df_data)
CodePudding user response:
Is that a matter of returning your result? return df_cleaned
import numpy as np
import pandas as pd
def cleaning_data(data_set):
df_cleaned = []
for index in data_set.columns[4:]:
data_set['new'] = 1
df_clean_all = pd.DataFrame(data_set['new'])
df_clean_all[index] = data_set[index][data_set[index].between(data_set[index].quantile(0.05),
data_set[index].quantile(0.95))]
df_clean_all = df_clean_all.drop('new', 1)
df_clean_all = df_clean_all.fillna(df_clean_all.mean())
df_cleaned.append(df_clean_all)
return pd.concat(df_cleaned, axis = 1)
random_df = pd.DataFrame(
np.random.randint(0, 1000, size=(10, 6)), columns=list('ABCDEF')
)
print(cleaning_data(random_df))