I wrote a function to fill NaN with Values, but instead this function first fills the NaN with values and then deletes every value that was in the list before I did the function
def preprocessing(df):
median_male_3= df[(df["Sex"]=="male") & (df["Pclass"] ==3 )]["Age"].median()
median_male_2= df[(df["Sex"]=="male") & (df["Pclass"] ==2 )]["Age"].median()
median_male_1= df[(df["Sex"]=="male") & (df["Pclass"] ==1 )]["Age"].median()
median_female_3= df[(df["Sex"]=="female") & (df["Pclass"] ==3 )]["Age"].median()
median_female_2= df[(df["Sex"]=="female") & (df["Pclass"] ==2 )]["Age"].median()
median_female_1= df[(df["Sex"]=="female") & (df["Pclass"] ==1 )]["Age"].median()
def agemaking(para):
Age=para[0]
bookclass=para[1]
sex=para[2]
if pd.isnull(Age):
print(train_titanic["Age"]) #want to check what happens inside
if bookclass==3 and sex=="male":
return median_male_3
elif bookclass==2 and sex=="male":
return median_male_2
elif bookclass==1 and sex=="male":
return median_male_1
elif bookclass==3 and sex=="female":
return median_female_3
elif bookclass==2 and sex=="female":
return median_female_2
elif bookclass==1 and sex=="female":
return median_female_1
else:
return Age
train_titanic['Age']= train_titanic[['Age','Pclass','Sex']].apply(agemaking,axis=1)
thats my function
thats what i looked like before thats the suprising result
CodePudding user response:
Try this
def agemaking(para):
Age=para[0]
bookclass=para[1]
sex=para[2]
if pd.isnull(Age):
print(train_titanic["Age"])
if bookclass==3 and sex=="male":
return median_male_3
elif bookclass==2 and sex=="male":
return median_male_2
elif bookclass==1 and sex=="male":
return median_male_1
elif bookclass==3 and sex=="female":
return median_female_3
elif bookclass==2 and sex=="female":
return median_female_2
elif bookclass==1 and sex=="female":
return median_female_1
else:
return Age
else:
return age
CodePudding user response:
A shorter version of your code could be:
df['Age'] = df['Age'].fillna(df.groupby(['Sex', 'Pclass'])['Age'].transform('median'))
Compute the median Age per (Sex, Pclass) group and broadcast values to all rows with transform
. Finally fill nan values with the computed value previously only and only if Age is null.