I am trying to add a column of one-word categories derived from another column that contains a sentence in each row.
I tried the following code, but it keeps raising errors:
def loan_cat(row):
    """Return a one-word loan category for a single row.

    Meant to be used as ``df.apply(loan_cat, axis=1)``: the function is then
    called once per row, so it only inspects that row's 'purpose' text
    instead of looping over the whole DataFrame itself.

    Args:
        row: A row-like mapping (e.g. a pandas Series) with a 'purpose' entry.

    Returns:
        The category of the first keyword found in the purpose text, or
        'undefined' when no keyword matches or the purpose is not a string.
    """
    data = row['purpose']
    # Non-string values (e.g. NaN/None) cannot be searched with `in`.
    if not isinstance(data, str):
        return 'undefined'
    if 'house' in data:
        return 'house'
    # Each keyword needs its own membership test; `'a' | 'b' in data`
    # evaluates `'a' | 'b'` first and raises TypeError.
    if 'education' in data or 'university' in data:
        return 'education'
    if 'wedding' in data:
        return 'wedding'
    if 'car' in data:
        return 'car'
    if 'real' in data:
        return 'real estate'
    if 'property' in data:
        return 'property'
    return 'undefined'
# Call loan_cat once per row (axis=1) and store the results in a new 'purpose_1' column.
df['purpose_1'] = df.apply(loan_cat, axis=1)
Is there a better way to analyze and categorize the data?
CodePudding user response:
Use a dict that maps each keyword to its category:
import pandas
# Sample loan-purpose sentences, one per element.
data = pandas.Series(
    data=[
        "purchase a house",
        "purchase car",
        "supplemental education",
        "burger",
        "attend university",
    ]
)

# Keyword -> category lookup; insertion order is the order keywords are tried.
arr = {
    "house": "house",
    "education": "education",
    "university": "education",
    "car": "car",
}
def foo(s, d, default="NA"):
    """Return the category of the first keyword from *d* that occurs in *s*.

    Args:
        s: Text to search (substring match, case-sensitive).
        d: Mapping of keyword -> category. Keywords are tried in the
            mapping's iteration order, so earlier entries take precedence.
        default: Value returned when no keyword occurs in *s*.

    Returns:
        The category of the first matching keyword, otherwise *default*.
    """
    return next((v for k, v in d.items() if k in s), default)
# Categorize every sentence; `args` forwards arr as foo's second argument.
data.apply(foo, args=(arr,))
# Expected result:
# 0 house
# 1 car
# 2 education
# 3 NA
# 4 education
# dtype: object
CodePudding user response:
I figured out the answer:
def loan_cat(value):
    """Map a free-text loan purpose to a one-word category.

    Keyword fragments are checked in a fixed priority order; the first
    fragment found in *value* (case-sensitive substring match) decides the
    category.

    Args:
        value: The purpose text. Non-string values (e.g. NaN or None from
            missing cells) are treated as unmatched.

    Returns:
        'House', 'Education', 'Wedding', 'Car', 'Real Estate' or 'Property'
        for the first matching fragment, otherwise 'undefined'.
    """
    # `in` raises TypeError on floats/None, so bail out before searching.
    if not isinstance(value, str):
        return 'undefined'

    # Order matters: earlier fragments win when several occur in the text.
    keyword_categories = (
        ('hous', 'House'),
        ('educ', 'Education'),
        ('university', 'Education'),
        ('wedding', 'Wedding'),
        ('car', 'Car'),
        ('real', 'Real Estate'),
        ('property', 'Property'),
    )
    for fragment, category in keyword_categories:
        if fragment in value:
            return category
    return 'undefined'
# Categorize each purpose, then report how many rows fall into each category.
df['purpose_cat'] = df['purpose'].apply(loan_cat)
print(df['purpose_cat'].value_counts())