I have a python script that uses multiple if statement
conditions in order to allow the user to filter the dataframe and return the required results.
The problem is that I have multiple conditions that make the script to be very slow.
My question is how to remove or reduce the redundant if conditions and make the conditions dynamic based on the selection of the user for columns that he wants to filter.
code:
col1_ch,col2_ch,col3_ch = st.sidebar.columns(3)
with col1_ch:
adv_searchcheckbox_name_nickname = st.checkbox("Name or Nickname or Mother name",value = False,key=1)
adv_searchcheckbox_gender = st.checkbox("Gender",value = False,key=2)
adv_searchcheckbox_status_type = st.checkbox("Status type",value = False,key=3)
adv_searchcheckbox_country = st.checkbox("Country",value = False,key=4)
adv_searchcheckbox_bd = st.checkbox("Date Of Birth",value = False,key=5)
if adv_searchcheckbox_name_nickname:
col1, col2,col3 = st.sidebar.columns(3)
with col1:
name_search = st.text_input("name")
with col2:
nickname_search = st.text_input("nickname")
with col3:
Mother_name_search = st.text_input("mother name")
if adv_searchcheckbox_gender:
radio_gender = st.sidebar.radio(label="Gender", options=["M","F"])
if st.sidebar.button("search"):
# *******************name nickname mothername checkbox***************
# . only name/nickname/mother name is checked
if adv_searchcheckbox_name_nickname and not adv_searchcheckbox_gender and not adv_searchcheckbox_status_type and not adv_searchcheckbox_country and not adv_searchcheckbox_bd:
# if name is specified but not the nickname and mother name
if name_search != '' and nickname_search == '' and Mother_name_search =='':
df_result_search = df[df['name'].str.contains(name_search, case=False, na=False)]
# if nickname is specified but not the name and mother name
elif nickname_search != ''and name_search == '' and Mother_name_search == '':
df_result_search = df[df['nickname'].str.contains(nickname_search, case=False, na=False)]
# if mother name is specified but not the name and nickname
elif Mother_name_search != '' and name_search == '' and nickname_search == '':
df_result_search = df[df['mother_name'].str.contains(Mother_name_search, case=False, na=False)]
# if both name and nickname are specified
elif name_search != '' and nickname_search != '' and Mother_name_search!='':
df_result_search = df[(df['name'].str.contains(name_search, case=False, na=False)) & (df['nickname'].str.contains(nickname_search, case=False, na=False))]
# if both name and mother_name are specified
elif name_search != '' and Mother_name_search!='' and nickname_search == '' :
df_result_search = df[(df['name'].str.contains(name_search, case=False, na=False)) & df['mother_name'].str.contains(Mother_name_search, case=False, na=False)]
# if both nickname and mother_name are specified
elif nickname_search != '' and Mother_name_search!='' and name_search == '':
df_result_search = df[(df['nickname'].str.contains(nickname_search, case=False, na=False)) & df['mother_name'].str.contains(Mother_name_search, case=False, na=False)]
# if user does not enter anything
else:
st.warning('Please enter at least a name or a nickname or mother name ')
# *******************name nickname mothername checkbox***************
# *******************gender checkbox***************
elif adv_searchcheckbox_gender and adv_searchcheckbox_name_nickname and not adv_searchcheckbox_status_type and not adv_searchcheckbox_country and not adv_searchcheckbox_bd:
# if name is specified but not the nickname and mother name
if name_search != '' and radio_gender !='' and nickname_search == '' and Mother_name_search =='':
df_result_search = df[df['name'].str.contains(name_search, case=False, na=False) & (df['gender'] ==(radio_gender))]
# if nickname is specified but not the name and mother name
elif nickname_search != '' and radio_gender !=''and name_search == '' and Mother_name_search == '':
df_result_search = df[df['nickname'].str.contains(nickname_search, case=False, na=False)& (df['gender'] ==(radio_gender))]
# if mother name is specified but not the name and nickname
elif Mother_name_search != '' and radio_gender !='' and name_search == '' and nickname_search == '' :
df_result_search = df[df['mother_name'].str.contains(Mother_name_search, case=False, na=False)& (df['gender'] ==(radio_gender))]
# if both name and nickname are specified
elif name_search != '' and nickname_search != '' and radio_gender !=''and Mother_name_search=='':
df_result_search = df[(df['name'].str.contains(name_search, case=False, na=False)) & (df['nickname'].str.contains(nickname_search, case=False, na=False)) & (df['gender'] ==(radio_gender))]
# if both name and mother name are specified
elif name_search != '' and radio_gender !=''and Mother_name_search!='' and nickname_search == '':
df_result_search = df[(df['name'].str.contains(name_search, case=False, na=False)) & (df['mother_name'].str.contains(Mother_name_search, case=False, na=False)) & (df['gender'] ==(radio_gender))]
# if both nickname and mother name are specified
elif nickname_search != '' and radio_gender !=''and Mother_name_search!='' and name_search == '':
df_result_search = df[(df['nickname'].str.contains(nickname_search, case=False, na=False)) & (df['mother_name'].str.contains(name_search, case=False, na=False)) & (df['gender'] ==(radio_gender))]
# if all name nickname and mother name are specified
elif nickname_search != '' and radio_gender !=''and Mother_name_search!='' and name_search != '':
df_result_search = df[(df['name'].str.contains(name_search, case=False, na=False)) & (df['nickname'].str.contains(nickname_search, case=False, na=False)) & (df['mother_name'].str.contains(name_search, case=False, na=False)) & (df['gender'] ==(radio_gender))]
# if user does not enter anything
else:
st.warning('Specify at least 1 input ')
# *******************gender checkbox***************
st.dataframe(df_result_search)
This is just for the first 2 columns
CodePudding user response:
This would probably be a good use case for the new pattern matching feature in Python 3.10.
Apart from that you are using the same variables over and over again in your is/elif statements (eg. nickname_search
, name_search
, ...).
You could define their content upfront with something like:
name_search_empty = ''
That way you could at least avoid the redundant comparison operators.
CodePudding user response:
You should filter it step by step. In each step you narrow down the final dataframe (stored to result
variable) by applying just a single condition.
result = df # initialize the result and narrow-down the result in each of the following conditions
if name_search: # note: do not use name_search != ''
result = result[result['name'].str.contains(name_search, case=False, na=False)]
if nickname_search:
result = result[result['nickname'].str.contains(nickname_search, case=False, na=False)]
if mother_name_search:
result = result[result['mother_name'].str.contains(mother_name_search, case=False, na=False)]
# ... etc. add more conditions as you need
I think you got the idea now.