Sample dataframe
ticket_start_time ticket_end_time status customer_type ticket_type customer_type
0 None None None None None None
1 None None None None None None
2 None None None None None None
3 None None None None None None
8 2021-10-22 16:26:50 2021-10-22 19:16:28 Por Acción R INSTALLATION R
9 2021-10-22 16:26:50 2021-10-22 16:38:23 Por Acción R INSTALLATION R
10 2021-10-22 16:26:50 2021-10-22 19:16:28 Por Acción R INSTALLATION R
I'm using the code below, but it is hard-coded. How can I create a reusable function for the above dataframe?
import pyarrow
import pandas as pd
df = read_df()
columns_list = [req_cols]
# Keep every condition as its own list item: `"a" and "b"` evaluates to just
# the second string, and query expressions compare with `==`, not `=`.
filter_conditions = ["status == 'closed'", "customer_type == 'R'"]
# query() needs an expression and returns a new frame, so join the
# conditions into one expression and keep the result.
df = df.query(" and ".join(filter_conditions))
def select_filter_df(df, columns_list, filter_conditions):
    """Filter the rows of ``df`` and keep only the requested columns.

    Args:
        df: DataFrame to filter. It is not modified.
        columns_list: Column labels to keep, in order. ``None`` or an empty
            list keeps every column. Repeated labels are collapsed to their
            first occurrence.
        filter_conditions: Either one ``DataFrame.query`` expression or an
            iterable of expressions that must all hold. ``None`` or an empty
            value keeps every row.

    Returns:
        The filtered DataFrame; ``df`` itself when there is nothing to apply.

    Raises:
        KeyError: If a requested column is not present in ``df``.
    """
    if filter_conditions:
        if isinstance(filter_conditions, str):
            expression = filter_conditions
        else:
            # Parenthesise each part so an `or` inside one condition cannot
            # bind across the joining `and`.
            expression = " and ".join(f"({part})" for part in filter_conditions)
        # Filter before selecting so a condition may reference a column that
        # is not part of the requested output.
        df = df.query(expression)
    if columns_list:
        df = df[list(dict.fromkeys(columns_list))]
    return df
CodePudding user response:
Use function parameters?
def select_filter_df(filename, columns, querystring):
    """Load selected columns of a parquet file and return the matching rows.

    Args:
        filename: Path of the parquet file, handed to ``pd.read_parquet``.
        columns: Column names to load, or ``None`` to load every column.
            Repeated names are collapsed to their first occurrence.
        querystring: Boolean expression passed to ``DataFrame.query``.

    Returns:
        The filtered DataFrame.

    Raises:
        Exception: Whatever the read or the query raised, re-raised after it
            has been logged through the module-level ``logger``.
    """
    if columns is not None:
        # A repeated name adds nothing and can leave the frame with an
        # ambiguous duplicate label for query() to resolve.
        columns = list(dict.fromkeys(columns))
    try:
        df = pd.read_parquet(filename, columns=columns, engine='pyarrow')
        return df.query(querystring)
    except Exception as error:
        # Logging alone is not enough: a failed read used to crash with
        # UnboundLocalError at the return, and a failed query handed back the
        # unfiltered frame. Re-raise so the caller sees the real error.
        logger.error(error)
        raise
# How to use it:
# Raw string: "\P" is not a valid escape sequence, so the plain literal only
# worked by accident (and warns on recent Python versions).
file_path = r"D:\Project_centriam"
filename = os.path.join(file_path, "merged_result.parquet")
# Each column is listed once; "customer_type" was previously requested twice.
cols = ["ticket_start_time", "ticket_end_time", "status", "customer_type", "ticket_type"]
qs = 'status == "Rechazado" and ticket_type =="INSTALLATION" and customer_type =="R"'
df = select_filter_df(filename, cols, qs)
CodePudding user response:
# Columns of the installation tickets that the filter step keeps.
required_cols = [
    "install_ticket_start_time",
    "install_ticket_end_time",
    "install_status",
    "install_customer_type",
    "install_ticket_type",
]
# DataFrame.query expression; adjacent literals are joined into one string.
filter_condition = (
    'install_status == "Rechazado"'
    ' and install_ticket_type =="INSTALLATION"'
    ' and install_customer_type =="R"'
)
def filter_df(df, column_list, filtered_df):
    """Keep the requested columns of a frame and return the matching rows.

    The arguments are now honoured instead of being overwritten. Passing
    ``None`` for any of them falls back to the module-level value the
    function used to hard-code.

    Args:
        df: DataFrame to filter. ``None`` loads the parquet file named by the
            module-level ``filename``.
        column_list: Column labels to keep. ``None`` uses ``required_cols``.
        filtered_df: Expression for ``DataFrame.query``. The parameter keeps
            its original name so existing positional calls still work.
            ``None`` uses ``filter_condition``.

    Returns:
        The filtered DataFrame (also printed, as before), or ``None`` when
        loading or filtering failed; the error is logged in that case.
    """
    try:
        if df is None:
            df = pd.read_parquet(filename, engine='pyarrow')
        wanted = required_cols if column_list is None else column_list
        expression = filter_condition if filtered_df is None else filtered_df
        # filter(items=...) keeps only labels that exist in the frame;
        # de-duplicate first so no label is requested twice.
        selection = df.filter(items=list(dict.fromkeys(wanted)))
        result = selection.query(expression)
        print(result)
    except Exception as error:
        logger.error(error)
        return None
    return result
# Pass the module-level settings: `column_list` and `filtered_df` exist only
# as names inside filter_df, so using them here raised NameError.
filter_df(df, required_cols, filter_condition)