How would I find the quarterly averages of these monthly figures?-CodePudding

My dataset is similar to the below:

data = [['Jane', 10,10.5,11,45,66,21,88,99,77,41,22], ['John',11,22,55,34,44,22,44,55,88,56,47],['Tom',23,32,43,12,11,44,77,85,99,45,63]]
df = pd.DataFrame(data, columns = ['Name', '09-Aug-21', 'Aug-21', '02-Sep-21', 'Sep-21', '18-Oct-21', 'Oct-21', '02-Nov-21','Nov-21','14-Dec-21', 'Dec-21', '15-Jan-22'])
df

How can I add columns to this which show the quarterly figure, which is an average of the preceding three months? Eg, suppose we started at adding a column after 'Dec-21' called Q4 2021 which took the average of the columns called 'Oct-21', 'Nov-21' and 'Dec-21'.

Will I need to create a function which takes the preceding three values and returns an average, and then concatenate this to my dataframe? It does not have to be directly after each period, eg I am also happy to add all of the Quarterly averages right at the end.

CodePudding user response：


from datetime import datetime

def get_quarter_name(timestamp):
    """Convert '2021-12-01' to 'Q4-2021'
    """
    return f"Q{timestamp.quarter}-{timestamp.year}"

# your data
data = [['Jane', 10,10.5,11,45,66,21,88,99,77,41,22], ['John',11,22,55,34,44,22,44,55,88,56,47],['Tom',23,32,43,12,11,44,77,85,99,45,63]]
df = pd.DataFrame(data, columns = ['Name', '09-Aug-21', 'Aug-21', '02-Sep-21', 'Sep-21', '18-Oct-21', 'Oct-21', '02-Nov-21','Nov-21','14-Dec-21', 'Dec-21', '15-Jan-22'])

# filter only relevant columns, which start with an alphabetical character
cols = [col for col in df.columns if not col[0].isdigit()]

# extract only relevant columns and transpose
df_T = df[cols].set_index("Name").T

# convert index values to dates
df_T.index = pd.Index([pd.Timestamp(datetime.strptime(d,'%b-%y').strftime('%Y-%m-%d')) for d in df_T.index])

# resample by Quarters and transpose again to original format
df_quarter = df_T.resample("Q").mean().T

# rename columns to quarter-like descriptions
df_quarter.columns = [get_quarter_name(col) for col in df_quarter.columns]

df_quarter is your final answer which you can merge back to original df

Output:

      Q3-2021    Q4-2021
Name                    
Jane    27.75  53.666667
John    28.00  44.333333
Tom     22.00  58.000000

CodePudding user response：

here is one way to do it

# Define your quarters month
q1=['Aug','Sep']
q2=['Oct','Nov']
q3=['Dec','Jan']


df['q1']=df[df.columns[(df.columns.str.contains(rf'|'.join(q1)) )]].mean(axis=1)
df['q2']=df[df.columns[(df.columns.str.contains(rf'|'.join(q2)) )]].mean(axis=1)
df['q3']=df[df.columns[(df.columns.str.contains(rf'|'.join(q3)) )]].mean(axis=1)
df

    Name    09-Aug-21   Aug-21  02-Sep-21   Sep-21  18-Oct-21   Oct-21  02-Nov-21   Nov-21  14-Dec-21   Dec-21  15-Jan-22       q1     q2          q3
0   Jane           10     10.5         11       45         66       21         88       99         77       41         22   19.125  68.50   46.666667
1   John           11     22.0         55       34         44       22         44       55         88       56         47   30.500  41.25   63.666667
2   Tom            23     32.0         43       12         11       44         77       85         99       45         63   27.500  54.25   69.000000

CodePudding user response：

This is kinda messy, but it SHOULD allow you to dynamically generate a column for each quarter (does not have the quarter year you could add that logic if you want).

data = [['Jane', 10,10.5,11,45,66,21,88,99,77,41,22], ['John',11,22,55,34,44,22,44,55,88,56,47],['Tom',23,32,43,12,11,44,77,85,99,45,63]]
df = pd.DataFrame(data, columns = ['Name', '09-Aug-21', 'Aug-21', '02-Sep-21', 'Sep-21', '18-Oct-21', 'Oct-21', '02-Nov-21','Nov-21','14-Dec-21', 'Dec-21', '15-Jan-22'])
columns_to_use = [column for column in df.columns if column[0].isalpha()]
df = df[columns_to_use]
df = df.melt(id_vars = 'Name')
df['variable'] = '01-'   df['variable']
df['variable'] = pd.to_datetime(df['variable'],infer_datetime_format=True)
df['Quarter'] = df['variable'].dt.quarter
df['Quarter_Avg'] = df.groupby(['Name', 'Quarter'])['value'].transform('mean')

df1 = df.groupby(['Name', 'Quarter'])['Quarter_Avg'].agg('mean').reset_index()
df1['Quarter'] = 'Quarter '   df1['Quarter'].astype(str)
df1 = df1.pivot_table(index = 'Name', columns = 'Quarter', values = 'Quarter_Avg').reset_index()

df['variable'] = df['variable'].astype(str)
df['variable'] = df['variable'].apply(lambda x : '-'.join(x.split('-')[0:2]))
df = df.pivot_table(index = 'Name', columns = 'variable', values = 'value').reset_index()

df_final = df.merge(df1, on = 'Name')
df_final

CodePudding user response：

A fair amount of steps but it gives you the expected result

from datetime import datetime

data = [['Jane', 10,10.5,11,45,66,21,88,99,77,41,22,22], ['John',11,22,55,34,44,22,44,55,88,56,47,47],['Tom',23,32,43,12,11,44,77,85,99,45,63,63]]
df = pd.DataFrame(data, columns = ['Name', '09-Aug-21', 'Aug-21', '02-Sep-21', 'Sep-21', '18-Oct-21', 'Oct-21', '02-Nov-21','Nov-21','14-Dec-21', 'Dec-21', '15-Jan-22', 'Jan-22'])

# Melt the data frame by date
meltedDF = df.melt(id_vars=["Name"], var_name=["Date"])

# Remove the dates that don't match the "Month-year" format
meltedDF = meltedDF[pd.to_datetime(meltedDF.Date, format='%b-%y', errors='coerce').notna()].reset_index(drop=True)

# Convert those dates to datetime objects
meltedDF["Date"] = pd.to_datetime(meltedDF.Date, format='%b-%y')

# Find the quarter that those dates fall into and add the year string to the that quarter
meltedDF["Quarter"] = "Q"   meltedDF.Date.dt.quarter.astype(str)   " "   meltedDF.Date.dt.year.astype(str)

# Group by the quarter and the person's name then get the mean of their values
meltedDF = meltedDF.groupby(["Quarter", "Name"], as_index=False).mean().round(1)

# Pivot the table's Quarter values to be column names
meltedDF = pd.pivot_table(meltedDF, index=['Name'], values=['value'], columns="Quarter")

# Combine the names and the Quarter total values
meltedDF = pd.concat([meltedDF.reset_index()["Name"], meltedDF.reset_index()["value"]], axis=1)

# Merge these values back into the original Dataframe
df = df.merge(meltedDF, left_on='Name', right_on='Name')

Output:


    Name    09-Aug-21   Aug-21  02-Sep-21   Sep-21  18-Oct-21   Oct-21  02-Nov-21   Nov-21  14-Dec-21   Dec-21  15-Jan-22   Jan-22  Q1 2022 Q3 2021 Q4 2021
0   Jane    10  10.5    11  45  66  21  88  99  77  41  22  22  22.0    27.8    53.7
1   John    11  22.0    55  34  44  22  44  55  88  56  47  47  47.0    28.0    44.3
2   Tom     23  32.0    43  12  11  44  77  85  99  45  63  63  63.0    22.0    58.0