I have a loop that iterates over 50 dataframes and calculates a variable called p90_annual for each one.
Within this loop I want to plot p90_annual as a page in a PDF, so that I end up with a 50-page PDF
in which each page is the plot of p90_annual for one dataframe. I am currently using:
# Executed on every pass of the loop over the dataframes, so the PDF file is
# opened anew each time this runs.
with PdfPages('90thPercentile.pdf') as pdf:
    plt.figure()  # start a fresh figure for this dataframe
    plt.plot(p90_annual)
    plt.title(j)  # j is the key of the dataframe currently being processed
    plt.ylabel('Days Above 90th Percentile')
    pdf.savefig()  # writes the current figure as a page of the PDF
    plt.close()
When I do this, I only get one page, with the last instance of p90_annual plotted.
How can I modify this so that it adds a new page for each instance of p90_annual as it loops through?
For context, below is the larger loop that I am trying to get this to work within:
# Huge Loop
# For every entry in TempDict: derive a 90th-percentile tmax threshold for each
# calendar day from the 1900-1940 baseline, flag the winter (Dec-Feb) days of
# the full record that exceed their threshold, and plot the yearly counts.
for j in TempDict:
    # Make Baseline
    df = TempDict[j]
    # Entries of tmax that cannot be parsed as numbers become NaN.
    df = pd.to_numeric(df.tmax, errors='coerce')
    mask = (df.index >= '1900-01-01') & (df.index <= '1940-12-31')
    Baseline = df.loc[mask]
    # Builtin float stands in for the np.float alias, which current NumPy
    # releases no longer provide.
    Tmax = Baseline.astype(float)
    Index = Baseline.index
    DailyBase = pd.DataFrame(data={'date': Index, 'tmax': Tmax})

    # pivot dataframe: one row per year, one column per 'MM-DD' day
    DailyBase['year'] = DailyBase.date.dt.year
    DailyBase['day'] = DailyBase.date.dt.strftime('%m-%d')
    BaseResult = DailyBase[DailyBase.day != '02-29'].pivot(
        index='year', columns='day', values='tmax')

    # Calculate Percentiles
    BaseResult.index = list(range(1, 42))
    # Pad two days on each side so the 5-day window below also has a full
    # neighbourhood for the first and last days of the year.
    BaseResult.insert(0, '12-31_', BaseResult['12-31'])
    BaseResult.insert(0, '12-30_', BaseResult['12-30'])
    BaseResult['01-01_'] = BaseResult['01-01']
    BaseResult['01-02_'] = BaseResult['01-02']
    p90_todict = {}
    # `center` walks over the real day columns, skipping the two padding
    # columns at either end.
    for center in range(2, len(BaseResult.columns) - 2):
        # 90th percentile over the 5-day window centred on this day; rows
        # (years) with a missing value inside the window are dropped first.
        window = BaseResult.iloc[:, center - 2:center + 3].dropna()
        p90_todict[BaseResult.columns[center]] = np.quantile(window, .9)

    # Make POR dataframe
    # pull tmax and dates from original ACIS data
    FullTmax = df.astype(float)
    FullIndex = df.index
    # create and rotate data frame
    DailyPOR = pd.DataFrame(data={'date': FullIndex, 'tmax': FullTmax})
    DailyPOR['year'] = DailyPOR.date.dt.year
    DailyPOR['day'] = DailyPOR.date.dt.strftime('%m-%d')
    PORResult = DailyPOR[DailyPOR.day != '02-29'].pivot(
        index='year', columns='day', values='tmax')

    # Compare POR and baseline
    # eliminate leap years from POR daily data
    noleap_DailyPOR = DailyPOR[DailyPOR.day != '02-29'].copy()
    noleap_DailyPOR.index = noleap_DailyPOR.date
    # Use only winter months
    only_winter = noleap_DailyPOR[(noleap_DailyPOR.index.month >= 12)
                                  | (noleap_DailyPOR.index.month <= 2)]
    # set results to 0 for counts
    p90results = pd.DataFrame(index=only_winter.date)
    p90results['above90'] = 0
    # Compare POR and percentiles
    for _, row in only_winter.iterrows():
        if row.tmax > p90_todict[row.day]:
            p90results.loc[row.date, 'above90'] = 1
    # Sum annual counts above percentiles
    p90_annual = p90results.groupby(p90results.index.year).sum()

    # NOTE: this `with` sits inside the loop, so '90thPercentile.pdf' is
    # opened anew on every iteration and ends up holding only the last plot.
    with PdfPages('90thPercentile.pdf') as pdf:
        plt.rcParams['text.usetex'] = False
        plt.figure()
        plt.plot(p90_annual)
        plt.title(j)
        plt.ylabel('Days Above 90th Percentile')
        pdf.savefig()
        plt.close()
CodePudding user response:
Putting the for loop inside the with PdfPages() as pdf: block will help:
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import pandas as pd
# Stand-in data so the example runs on its own.
p90_annual = pd.DataFrame({"data": [19, 20, 13]})

# Open the PDF once; every pdf.savefig() inside the loop appends one page.
with PdfPages('90thPercentile.pdf') as pdf:
    for i in range(0, 50):
        plt.figure()  # fresh figure for this page
        plt.plot(p90_annual)
        plt.title(i)
        plt.ylabel('Days Above 90th Percentile')
        pdf.savefig()
        plt.close()  # release the figure before drawing the next one
Update:
I think I didn't explain this clearly enough: the point is just to move with PdfPages() as pdf:
outside of your loop, and then everything will be fine.
# Open the PDF once, outside the loop, so each iteration adds a page instead
# of starting the file over.
with PdfPages('90thPercentile.pdf') as pdf:
    # your loop here
    for j in TempDict:
        # ... per-dataframe calculation of p90_annual goes here, unchanged ...
        plt.figure()
        plt.plot(p90_annual)
        plt.title(j)  # j is the loop variable identifying the dataframe
        plt.ylabel('Days Above 90th Percentile')
        pdf.savefig()  # one page per dataframe
        plt.close()