This is a question about how to properly organize subplots, not how to create stacked bars.
I have the following dataframe:
     corpus group  mono p  non p  plus p  minus p
0  fairview   all      49     51      49        0
1      i2b2   all      46     54      46        0
2    mipacq   all      44     56      43        1
and I want to arrange the output shown in the two attached figures as a single figure with n columns and 2 rows, instead of two separate figures with 1 row each (so in this case, there would be a 2-row, 3-column grid of subplots in a single figure instead of 1 row of 3 columns in each of 2 figures):
I am generating these two figures as separate subplots using the following code:
data = <above dataframe>
# Question code: draws ONE row of bar charts (one subplot per corpus) per
# semantic group. Indentation and the '+' operators have been restored so the
# snippet parses; the single-row behaviour the question asks about is kept.
semgroups = ['all']
corpus = ['fairview', 'i2b2', 'mipacq']

for sg in semgroups:
    i = semgroups.index(sg)
    ix = i + 7  # figure number shown in the suptitle
    ncols = len(set(data.corpus.tolist()))
    nrows = len(set(data.group.tolist()))  # computed but not passed to subplots()
    # NOTE: plt.subplots() below creates its own figure, so this one stays empty.
    fig = plt.figure()
    fig, axs = plt.subplots(1, ncols, sharey=True)

    # one subplot per dataframe row (i.e. per corpus)
    for ax, (idx, row) in zip(axs.flat, data.iterrows()):
        # I WANT TO PLOT BOTH ROWS on same subplot
        #row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
        row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])

        # panel letter and display name for the subplot title
        if row['corpus'] == 'fairview':
            corpus = 'Fairview'
            label = '(d) '
        elif row['corpus'] == 'mipacq':
            corpus = 'MiPACQ'
            if ncols == 3:
                label = '(f) '
            else:
                label = '(b) '
        else:
            corpus = 'i2b2'
            label = '(e) '

        ax.set_title(label + corpus)
        ax.tick_params(axis='x', labelrotation = 45)

    if sg == 'all':
        sg = 'All groups'

    # Defining custom 'xlim' and 'ylim' values.
    custom_ylim = (0, 60)

    # Setting the values for all axes.
    plt.setp(axs, ylim=custom_ylim)

    fig.suptitle('Figure ' + str(ix) + ' ' + sg)
In the code above, I iterate through my df grabbing the following rows to generate both separate subplots:
# BUT, I WANT TO PLOT BOTH ROWS ON SAME SUBPLOT
# bars for the 'mono p' / 'non p' columns of the current dataframe row
row[['mono p', 'non p']].plot.bar(ax=ax, color=['C0','C1'])
# bars for the 'plus p' / 'minus p' columns, drawn on the same axes object
row[['plus p', 'minus p']].plot.bar(ax=ax, color=['C0','C1'])
No matter how I do this, I cannot get the desired two rows of subplots in a single figure (I always get an empty second row of plots with no data).
CodePudding user response:
- See inline comments
- Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3, seaborn 0.11.2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # seaborn is a high-level api for matplotlib
# sample dataframe
data = {
    'corpus': ['fairview', 'i2b2', 'mipacq'],
    'group': ['all', 'all', 'all'],
    'mono p': [49, 46, 44],
    'non p': [51, 54, 56],
    'plus p': [49, 46, 43],
    'minus p': [0, 0, 1],
}
df = pd.DataFrame(data)

semgroups = df.group.unique()  # unique groups
corpus = df.corpus.unique()  # unique corpus
rows = [['mono p', 'non p'], ['plus p', 'minus p']]  # columns for each row of plots

ncols = len(corpus)  # 3 columns for the example
nrows = len(rows)  # 2 rows for the example

# One figure per semantic group. enumerate() supplies the group's position:
# Series.unique() returns a numpy array, which has no .index() method.
for group_idx, sg in enumerate(semgroups):
    ix = group_idx + 7  # figure number shown in the suptitle

    # create a figure with 2 rows of 3 columns of axes for the example;
    # squeeze=False keeps `axes` two-dimensional even when there is only one
    # corpus, so the nested iteration below always works
    fig, axes = plt.subplots(nrows, ncols, sharey=True, figsize=(12, 10), squeeze=False)

    # iterate through each plot row combined with a list from rows
    for axe, row in zip(axes, rows):
        # iterate through each plot column of the current row
        for col_idx, ax in enumerate(axe):
            name = corpus[col_idx]

            # select the data for each plot
            subset = df.loc[df.group.eq(sg) & df.corpus.eq(name), row]

            # plot the dataframe, but setting the bar color is more difficult
            # subset.T.plot(kind='bar', legend=False, ax=ax)

            # plot the data with seaborn, which is easier to color the bars
            sns.barplot(data=subset, ax=ax)

            # panel letter and display name for the subplot title
            if name == 'fairview':
                l2 = 'Fairview'
                l1 = '(d) '
            elif name == 'mipacq':
                l2 = 'MiPACQ'
                l1 = '(f) ' if ncols == 3 else '(b) '
            else:
                l2 = 'i2b2'
                l1 = '(e) '

            ax.set_title(l1 + l2)
            ax.tick_params(axis='x', labelrotation=45)

    # display title for the group; kept separate from `sg` so the value used
    # to filter the dataframe is never overwritten
    group_title = 'All groups' if sg == 'all' else sg

    # Defining custom 'xlim' and 'ylim' values.
    custom_ylim = (0, 60)

    # Setting the values for all axes.
    plt.setp(axes, ylim=custom_ylim)

    fig.suptitle(f'Figure {ix} {group_title}')
    fig.tight_layout()

plt.show()