I am using the following function to plot a grouped bar chart that compares the performance of models built using imbalanced and balanced data.
# plot results to compare between balanced and imbalanced data
def barChartBalancing(imbalancedResults, rusResults, smoteResults, score, title, string):
    """Plot a grouped bar chart comparing one score across three data setups.

    For every x position, three bars are drawn side by side: the result on
    the imbalanced dataset, the result after RandomUnderSampler, and the
    result after SMOTE. Each bar is annotated with its value (two decimals).
    The figure is saved as ``figures/resampling/<string>.png`` and then shown.

    Args:
        imbalancedResults: Bar heights for the 'Imbalanced Dataset' series.
        rusResults: Bar heights for the 'RandomUnderSampler' series.
        smoteResults: Bar heights for the 'SMOTE' series.
        score: Text used as the y-axis label.
        title: Text used as the figure title.
        string: Base name (without extension) of the saved image file.

    Note:
        The tick labels are read from a ``names`` variable that is not
        defined in this function; it is presumably a module-level sequence
        of model names with one entry per bar group -- confirm in the caller.
        The output directory is not created here, so ``figures/resampling/``
        is assumed to exist already.
    """
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np

    # make font bigger (applied through matplotlib.rc, i.e. not only to this figure)
    font = {'size': 15}
    matplotlib.rc('font', **font)

    labels = names
    x = np.arange(len(labels))  # the label locations
    width = 0.2  # the width of the bars

    fig, ax = plt.subplots(figsize=(10, 9))
    # The three series are shifted by one bar width so they sit next to each
    # other, centred on the tick at x.
    rects1 = ax.bar(x - width, imbalancedResults, width, label='Imbalanced Dataset')
    rects2 = ax.bar(x, rusResults, width, label='RandomUnderSampler')
    rects3 = ax.bar(x + width, smoteResults, width, label='SMOTE')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel(score)
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend(loc='upper center')

    # Write each bar's value just beyond its end.
    ax.bar_label(rects1, padding=5, fmt='%.2f', label_type='edge')
    ax.bar_label(rects2, padding=5, fmt='%.2f', label_type='edge')
    ax.bar_label(rects3, padding=5, fmt='%.2f', label_type='edge')

    fig.tight_layout()

    fileName = string + '.png'
    print(fileName)
    plt.savefig('figures/resampling/' + fileName)
    plt.show()
However, when I run this, the labels above the grouped bars are overlapping, like so:
I tried changing the 'padding' value, but then realised that it is the distance between the label and the end of the bar. I also tried changing the font size using the fontsize argument of bar_label, based on Rabinzel's comment. This helped somewhat, but I have to make the font extremely small to prevent the overlapping completely, to the point where it is very hard to read.
Any ideas how I can fix the overlapping? I wonder if I can position the labels vertically on the bar? I think that would solve the issue without having to use extremely small font sizes.
CodePudding user response:
I don't know if you have considered plotting your figure horizontally, but with this approach you will:
- be able to have longer names as labels,
- not have the overlapping problem,
- be able to show an unlimited number of models (entries) in the same figure.
# Horizontal grouped bar chart: value labels extend to the right of each bar,
# so labels of neighbouring bars cannot collide sideways.
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots()

# Example data
models = ('Linear regresion', 'Singular vector machine', 'linear vector clasification',
          'Naive Bayes Classifier', 'Gradient boosting')
y_pos = np.arange(len(models))
# Random placeholder scores in [3, 13), one per model and per series.
distance = 3 + 10 * np.random.rand(len(models))
speed = 3 + 10 * np.random.rand(len(models))
age = 3 + 10 * np.random.rand(len(models))
width = 0.2

# Shift the three series by one bar width so they sit next to each other
# around every tick position.
rects1 = ax.barh(y_pos - width, distance, width, align='center', label="Unvalance dataset")
rects2 = ax.barh(y_pos, speed, width, align='center', label="Random under sapler")
rects3 = ax.barh(y_pos + width, age, width, align='center', label="SMOTE")

# Tick labels are the model names defined above.
ax.set_yticks(y_pos, labels=models)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')

# Write each bar's value just beyond its end.
ax.bar_label(rects1, padding=5, fmt='%.2f', label_type='edge')
ax.bar_label(rects2, padding=5, fmt='%.2f', label_type='edge')
ax.bar_label(rects3, padding=5, fmt='%.2f', label_type='edge')

plt.legend()
plt.xlim(0, 15)  # values stay below 13, so this leaves space for the value labels
plt.show()
CodePudding user response:
Here is how you change the font size and rotate the labels created by bar_label.
Example on one of them:
# Smaller, vertically rotated value labels for the first series.
label_options = {
    'padding': 5,
    'fmt': '%.2f',
    'label_type': 'edge',
    'fontsize': 9,
    'rotation': 'vertical',
}
ax.bar_label(rects1, **label_options)
You can also pass a number to rotation if you would like a different angle, for example rotation=45.