Matplotlib is a cross-platform data visualization and graphical plotting library for Python, and it is highly customizable.
Customization is Matplotlib's great advantage. Below is a customized histogram made with Matplotlib:
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from matplotlib.gridspec import GridSpec
from matplotlib.patches import ConnectionPatch
from matplotlib.patches import Polygon
from matplotlib.patches import Rectangle
def customized_Histogram(df, j):
    """
    Draw an annotated histogram of one column, coloured by percentile.

    Bars are drawn with matplotlib's default binning. Bars lying entirely
    below the 25th percentile, or starting above the 75th / 95th percentile,
    are recoloured. Every bar is labelled with its count and its share of
    the total, and the figure is shown with ``plt.show()``.

    Parameters:
    ****************
    df:
        pandas dataframe
    j(str):
        column name in str
    ****************

    Returns:
        None. The figure is displayed as a side effect.
    """
    # Colours for different percentiles
    perc_25_colour = 'gold'
    perc_50_colour = 'mediumaquamarine'
    perc_75_colour = 'deepskyblue'
    perc_95_colour = 'peachpuff'

    fig, ax = plt.subplots(figsize=(14, 8))

    # counts  = numpy.ndarray with the number of data points in each bin
    # bins    = numpy.ndarray of bin edges (one more entry than counts)
    # patches = container of Rectangle patches, one per bar
    counts, bins, patches = ax.hist(df[j], facecolor=perc_50_colour, edgecolor='gray')

    # Set the ticks to be at the edges of the bins.
    ax.set_xticks(bins.round(2))
    ax.tick_params(axis='x', labelrotation=70)

    # Set the graph title and axes titles
    ax.set_title(f'Distribution of {j}', fontsize=20)
    ax.set_ylabel('Count', fontsize=15)
    ax.set_xlabel(j, fontsize=15)

    # Change the colours of bars at the edges.
    # nanpercentile is used because np.percentile returns NaN as soon as the
    # column contains a NaN, which would silently disable all recolouring.
    twentyfifth, seventyfifth, ninetyfifth = np.nanpercentile(df[j], [25, 75, 95])
    for patch, leftside, rightside in zip(patches, bins[:-1], bins[1:]):
        if rightside < twentyfifth:
            patch.set_facecolor(perc_25_colour)
        elif leftside > ninetyfifth:
            patch.set_facecolor(perc_95_colour)
        elif leftside > seventyfifth:
            patch.set_facecolor(perc_75_colour)

    # Bar centre = left edge + half the bin width.
    bin_x_centers = 0.5 * np.diff(bins) + bins[:-1]
    # Place every label at a quarter of the second y tick value.
    bin_y_centers = ax.get_yticks()[1] * 0.25

    # Display the count of data points and % for each bar in the histogram
    total = counts.sum()
    for x_center, count in zip(bin_x_centers, counts):
        bin_label = "{0:,}".format(count) + " ({0:,.2f}%)".format((count / total) * 100)
        ax.text(x_center,
                bin_y_centers,
                bin_label,
                rotation=90,
                rotation_mode='anchor')

    # Annotation for bar values
    ax.annotate('Each bar shows count and percentage of total',
                xy=(.80, .30),
                xycoords='figure fraction',
                horizontalalignment='center',
                verticalalignment='bottom',
                fontsize=10,
                bbox=dict(boxstyle="round",
                          fc="white"),
                rotation=-90)

    # Create legend. facecolor/edgecolor are passed separately because
    # `color=` sets both and would override the black edge.
    legend_colours = [
        perc_25_colour,
        perc_50_colour,
        perc_75_colour,
        perc_95_colour,
    ]
    handles = [
        Rectangle((0, 0), 1, 1, facecolor=c, edgecolor="k")
        for c in legend_colours
    ]
    # NOTE(review): the colouring above uses the 25th/75th/95th percentiles,
    # so the middle two labels look like they should read 25-75 and 75-95;
    # left unchanged pending confirmation of the intended wording.
    labels = ["0-25 Percentile", "25-50 Percentile", "50-75 Percentile", ">95 Percentile"]
    ax.legend(handles, labels, bbox_to_anchor=(0.5, 0., 0.80, 0.99))

    # To save the figure, call e.g.
    # fig.savefig("filename.jpg", dpi=150, bbox_inches='tight') before plt.show().
    plt.show()
# Example usage: plot the "total_bill" column of seaborn's "tips" dataset.
import seaborn as sns
# load_dataset returns the named example dataset; presumably a pandas
# DataFrame fetched from seaborn's online data repository - verify offline use.
tips = sns.load_dataset("tips")
customized_Histogram(tips, "total_bill")
Customized Histogram plot
How can the above plot be reproduced using Plotly?
CodePudding user response: