I've got a stacked bar chart that uses colors to show the distribution of age groups among NBA teams. The code looks like this:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load the Excel file at ``file_path`` into a DataFrame.

    The first column of the sheet is used as the row index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float]) -> str:
    """Convert a color tuple with components in the 0..1 range to ``#rrggbb``.

    The last component of ``color`` is dropped before conversion (for an
    RGBA tuple this is the alpha channel), so ``color`` must hold exactly
    four components for the three-channel format string to match.

    Args:
        color: Color components as floats, nominally in the range 0..1.

    Returns:
        The color as a lowercase ``#rrggbb`` hex string.
    """
    # Scale to 0..255 and clamp, so slightly out-of-range inputs can never
    # produce a channel wider than two hex digits.
    channels = [min(255, max(0, int(round(component * 255))))
                for component in color[:-1]]
    # The previous "#xxx" format string had no conversion specifiers and
    # raised TypeError; each channel needs its own zero-padded "%02x".
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample ``amount`` evenly spaced colors from a named colormap.

    Args:
        cmap: Name of a registered matplotlib colormap, e.g. ``"viridis"``.
        amount: Number of colors to sample across the colormap's 0..1 range.

    Returns:
        A list of ``amount`` hex color strings produced by ``color_to_hex``.
    """
    # matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed
    # in 3.9; pyplot.get_cmap (``mpl`` is the pyplot alias in this file)
    # performs the same lookup by name and is still available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Draw the age-group distribution as a stacked horizontal bar chart.

    Reads ``age_dist_median_six.xlsx``, drops the summary columns, plots
    the remaining columns as stacked bars colored from the viridis
    colormap, and saves the figure to ``stacked_six_viridis.png``.
    """
    frame = read_to_df("age_dist_median_six.xlsx")
    age_groups = frame.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this flips the rows; it is not a transpose).
    age_groups = age_groups.iloc[::-1]
    # One hex color per stacked segment, sampled from the colormap.
    palette = cmap_to_colors("viridis", 6)
    axes = age_groups.plot.barh(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=np.linspace(0, 100, 11),
        figsize=(10, 15),
    )
    axes.legend(bbox_to_anchor=(1, 1))
    axes.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()
I'm generally happy with the result, but I also want to highlight the largest bars: the largest one (or ones, if several bars share the largest percentage) should be shown in its actual color, and all other bars should be shown in grey. How can I do it? The bar chart looks like this at the moment: The .xlsx file used for the chart can be seen here:
Alternatively, if you want to keep your original colors but highlight the bars with the largest values you can just change the transparency of your bars by varying the alpha value. See code below:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Read an Excel workbook into a DataFrame indexed by its first column."""
    table = pd.read_excel(file_path, index_col=0)
    return table
def color_to_hex(color: Tuple[float]) -> str:
    """Convert a color tuple with components in the 0..1 range to ``#rrggbb``.

    The last component of ``color`` is dropped before conversion (for an
    RGBA tuple this is the alpha channel), so ``color`` must hold exactly
    four components for the three-channel format string to match.

    Args:
        color: Color components as floats, nominally in the range 0..1.

    Returns:
        The color as a lowercase ``#rrggbb`` hex string.
    """
    # Scale to 0..255 and clamp, so slightly out-of-range inputs can never
    # produce a channel wider than two hex digits.
    channels = [min(255, max(0, int(round(component * 255))))
                for component in color[:-1]]
    # The previous "#xxx" format string had no conversion specifiers and
    # raised TypeError; each channel needs its own zero-padded "%02x".
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample ``amount`` evenly spaced colors from a named colormap.

    Args:
        cmap: Name of a registered matplotlib colormap, e.g. ``"viridis"``.
        amount: Number of colors to sample across the colormap's 0..1 range.

    Returns:
        A list of ``amount`` hex color strings produced by ``color_to_hex``.
    """
    # matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed
    # in 3.9; pyplot.get_cmap (``mpl`` is the pyplot alias in this file)
    # performs the same lookup by name and is still available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Draw the stacked age-group chart, fading every bar except the largest.

    Reads ``age_dist_median_six.xlsx``, drops the summary columns and draws
    one stacked horizontal bar per row. Within each row the segment(s)
    equal to the row maximum are drawn fully opaque; all other segments
    are drawn with ``alpha=0.3``. The figure is saved to
    ``stacked_six_viridis.png`` and then shown.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this flips the rows; it is not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)
    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)
    for row in range(n_teams):
        # Positional row access: the frame is indexed by label, so integer
        # lookups through ``[]`` would rely on a deprecated fallback.
        values = df_age_only.iloc[row]
        # Computed once per row instead of once per segment.
        row_max = values.max()
        x_pos = 0
        for value, color in zip(values, colors):
            alpha = 1 if value == row_max else 0.3
            mpl.barh(row, value, color=color, left=x_pos, align='center',
                     edgecolor='tab:grey', alpha=alpha)
            # Accumulate the running width so each segment starts where the
            # previous one ended; plain assignment made segments overlap.
            x_pos += value
    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)
    # Setting up legend: empty line plots serve as color swatches.
    for color, label in zip(colors, df_age_only.columns):
        mpl.plot([], [], color=color, lw=10, label=str(label))
    mpl.legend()
    # Save before show(): once the window from show() is closed, the
    # figure may be gone and savefig would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()
And the output:
Other things you can change to highlight your bars include `edgecolor` and `hatch`.
CodePudding user response:
An idea is to loop through the generated bars, and change their transparency.
The example code below uses `plt` to make the code easier to compare to examples in the tutorials and on the web. Also note that pandas' plot functions usually return an `ax`.
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd
# Load the table and keep only the age-group columns.
df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)
df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black', width=1, clip_on=False, figsize=(12, 6))
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.invert_yaxis()
ax.margins(x=0, y=0)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='y', length=0)
ax.xaxis.set_major_formatter(PercentFormatter(100))
# Largest value of each row. DataFrame.max skips NaN by default, matching
# the previous np.nanmax call, and avoids numpy, which this script never
# imports (the old call raised NameError).
row_max = df_age_only.max(axis=1)
# Each container is indexed by row position, so pairing its bars with
# row_max lines every bar up with the maximum of its own row.
for bar_group in ax.containers:
    for bar, max_val in zip(bar_group, row_max):
        # Small tolerance so bars equal to the maximum stay opaque.
        if bar.get_width() < max_val - 1e-6:
            bar.set_alpha(0.4)
            # bar.set_facecolor('#DDDDDD') # light grey
plt.tight_layout()
plt.show()