Home > Net >  How can I highlight the largest value(s) in df.plot.barh?
How can I highlight the largest value(s) in df.plot.barh?

Time:12-07

I've got a stacked bar chart that uses colors to show the distribution of age groups among NBA teams. The code looks like this:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load the Excel file at ``file_path`` into a DataFrame.

    The first column of the sheet is used as the row index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float]) -> str:
    """Convert a colour given as float channels to a ``#rrggbb`` hex string.

    Args:
        color: Colour channels as floats, nominally in the range 0..1. The
            last element is dropped before conversion (for an RGBA tuple
            this discards alpha), so exactly three channels must remain.

    Returns:
        The colour as a lowercase ``#rrggbb`` string.

    Raises:
        TypeError: If the number of remaining channels is not three.
    """
    # Scale each channel to 0..255 and clamp, so that slightly out-of-range
    # inputs cannot produce a value that needs more than two hex digits.
    channels = [min(255, max(0, int(round(i * 255)))) for i in color[:-1]]
    # Two zero-padded hex digits per channel; the previous "#xxx" template
    # contained no conversion specifiers and raised TypeError on every call.
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample ``amount`` evenly spaced colours from a named colormap.

    Args:
        cmap: Name of a matplotlib colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's 0..1 range.

    Returns:
        The sampled colours as hex strings, in colormap order.
    """
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated
    # in matplotlib 3.7 and removed in 3.9.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(i)) for i in np.linspace(0, 1, amount)]
def main() -> None:
    """Draw the stacked horizontal bar chart and save it as a PNG file."""
    frame = read_to_df("age_dist_median_six.xlsx")
    # drop the median/youngest/oldest columns so only the stacked ones remain
    age_groups = frame.drop(["median", "youngest", "oldest"], axis=1)
    # reverse the row order (note: this is not a transpose)
    age_groups = age_groups.iloc[::-1]
    # one colour per stacked segment, sampled from the colormap
    palette = cmap_to_colors("viridis", 6)
    plot_options = dict(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=np.linspace(0, 100, 11),
        figsize=(10, 15),
    )
    axes = age_groups.plot.barh(**plot_options)
    axes.legend(bbox_to_anchor=(1, 1))
    axes.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()

I'm generally happy with the result, but I also want to highlight the largest bars: in each row, the largest bar (or bars, in case several share the largest percentage) should be shown in its actual color and all other bars in grey. How can I do this? The bar chart currently looks like this: [image: current stacked bar chart]. The .xlsx file used for the chart can be seen here: [image: screenshot of the spreadsheet].

Alternatively, if you want to keep your original colors but highlight the bars with the largest values you can just change the transparency of your bars by varying the alpha value. See code below:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple

def read_to_df(file_path: str) -> pd.DataFrame:
    """Load the Excel file at ``file_path`` into a DataFrame.

    The first column of the sheet is used as the row index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float]) -> str:
    """Convert a colour given as float channels to a ``#rrggbb`` hex string.

    Args:
        color: Colour channels as floats, nominally in the range 0..1. The
            last element is dropped before conversion (for an RGBA tuple
            this discards alpha), so exactly three channels must remain.

    Returns:
        The colour as a lowercase ``#rrggbb`` string.

    Raises:
        TypeError: If the number of remaining channels is not three.
    """
    # Scale each channel to 0..255 and clamp, so that slightly out-of-range
    # inputs cannot produce a value that needs more than two hex digits.
    channels = [min(255, max(0, int(round(i * 255)))) for i in color[:-1]]
    # Two zero-padded hex digits per channel; the previous "#xxx" template
    # contained no conversion specifiers and raised TypeError on every call.
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample ``amount`` evenly spaced colours from a named colormap.

    Args:
        cmap: Name of a matplotlib colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's 0..1 range.

    Returns:
        The sampled colours as hex strings, in colormap order.
    """
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated
    # in matplotlib 3.7 and removed in 3.9.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(i)) for i in np.linspace(0, 1, amount)]

def main() -> None:
    """Draw a stacked bar chart in which each row's largest bar(s) stand out.

    Every row of the data becomes one horizontal stacked bar. Within a row,
    the segment(s) equal to the row maximum are drawn fully opaque and all
    other segments are drawn with reduced alpha. The figure is saved to
    ``stacked_six_viridis.png`` and then displayed.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (note: this is not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)

    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)

    for row_pos in range(n_teams):
        # Positional row access works whatever labels the index holds;
        # integer keys on a label-indexed Series are deprecated in pandas.
        shares = df_age_only.iloc[row_pos]
        largest = shares.max()
        x_pos = 0
        for value, color in zip(shares, colors):
            # Ties with the row maximum are all highlighted.
            alpha = 1 if value == largest else 0.3
            mpl.barh(row_pos, value, color=color, left=x_pos,
                     align='center', edgecolor='tab:grey', alpha=alpha)
            # Accumulate the offset so that the segments stack end to end;
            # assigning instead of adding made them overlap.
            x_pos += value

    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)

    # Setting up legend: empty line plots act as full-colour legend handles,
    # independent of the alpha used for the individual bars.
    for column, color in zip(df_age_only.columns, colors):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()

    # Save before showing: once the window opened by show() is closed the
    # figure is gone, and a later savefig() would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()

And the output:

enter image description here

Other things you can change to highlight your bars include edgecolor and hatch.

CodePudding user response:

An idea is to loop through the generated bars, and change their transparency.

The example code below imports matplotlib.pyplot under its conventional alias plt, which makes it easier to compare with examples in the tutorials and on the web. Also note that pandas' plot functions usually return a matplotlib Axes object (named ax below).

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd

# Load the data; the first spreadsheet column becomes the row index.
df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)

# Drop the median/youngest/oldest columns so only the stacked ones remain.
df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)

ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black', width=1, clip_on=False, figsize=(12, 6))
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.invert_yaxis()
ax.margins(x=0, y=0)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='y', length=0)
ax.xaxis.set_major_formatter(PercentFormatter(100))

# Largest value of every row. pandas' max() skips NaN just like np.nanmax,
# and using it avoids numpy, which this snippet never imports.
row_max = df_age_only.max(axis=1)
for i, max_val in enumerate(row_max):
    # ax.containers holds one bar group per column; element i of each group
    # is the bar that belongs to row i.
    for bar_group in ax.containers:
        bar = bar_group[i]
        # The small tolerance keeps bars tied for the maximum highlighted.
        if bar.get_width() < max_val - 1e-6:
            bar.set_alpha(0.4)
            # Alternative: grey the bar out instead of fading it.
            # bar.set_facecolor('#DDDDDD') # light grey
plt.tight_layout()
plt.show()

pandas barh with changed transparency

  • Related