I'm trying to plot a precedence matrix from the sample dataset shown below.
So far I'm able to add the text and plot the categorical variables with the count.
def plot_precedence_matrix(data, colx, coly, cols, color=['grey', 'black'], ratio=10, font='Helvetica',
                           save=False, save_name='Default'):
    """Draw a precedence matrix on the current matplotlib axes.

    Each row of ``data`` is placed at the grid cell given by its ``colx``
    category (x axis, ticks and labels on top) and its ``coly`` category
    (y axis), and the row's ``cols`` value is written in that cell.

    Note: this function assigns to ``plt.rcParams`` (font and tick colors),
    which are global matplotlib settings.

    Args:
        data: DataFrame holding the three columns; it is copied, not modified.
        colx: Name of the categorical column mapped to the x axis.
        coly: Name of the categorical column mapped to the y axis.
        cols: Name of the numeric column whose value is printed in each cell.
        color: Sequence of colors; the first two alternate for the dashed
            grid lines and the last one is used for the tick colors.
        ratio: Unused; kept so existing callers keep working.
        font: Value assigned to ``plt.rcParams['font.sans-serif']``.
        save: When true, save the figure to ``save_name + '.png'``.
        save_name: File name (without extension) used when ``save`` is true.
    """
    df = data.copy()

    # Encode the categories as integer grid positions
    # (x sorted ascending, y sorted descending).
    colx_codes = {cat: pos for pos, cat in enumerate(df[colx].sort_values().unique())}
    coly_codes = {cat: pos for pos, cat in enumerate(df[coly].sort_values(ascending=False).unique())}
    df[colx] = df[colx].map(colx_codes)
    df[coly] = df[coly].map(coly_codes)

    ax = plt.gca()
    ax.xaxis.set_label_position('top')
    ax.xaxis.set_ticks_position('top')

    # Prepare the aspect of the plot
    plt.rcParams['font.sans-serif'] = font
    plt.rcParams['xtick.color'] = color[-1]
    plt.rcParams['ytick.color'] = color[-1]

    # Faint dashed grid lines through every cell centre, alternating colors
    for num in range(len(coly_codes)):
        ax.hlines(num, -1, len(colx_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)
    for num in range(len(colx_codes)):
        ax.vlines(num, -1, len(coly_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)

    # Write the value of the `cols` column (not the encoded x position) in each cell
    for x, y, value in zip(df[colx], df[coly], df[cols]):
        ax.text(x, y, round(value, 1), horizontalalignment='center', verticalalignment='center',
                fontdict={'color': 'black', 'size': 30})

    # Change the tick numbers back to the category labels
    ax.set_xticks(list(colx_codes.values()))
    ax.set_xticklabels(list(colx_codes.keys()), rotation=90)
    ax.set_yticks(list(coly_codes.values()))
    ax.set_yticklabels(list(coly_codes.keys()))

    # Lighten borders
    for spine in ax.spines.values():
        spine.set_alpha(0.1)

    # Save if wanted
    if save:
        plt.savefig(save_name + '.png')
Sample dataset
| Antecedent | Consequent | Count |
|-------------------:|-------------------:|-------|
| register request | examine thoroughly | 1 |
| examine thoroughly | check ticket | 2 |
| check ticket | decide | 6 |
| decide | reject request | 3 |
| register request | check ticket | 2 |
| check ticket | examine casually | 2 |
| examine casually | decide | 2 |
| decide | pay compensation | 3 |
| register request | examine casually | 3 |
| examine casually | check ticket | 4 |
| decide | reinitiate request | 3 |
| reinitiate request | examine thoroughly | 1 |
| check ticket | examine thoroughly | 1 |
| examine thoroughly | decide | 1 |
| reinitiate request | check ticket | 1 |
| reinitiate request | examine casually | 1 |
# Palette handed to the plotting helper
colors = ['darkorange', 'grey', 'darkblue']
# Create the figure, draw the matrix on it, then set the tick label size on both axes
fig = plt.figure(figsize=(12, 8))
plot_precedence_matrix(
    df,
    'Antecedent',
    'Consequent',
    'Count',
    color=colors,
    ratio=100,
    font='cursive',
)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=14)
plt.show()
How can I add rectangular boxes colored according to a color scale using matplotlib? Can anybody shed some light on how to produce the plot described above with Python? I would be grateful for any pointers.
CodePudding user response:
You could draw colored rectangles at each of the positions. A colormap together with a norm could define the color.
Here is an example:
from matplotlib import pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.colors import ListedColormap
import pandas as pd
import numpy as np
from io import StringIO
def plot_precedence_matrix(data, colx, coly, cols, color=['grey', 'black'], ratio=10, font='Helvetica',
                           save=False, save_name='Default'):
    """Draw a precedence matrix with color-coded cells on the current axes.

    Each row of ``data`` becomes one unit square centred at the grid position
    of its ``colx`` category (x axis, ticks and labels on top) and ``coly``
    category (y axis). The square is filled from a 'Blues'-based colormap
    according to the row's ``cols`` value, the value is printed in the cell,
    and a colorbar for the same scale is attached to the axes.

    Note: this function assigns to ``plt.rcParams`` (font and tick colors),
    which are global matplotlib settings, and calls ``plt.tight_layout()``.

    Args:
        data: DataFrame holding the three columns; it is copied, not modified.
        colx: Name of the categorical column mapped to the x axis.
        coly: Name of the categorical column mapped to the y axis.
        cols: Name of the numeric column that drives cell color and text.
        color: Sequence of colors; the first two alternate for the dashed
            grid lines and the last one is used for the tick colors.
        ratio: Unused; kept so existing callers keep working.
        font: Value assigned to ``plt.rcParams['font.sans-serif']``.
        save: When true, save the figure to ``save_name + '.png'``.
        save_name: File name (without extension) used when ``save`` is true.
    """
    df = data.copy()

    # Encode the categories as integer grid positions
    # (x sorted ascending, y sorted descending).
    colx_codes = {cat: pos for pos, cat in enumerate(df[colx].sort_values().unique())}
    coly_codes = {cat: pos for pos, cat in enumerate(df[coly].sort_values(ascending=False).unique())}
    df[colx] = df[colx].map(colx_codes)
    df[coly] = df[coly].map(coly_codes)

    ax = plt.gca()
    ax.xaxis.set_label_position('top')
    ax.xaxis.set_ticks_position('top')

    # Prepare the aspect of the plot
    plt.rcParams['font.sans-serif'] = font
    plt.rcParams['xtick.color'] = color[-1]
    plt.rcParams['ytick.color'] = color[-1]

    # Faint dashed grid lines through every cell centre, alternating colors
    for num in range(len(coly_codes)):
        ax.hlines(num, -1, len(colx_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)
    for num in range(len(colx_codes)):
        ax.vlines(num, -1, len(coly_codes), linestyle='dashed', linewidth=2, color=color[num % 2], alpha=0.1)

    cmap = ListedColormap(plt.get_cmap('Blues')(np.linspace(0.1, 1, 256)))  # skip too light colors
    # Scale colors by the value column, not by the encoded x positions
    norm = plt.Normalize(df[cols].min(), df[cols].max())
    for x, y, value in zip(df[colx], df[coly], df[cols]):
        shade = norm(value)
        # Switch to white text on the darker cells so the number stays readable
        ax.text(x, y, round(value, 1), horizontalalignment='center', verticalalignment='center',
                fontdict={'color': 'black' if shade < 0.6 else 'white', 'size': 30})
        # facecolor/edgecolor instead of color=: `color` would also override the white edge
        ax.add_patch(plt.Rectangle((x - .5, y - .5), 1, 1, facecolor=cmap(shade), edgecolor='white'))
    plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=ax)

    # Change the tick numbers back to the category labels
    ax.set_xticks(list(colx_codes.values()))
    ax.set_xticklabels(list(colx_codes.keys()), rotation=90, fontsize=14)
    ax.set_yticks(list(coly_codes.values()))
    ax.set_yticklabels(list(coly_codes.keys()), fontsize=14)

    # Lighten borders
    for spine in ax.spines.values():
        spine.set_alpha(0.1)

    plt.tight_layout()  # fit the labels into the figure
    if save:
        plt.savefig(save_name + '.png')
# Sample data as pipe-separated rows: antecedent | consequent | count
df_str = """
register request | examine thoroughly | 1
examine thoroughly | check ticket | 2
check ticket | decide | 6
decide | reject request | 3
register request | check ticket | 2
check ticket | examine casually | 2
examine casually | decide | 2
decide | pay compensation | 3
register request | examine casually | 3
examine casually | check ticket | 4
decide | reinitiate request | 3
reinitiate request | examine thoroughly | 1
check ticket | examine thoroughly | 1
examine thoroughly | decide | 1
reinitiate request | check ticket | 1
reinitiate request | examine casually | 1 """
# The delimiter is a regex, so it is written as a raw string: "\s" and "\|" are
# invalid escapes in a normal string literal. strip() removes the literal's
# leading newline and trailing blank so no stray whitespace reaches the parser.
df = pd.read_csv(StringIO(df_str.strip()), delimiter=r"\s*\|\s*", engine='python',
                 names=['Antecedent', 'Consequent', 'Count'])
colors = ['darkorange', 'grey', 'darkblue']
fig = plt.figure(figsize=(12, 8))
plot_precedence_matrix(df, 'Antecedent', 'Consequent', 'Count', color=colors, ratio=100, font='cursive')
plt.show()