Home > Enterprise >  split violinplot with different ranges using seaborn
split violinplot with different ranges using seaborn

Time:12-22

I am trying to plot two variables with different ranges using split violinplots in seaborn.

Here's what I have done so far:

from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

# Question script: draw two hue-grouped violin plots (one per metric) on twin
# y-axes and clip every violin body to half of its bounding box.
# NOTE(review): `pd` is used here, but the import lines of this snippet only
# bring in pyplot, seaborn and numpy -- `import pandas as pd` is missing.
df1 = pd.read_csv('dummy_metric1.csv')
df2 = pd.read_csv('dummy_metric2.csv')

# The axes returned by plt.subplots() is bound to the name `ax2`; that name is
# rebound twice further down (twinx(), then the second violinplot call).
fig, ax2 = plt.subplots()

sns.set_style('white')
palette1 = 'Set2'
palette2 = 'Set1'
# colors_list is defined but never referenced in this snippet.
colors_list = ['#78C850', '#F08030',  '#6890F0',  '#A8B820',  '#F8D030', '#E0C068', '#C03028', '#F85888', '#98D8D8']

# No ax= argument is passed; the Axes returned by seaborn is kept as ax1
# (presumably the current axes, i.e. the one created above -- confirm).
ax1 = sns.violinplot(y=df1.Value,x=df1.modality,hue=df1.metric, palette=palette1, inner="stick")
# Capture the limits before clipping so they can be re-applied afterwards.
xlim = ax1.get_xlim()
ylim = ax1.get_ylim()
for violin in ax1.collections:
    # Bounding box of the first path of this collection, in data coordinates.
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    # Clip rectangle starts at x0 and spans width / 2: the LEFT half is kept.
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax1.transData))
ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
ax1.set_title("dummy")
ax1.set_ylabel("metric1")
ax1.set_xlabel("Modality")
ax1.set_xticklabels(ax1.get_xticklabels(), rotation=45, ha='right')
ax1.legend_.remove()

# Twin axes sharing x with ax1, so the second metric gets its own y-range.
ax2 = ax1.twinx() 

# `ax2` is rebound to whatever violinplot returns; again no ax= is passed, so
# the target axes is chosen implicitly by seaborn.
ax2 = sns.violinplot(y=df2.Value,x=df2.modality,hue=df2.metric, palette=palette2, inner=None)
xlim = ax2.get_xlim()
ylim = ax2.get_ylim()
for violin in ax2.collections:
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    # Same rectangle as for ax1: origin x0, width / 2 -> the LEFT half again.
    # NOTE(review): this appears to be why the right halves never show up.
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax2.transData))
ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
ax2.set_ylabel("Metric2")
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')
ax2.legend_.remove()

fig.tight_layout()
plt.show()

However, I am unable to use the right portion of the ax2 violins. Here is the output.

enter image description here

When I do violin.set_clip_path(plt.Rectangle((width/2, y0), width / 2, height, transform=ax2.transData)) I get this result:

enter image description here

Can someone explain what I am missing? Also, how can I manage having inner="stick"?

TIA

CodePudding user response:

Here is an approach using split=True and dummy data to force a split with empty halves. For the left half, metric is set to 1 for the real data, and to 2 for the dummy data. And vice versa for the right half. We need to make sure all dataframes use the same categorical order for the modality column, to avoid a mix-up.

from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Split-violin demo: two metrics with different value ranges share one x-axis.
# Each metric is drawn with split=True on its own y-axis (ax1 / its twin ax2);
# a one-row NaN "dummy" frame carrying the *other* hue level is concatenated so
# that seaborn sees two hue levels and leaves the opposite half empty.
sns.set_style('white')

# Metric 1 (hue level 1): 30 samples, values drawn uniformly from [50, 75).
# All frames use the same category list for 'modality' so the x positions of
# the two plots line up.
df1 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 30), ['a', 'b', 'c']),
                    'Value': np.random.rand(30) * 25 + 50})
df1['metric'] = 1
# Placeholder row for hue level 2; its Value is NaN.
df1_dummy = pd.DataFrame({'modality': pd.Categorical.from_codes([0], ['a', 'b', 'c']), 'Value': [np.nan]})
df1_dummy['metric'] = 2

# Metric 2 (hue level 2): 100 samples from a scaled random walk offset by 1.
df2 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 100), ['a', 'b', 'c']),
                    'Value': np.random.randn(100).cumsum() / 10 + 1})
df2['metric'] = 2
# Placeholder row for hue level 1; its Value is NaN.
df2_dummy = pd.DataFrame({'modality': pd.Categorical.from_codes([0], ['a', 'b', 'c']), 'Value': [np.nan]})
df2_dummy['metric'] = 1

# First plot: real data is hue level 1, the dummy row supplies level 2.
ax1 = sns.violinplot(y='Value', x='modality', hue='metric', palette=['turquoise', 'red'],
                     inner="stick", split=True, data=pd.concat([df1, df1_dummy]))
# Drop this legend; the legend created by the second plot on ax2 is kept.
ax1.legend_.remove()
ax1.set_ylabel('metric 1')

# Second plot on a twin axes so metric 2 gets an independent y-range; ax= is
# passed explicitly so seaborn draws on the twin.
ax2 = ax1.twinx()
sns.violinplot(y='Value', x='modality', hue='metric', palette=['turquoise', 'red'],
               inner="stick", split=True, data=pd.concat([df2, df2_dummy]), ax=ax2)
ax2.set_ylabel('metric 2')

plt.tight_layout()
plt.show()

sns.violinplot splitting with different value ranges

PS: Here is a possible adaptation of the original code:

  • using plt.Rectangle((x0 + width/2, y0), width/2, height) to clip the violins on ax2
  • using the ax= parameter of sns.violinplot() to indicate the correct subplot
  • not changing the xlim and ylim of either ax
  • making sure both dataframes use the same categorical order for modality
  • to clip the "inner" lines, for ax1: loop through the lines, get their x0 and x1, and shorten the line to x0 and (x0 + x1)/2
  • similar for ax2: loop through the lines, get their x0 and x1, and shorten the line to (x0 + x1)/2 and x1
  • update the legend for ax2 combining it with the legend of ax1, and then remove ax1's legend
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Half-violin demo by clipping: each metric is drawn as a full violin on its
# own y-axis, then ax1's violins are clipped to their left half and ax2's to
# their right half, and the inner "stick" lines are shortened to match.

# Set the style before any axes are created so it applies to ax1 as well as
# to the twin axes created later.
sns.set_style('white')
palette1 = 'Set2'
palette2 = 'Set1'

# Both frames use the same category list for 'modality' so the x positions of
# the two plots line up.
df1 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 30), ['a', 'b', 'c']),
                    'Value': np.random.rand(30) * 25 + 50})
df1['metric'] = 1
df2 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 100), ['a', 'b', 'c']),
                    'Value': np.random.randn(100).cumsum() / 10 + 1})
df2['metric'] = 2

fig, ax1 = plt.subplots()

sns.violinplot(y=df1.Value, x=df1.modality, hue=df1.metric, palette=palette1, inner="stick", ax=ax1)
for violin in ax1.collections:
    # Bounding box of the violin body in data coordinates.
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    # Keep the left half: rectangle starts at x0 and spans half the width.
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax1.transData))
for line in ax1.lines:
    # Shorten each inner stick so it runs from its left end to its midpoint.
    x = line.get_xdata()
    line.set_xdata([x[0], np.mean(x)])

ax1.set_ylabel("metric1")
ax1.set_xlabel("Modality")

# Twin axes: shares x with ax1 but has its own y-range for the second metric.
ax2 = ax1.twinx()
sns.violinplot(y=df2.Value, x=df2.modality, hue=df2.metric, palette=palette2, inner="stick", ax=ax2)
for violin in ax2.collections:
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    # Keep the right half: rectangle starts at the horizontal midpoint.
    violin.set_clip_path(plt.Rectangle((x0 + width / 2, y0), width / 2, height, transform=ax2.transData))
for line in ax2.lines:
    # Shorten each inner stick so it runs from its midpoint to its right end.
    x = line.get_xdata()
    line.set_xdata([np.mean(x), x[1]])
ax2.set_ylabel("Metric2")
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')
# Merge both legends into one on ax2, then drop ax1's own legend.
# NOTE(review): newer Matplotlib releases rename `legendHandles` to
# `legend_handles` -- confirm against the installed version.
ax2.legend(handles=ax1.legend_.legendHandles + ax2.legend_.legendHandles, title='Metric')
ax1.legend_.remove()

fig.tight_layout()
plt.show()
  • Related