I have the following DataFrame, which has two levels for both the index and the columns:
import pandas as pd
import numpy as np
# Row labels: outer level A/B crossed with inner level AA/BB.
index = pd.MultiIndex.from_product([["A", "B"], ["AA", "BB"]])
# Column labels: outer level X/Y crossed with inner level XX/YY.
columns = pd.MultiIndex.from_product([["X", "Y"], ["XX", "YY"]])
# 4x4 frame holding the integers 1..16 in row-major order.
df = pd.DataFrame(
    [list(range(start, start + 4)) for start in range(1, 17, 4)],
    index=index,
    columns=columns,
)
I would like to convert the values in this DataFrame to percentages in such a way that I can pass a level for both the index and the columns, and each corresponding "square" (block) is converted to percentages of its own total. To be more precise, I would like to pass `rowLevel = 0` (which corresponds to the `A` and `B` index values) and `colLevel = 0` (which corresponds to the `X` and `Y` column values), and as a result I should get:
# Expected result for rowLevel=0, colLevel=0: every cell divided by the total
# of its 2x2 block (A/X -> 14, A/Y -> 22, B/X -> 46, B/Y -> 54).
res = pd.DataFrame(
    [
        [1 / 14, 2 / 14, 3 / 22, 4 / 22],
        [5 / 14, 6 / 14, 7 / 22, 8 / 22],
        [9 / 46, 10 / 46, 11 / 54, 12 / 54],
        [13 / 46, 14 / 46, 15 / 54, 16 / 54],
    ],
    index=index,
    columns=columns,
)
It should also work for higher `rowLevel`s and `colLevel`s. How can I do that in the most elegant way?
CodePudding user response:
import pandas as pd
import numpy as np
# Two-level row labels (A/B x AA/BB) and two-level column labels (X/Y x XX/YY).
index = pd.MultiIndex.from_product([["A", "B"], ["AA", "BB"]])
columns = pd.MultiIndex.from_product([["X", "Y"], ["XX", "YY"]])
# Sample data: the integers 1..16 laid out row by row in a 4x4 frame.
df = pd.DataFrame(
    [list(range(first, first + 4)) for first in range(1, 17, 4)],
    index=index,
    columns=columns,
)
def f(df, rowLevel, colLevel):
    """Normalise every (row-group, column-group) block of ``df`` by its total.

    Rows are grouped by their label at index level ``rowLevel`` and columns by
    their label at column level ``colLevel``.  Each cell is divided by the sum
    of all cells that share both its row group and its column group, so every
    block of the result sums to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame to normalise.  It is not modified.
    rowLevel : int or str
        Level of ``df.index`` that defines the row groups.
    colLevel : int or str
        Level of ``df.columns`` that defines the column groups.

    Returns
    -------
    pandas.DataFrame
        Float frame with the same index and columns as ``df``.  A block whose
        total is zero yields ``inf``/``NaN`` entries (plain float division).
    """
    # Work on a float copy: the quotients are fractional, and writing them
    # into part of an integer column is a dtype-incompatible assignment.
    res = df.astype(float)
    row_keys = df.index.get_level_values(rowLevel)
    col_keys = df.columns.get_level_values(colLevel)
    # unique() keeps first-appearance order, so iteration is deterministic.
    for row_key in row_keys.unique():
        row_mask = row_keys == row_key
        for col_key in col_keys.unique():
            col_mask = col_keys == col_key
            block = res.loc[row_mask, col_mask]
            # First sum() collapses rows, second collapses columns.
            res.loc[row_mask, col_mask] = block / block.sum().sum()
    return res
# Normalise within the blocks defined by the outermost row and column levels.
rowLevel, colLevel = 0, 0
res = f(df, rowLevel, colLevel)
print(res)
This prints:
             X                   Y
            XX        YY        XX        YY
A AA  0.071429  0.142857  0.136364  0.181818
  BB  0.357143  0.428571  0.318182  0.363636
B AA  0.195652  0.217391  0.203704  0.222222
  BB  0.282609  0.304348  0.277778  0.296296
This works for any combination of levels (shown here with color-coding by group):
from IPython.display import display
# Use the fully qualified option name: on recent pandas versions the bare
# pattern 'precision' matches more than one option (for example
# 'styler.format.precision') and set_option raises OptionError.
pd.set_option('display.precision', 2)
def colorcode(x, rowLevel, colLevel):
    """Build a frame of CSS strings that colours each group block of ``x``.

    Rows are grouped by their label at index level ``rowLevel`` and columns by
    their label at column level ``colLevel``.  All cells belonging to the same
    (row group, column group) pair receive the same background colour.  The
    result has the same index and columns as ``x``, so it can be used as the
    function passed to ``Styler.apply(..., axis=None)``.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame being styled; only its index and columns are read.
    rowLevel : int or str
        Level of ``x.index`` that defines the row groups.
    colLevel : int or str
        Level of ``x.columns`` that defines the column groups.

    Returns
    -------
    pandas.DataFrame
        Frame of ``'background-color: <name>'`` strings shaped like ``x``.
        The palette has four colours and is reused cyclically when there are
        more than four blocks.
    """
    colors = ['brown', 'purple', 'gray', 'olive']
    row_keys = x.index.get_level_values(rowLevel)
    col_keys = x.columns.get_level_values(colLevel)
    # unique() keeps first-appearance order, so the colour given to each
    # block is the same on every run (set() order varies for strings).
    row_groups = row_keys.unique()
    col_groups = col_keys.unique()
    n = len(col_groups)
    # Start from an all-string frame rather than a copy of the numeric data,
    # so assigning CSS text never conflicts with a numeric column dtype.
    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    for a, row_key in enumerate(row_groups):
        row_mask = row_keys == row_key
        for b, col_key in enumerate(col_groups):
            col_mask = col_keys == col_key
            # Row-major block number is unique for every (a, b) pair; the
            # modulo wraps around when blocks outnumber the palette.
            color = colors[(a * n + b) % len(colors)]
            styles.loc[row_mask, col_mask] = f'background-color: {color}'
    return styles
# Render one colour-coded table for every (row level, column level) pair.
for rowLevel, colLevel in [(r, c) for r in [0, 1] for c in [0, 1]]:
    mat = f(df, rowLevel, colLevel).astype(float).round(2)
    # Styler.apply forwards extra keyword arguments to the styling function.
    display(
        mat.style.apply(
            colorcode,
            axis=None,
            rowLevel=rowLevel,
            colLevel=colLevel,
        )
    )
This displays: