I have a dataframe in a format:
d = {'vertex': [0,0,0,1,1] ,'hour': [1, 1,2,1,2], 'value': ['sun', 'watermelon','sun','watermelon','sun']}
df = pd.DataFrame(data=d)
How can I highlight rows where there is both 'sun'
and 'watermelon'
in 'value'
column for a grouped dataframe by two columns [vertex,hour]
. Which means that first two rows should be highlighted, because for vertex 0 and hour 1 there are bot values 'sun' and 'watermelon' in the grouped dataframe.
CodePudding user response:
Use:
def highlight_by_list(x):
#list of values
L = ['sun', 'watermelon']
c1 = 'background-color: yellow'
# condition
m = df.groupby(['vertex','hour'])['value'].transform(lambda x: set(x) == set(L))
# empty DataFrame of styles
df1 = pd.DataFrame('', index=x.index, columns=x.columns)
# set all matched rows to yellow
df1.loc[m, :] = c1
return df1
df.style.apply(highlight_by_list, axis=None).to_excel('file.xlsx', index=False)
EDIT:
d = {'vertex': [0,0,1,1,1] ,
'hour': [1, 1,2,2,2],
'value': ['sun', 'watermelon','apple','watermelon','sun']}
df = pd.DataFrame(data=d)
def highlight_by_list(x):
#list of values
L = ['sun', 'watermelon']
c1 = 'background-color: yellow'
# conditions for test by substring and also for get only rows from L
m1 = df.groupby(['vertex','hour'])['value'].transform(lambda x: set(L).issubset(set(x)))
m2 = df['value'].isin(L)
# empty DataFrame of styles
df1 = pd.DataFrame('', index=x.index, columns=x.columns)
# set all matched rows to yellow
df1.loc[m1 & m2, :] = c1
return df1
CodePudding user response:
Try this:
rows_series = df[['vertex','hour']].duplicated(keep=False)
rows = rows_series[rows_series].index.values
df.style.apply(lambda x: ['background: yellow' if x.name in rows else '' for i in x], axis=1)