How to highlight rows if there are two elements in a column for a grouped dataframe-CodePudding

I have a dataframe in a format:

d = {'vertex': [0,0,0,1,1] ,'hour': [1, 1,2,1,2], 'value': ['sun', 'watermelon','sun','watermelon','sun']}
df = pd.DataFrame(data=d)

How can I highlight rows where there is both 'sun' and 'watermelon' in 'value' column for a grouped dataframe by two columns [vertex,hour]. Which means that first two rows should be highlighted, because for vertex 0 and hour 1 there are bot values 'sun' and 'watermelon' in the grouped dataframe.

CodePudding user response：

Use:

def highlight_by_list(x):
    #list of values 
    L = ['sun', 'watermelon']
    c1 = 'background-color: yellow'
    # condition
    m = df.groupby(['vertex','hour'])['value'].transform(lambda x: set(x) == set(L))
    # empty DataFrame of styles
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)

    # set all matched rows to yellow 
    df1.loc[m, :] = c1
    return df1

df.style.apply(highlight_by_list, axis=None).to_excel('file.xlsx', index=False)

EDIT:

d = {'vertex': [0,0,1,1,1] ,
       'hour': [1, 1,2,2,2],
     'value': ['sun', 'watermelon','apple','watermelon','sun']}
df = pd.DataFrame(data=d)

def highlight_by_list(x):
    #list of values 
    L = ['sun', 'watermelon']
    c1 = 'background-color: yellow'
    # conditions for test by substring and also for get only rows from L
    m1 = df.groupby(['vertex','hour'])['value'].transform(lambda x: set(L).issubset(set(x)))
    m2 = df['value'].isin(L)
    # empty DataFrame of styles
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)

    # set all matched rows to yellow 
    df1.loc[m1 & m2, :] = c1
    return df1

CodePudding user response：

Try this:

rows_series = df[['vertex','hour']].duplicated(keep=False)
rows = rows_series[rows_series].index.values
df.style.apply(lambda x: ['background: yellow' if x.name in rows else '' for i in x], axis=1)