Find all players with the longest winning streak-CodePudding

I am trying to crack the problem of finding the player(s) with the longest streak of winning using Python's 3.2. I am only able to work out an unscalable solution, but I could not come up with a better one yet. Can anyone please help me with a better solution? Also, I am curious how to adapt such solution to output the player(s) with the longest winning streak per month or year?

Below is the sample dataframe players_results

Player_id     Match_Date             Match_Result
401           2021-05-04 00:00:00    W
401           2021-05-09 00:00:00    L
401           2021-05-16 00:00:00    W
401           2021-05-18 00:00:00    w
401           2021-05-22 00:00:00    L
401           2021-06-15 00:00:00    L
401           2021-06-16 00:00:00    W
401           2021-06-18 00:00:00    W
402           2021-05-14 00:00:00    L
402           2021-05-23 00:00:00    L
402           2021-05-24 00:00:00    W
402           2021-06-01 00:00:00    W
402           2021-06-02 00:00:00    W
402           2021-07-01 00:00:00    W
403           2021-05-03 00:00:00    L
403           2021-05-11 00:00:00    W
403           2021-05-12 00:00:00    W
403           2021-05-13 00:00:00    W
403           2021-05-20 00:00:00    W
403           2021-05-25 00:00:00    W
403           2021-07-06 00:00:00    L
404           2021-05-10 00:00:00    W
404           2021-05-16 00:00:00    W
404           2021-05-20 00:00:00    W
404           2021-05-22 00:00:00    W
404           2021-05-28 00:00:00    L
405           2021-05-07 00:00:00    L
405           2021-05-25 00:00:00    W
405           2021-06-06 00:00:00    L
405           2021-06-07 00:00:00    W
405           2021-06-14 00:00:00    W
405           2021-07-01 00:00:00    W

Expected Output

player_id     longest_winningstreak
403           5

My unscalable code

df = players_results.groupby('player_id').count()
df = df['match_date'].reset_index()
#record result of player_id = 401 into vector b
a = [] # record the number of consecutive "W" of player_id = 401
x = 0
for i in range(df['match_date'][0]):
    if players_results['match_result'][i] == 'W':
        x = x   1
        if (i == df['match_date'][0]-1):
            a.append(x)
    else:
        a.append(x)
        x = 0
print(a)
b = []
b.append([df['player_id'][0],max(a)])
print(b)

c = []
y=0
for i in range(df['match_date'][0], df['match_date'][0] df['match_date'][1]):
    if players_results['match_result'][i] == 'W':
        y = y   1
        if (i == df['match_date'][0] df['match_date'][1]-1):
            c.append(y)
    else:
        c.append(y)
        y = 0
#record result of player_id = 402 into the vector b
b.append([df['player_id'][1],max(c)])

d = []
z=0
for i in range(df['match_date'][0] df['match_date'][1], df['match_date'] [0] df['match_date'][1] df['match_date'][2]):
    if players_results['match_result'][i] == 'W':
        z = z   1
        if (i == df['match_date'][0] df['match_date'][1] df['match_date'][2]-1):
            d.append(z)
    else:
        d.append(z)
        z = 0
b.append([df['player_id'][2],max(d)])

e = []
z2=0
for i in range(df['match_date'][0] df['match_date'][1] df['match_date'][2], df['match_date'][0] df['match_date'][1] df['match_date'][2] df['match_date'][3]):
    if players_results['match_result'][i] == 'W':
        z2 = z2   1
        if (i == df['match_date'][0] df['match_date'][1] df['match_date'][2] df['match_date'][3]-1):
            e.append(z2)
    else:
        e.append(z2)
        z2 = 0
#print(e)
b.append([df['player_id'][3],max(e)])

f = []
z3=0
for i in range(df['match_date'][0] df['match_date'][1] df['match_date'][2] df['match_date'][3], df['match_date'][0] df['match_date'][1] df['match_date'][2] df['match_date'][3] df['match_date'][4]):
    if players_results['match_result'][i] == 'W':
        z3 = z3   1
        if (i == df['match_date'][0] df['match_date'][1] df['match_date'][2] df['match_date'][3] df['match_date'][4]-1):
            f.append(z3)
    else:
        f.append(z3)
        z3 = 0
#print(e)
b.append([df['player_id'][4],max(f)])

CodePudding user response：

One way using itertools.groupby:

from itertools import groupby

s = df["Match_Result"].str.lower().eq("w")

def longest_pattern(ser):
    lens = [len(list(g)) for k, g in groupby(ser) if k]
    return max(lens)

new_df = s.groupby(df["Player_id"]).apply(longest_pattern)

Or bit of a trick, but another way using re.findall:

import re

def longest(string):
    return max(len(i) for i in re.findall("w ", string, flags=re.I))

new_df = df.groupby("Player_id")["Match_Result"].sum().apply(longest)

You can then use pandas.Series.nlargest to get the desired output:

new_df.nlargest(1)

Output:

Player_id
403    5
Name: Match_Result, dtype: int64

CodePudding user response：

Filter groups crated by cumulative sums with compare not equal W with SeriesGroupBy.value_counts and then get max value with player_id by Series.agg with Series.idxmax and max:

m = df['Match_Result'].str.upper().ne('W')

s = m.cumsum()[~m].groupby(df['Player_id']).value_counts().reset_index(level=1, drop=True)

df = s.agg({'player_id': 'idxmax', 'longest_winningstreak':'max'}).to_frame(0).T
print (df)
   player_id  longest_winningstreak
0        403                      5

Solution per months:

df['Match_Date'] = pd.to_datetime(df['Match_Date'])

m = df['Match_Result'].ne('W')

s = (m.cumsum()[~m].groupby([df['Player_id'], df['Match_Date'].dt.to_period('m')])
       .value_counts()
       .reset_index(level=-1, drop=True))

df1 = (s.groupby(level=0)
        .agg([('period',lambda x: x.idxmax()[1]),('longest_winningstreak','max')]))

print (df1)
            period  longest_winningstreak
Player_id                                
401        2021-06                      2
402        2021-06                      2
403        2021-05                      5
404        2021-05                      4
405        2021-06                      2