My goal is to replace every number in the DataFrame with the length of its current positive streak. The code works, but it is messy because I process one column at a time and type each column name by hand. I assume there is a better way to do this with much less code.
Would you have any idea how to simplify my code?
# Sample data: seven rows labelled 1..7 and three numeric columns A, B, C.
df = pd.DataFrame(
    data=[
        [9, 5, 2],
        [-2, 6, -4],
        [-5, 1, -1],
        [9, 6, -5],
        [7, -1, -3],
        [6, -4, 1],
        [2, -9, 3],
    ],
    index=[1, 2, 3, 4, 5, 6, 7],
    columns=['A', 'B', 'C'],
)
def streaks(df, col):
    """Return a copy of ``df`` with ``col`` replaced by positive-streak counts.

    Each value becomes the number of consecutive positive values ending at
    that row (1 for the first positive value of a run, 2 for the next, ...).
    Rows holding zero or a negative value become 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the numeric column to transform; it is not modified.
    col : str
        Label of the column to transform. It is forwarded to
        ``DataFrame.assign`` as a keyword, so it must be a string.

    Returns
    -------
    pandas.DataFrame
        New frame in which only ``col`` has been replaced.
    """
    sign = np.sign(df[col])
    # A new run starts wherever the sign differs from the previous row, so the
    # running total of those change points labels each run of equal signs.
    run_id = (sign != sign.shift()).cumsum()
    # Summing the signs inside a run counts up (+1, +2, ...) for positive runs
    # and down for negative ones; zeros stay at 0.
    s = sign.groupby(run_id).cumsum()
    # Write the result to the column named by ``col``; only positive counts
    # are kept, everything else becomes 0.
    return df.assign(**{col: s.where(s > 0, 0.0)})
# Replace column A with its positive-streak counts and rebind df to the result.
df = streaks(df, 'A')
def streaks(df, col):
    """Return a copy of ``df`` whose column ``col`` holds positive-streak counts.

    For every row the new value is how many positive values in a row have
    been seen up to and including that row; a zero or negative value resets
    the count and is itself reported as 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; left untouched.
    col : str
        Name of the numeric column to transform. Must be a string because it
        is used as a keyword argument to ``DataFrame.assign``.

    Returns
    -------
    pandas.DataFrame
        A new frame identical to ``df`` except for column ``col``.
    """
    sign = np.sign(df[col])
    # Rows where the sign changes mark the start of a new run; the cumulative
    # sum of those markers gives every run its own group key.
    run_id = (sign != sign.shift()).cumsum()
    # Within a run of +1 signs the cumulative sum is 1, 2, 3, ...; within a
    # run of -1 signs it is negative, and for zeros it is 0.
    s = sign.groupby(run_id).cumsum()
    # Assign to the column requested by the caller and zero out every
    # non-positive count.
    return df.assign(**{col: s.where(s > 0, 0.0)})
# Replace column B with its positive-streak counts and rebind df to the result.
df = streaks(df, 'B')
def streaks(df, col):
    """Return ``df`` with column ``col`` swapped for its positive-streak counts.

    The count at a row is the length of the unbroken run of positive values
    that ends there. Rows whose value is zero or negative get 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; a new frame is returned and this one is not changed.
    col : str
        Label of the numeric column to transform. It is expanded into a
        keyword argument for ``DataFrame.assign`` and so must be a string.

    Returns
    -------
    pandas.DataFrame
        New frame with only ``col`` replaced.
    """
    sign = np.sign(df[col])
    # Comparing each sign with the one before it flags the first row of every
    # run; accumulating the flags numbers the runs.
    run_id = (sign != sign.shift()).cumsum()
    # Accumulating the signs per run yields 1, 2, 3, ... for positive runs,
    # negative totals for negative runs and 0 for zeros.
    s = sign.groupby(run_id).cumsum()
    # The result goes to the column the caller asked for; non-positive totals
    # are replaced by 0.
    return df.assign(**{col: s.where(s > 0, 0.0)})
# Replace column C with its positive-streak counts and rebind df to the result.
df = streaks(df, 'C')
CodePudding user response:
You can use a single function for every column.
With `DataFrame.apply`, which passes each column to the function in turn:
def streaks(col):
    """Return the running length of the current positive streak in ``col``.

    Each element of the result is the number of consecutive positive values
    ending at that position (1, 2, 3, ... along a positive run). Positions
    holding zero or a negative value are reported as 0.

    Parameters
    ----------
    col : pandas.Series
        Numeric values to scan, e.g. one DataFrame column as supplied by
        ``DataFrame.apply``.

    Returns
    -------
    pandas.Series
        Streak counts carrying the same index as ``col``.
    """
    sign = np.sign(col)
    # A new run starts wherever the sign differs from the previous element;
    # the cumulative sum of those change points labels each run.
    run_id = (sign != sign.shift()).cumsum()
    # Summing the signs inside a run counts up for positive runs and down for
    # negative ones; zeros stay at 0.
    s = sign.groupby(run_id).cumsum()
    # Keep positive counts only; everything else becomes 0.
    return s.where(s > 0, 0.0)
# DataFrame.apply hands each column to streaks as a Series and collects the
# returned Series into a new frame.
df = df.apply(streaks)
Or, modifying your original approach so that the output column is taken from `col`:
def streaks(df, col):
    """Return a copy of ``df`` with ``col`` replaced by positive-streak counts.

    Each value becomes the number of consecutive positive values ending at
    that row; rows holding zero or a negative value become 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the numeric column to transform; it is not modified.
    col : str
        Label of the column to transform. The label is unpacked into a
        keyword argument for ``DataFrame.assign``, so it must be a string.

    Returns
    -------
    pandas.DataFrame
        New frame in which only ``col`` has been replaced.
    """
    sign = np.sign(df[col])
    # Each change of sign opens a new run; the cumulative sum of the change
    # flags gives consecutive equal signs a shared group key.
    run_id = (sign != sign.shift()).cumsum()
    # Per-run cumulative sum of the signs: 1, 2, 3, ... for positive runs,
    # negative for negative runs, 0 for zeros.
    s = sign.groupby(run_id).cumsum()
    # ``**{col: ...}`` lets the column name come from the argument instead of
    # being spelled out as a literal keyword.
    return df.assign(**{col: s.where(s > 0, 0.0)})
# Transform every column in turn instead of naming each one by hand; each
# call returns a new frame, so df is rebound on every pass.
for col in df.columns:
    df = streaks(df, col)
Or, modifying the DataFrame in place:
def streaks(df, col):
    """Overwrite column ``col`` of ``df`` with its positive-streak counts.

    Each value becomes the number of consecutive positive values ending at
    that row; rows holding zero or a negative value become 0. The frame is
    modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update; its column ``col`` is reassigned.
    col : hashable
        Label of the numeric column to transform.

    Returns
    -------
    None
    """
    sign = np.sign(df[col])
    # A new run starts wherever the sign differs from the previous row; the
    # cumulative sum of those change points labels each run.
    run_id = (sign != sign.shift()).cumsum()
    # Summing the signs inside a run counts up for positive runs and down for
    # negative ones; zeros stay at 0.
    s = sign.groupby(run_id).cumsum()
    # Keep positive counts only and store them back on the caller's frame.
    df[col] = s.where(s > 0, 0.0)
# Transform every column in place instead of naming each one by hand; the
# return value of streaks is not used, df itself is updated.
for col in df.columns:
    streaks(df, col)
print(df)