I'm trying to utilize a custom scorer with the following code
def edge_score(y, y_pred):
y_pred.name = 'y_pred'
y.name = 'y'
df = pd.concat([y_pred, y])
df['sign_pred'] = df.y_pred.apply(np.sign)
df['sign_true'] = df.y.apply(np.sign)
df['is_correct'] = 0
df.loc[
df.sign_pred * df.sign_true > 0, 'is_correct'] = 1
df['is_incorrect'] = 0
df.loc[
df.sign_pred * df.sign_true < 0, 'is_incorrect'] = 1
df['is_predicted'] = df.is_correct df.is_incorrect
df['result'] = df.sign_pred * df.y
df['edge'] = df.result.mean()
output_errors = df[['edge']]
output_errors.to_numpy()
return np.average(output_errors)
edge = make_scorer(edge_score)
I get the following error
AttributeError: 'numpy.ndarray' object has no attribute 'name'
When I comment out the .name lines, I get the following error
TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid
When I convert true and predictions to dataframe, I get the following error
y_pred = pd.DataFrame(y_pred)
y = pd.DataFrame(y)
AttributeError: 'DataFrame' object has no attribute 'y_pred'
CodePudding user response:
Change these lines of code
df['sign_pred'] = df.y_pred.apply(np.sign)
df['sign_true'] = df.y.apply(np.sign)
to these:
df['sign_pred'] = np.sign(y_pred)
df['sign_true'] = np.sign(y)
CodePudding user response:
You should first create a DataFrame with the two numpy arrays y
and y_pred
, and then perform all the operations.
def edge_score(y, y_pred):
df = pd.DataFrame({"y":y,
"y_pred":y_pred})
df['sign_pred'] = df.y_pred.apply(np.sign)
df['sign_true'] = df.y.apply(np.sign)
df['is_correct'] = 0
df.loc[
df.sign_pred * df.sign_true > 0, 'is_correct'] = 1
df['is_incorrect'] = 0
df.loc[
df.sign_pred * df.sign_true < 0, 'is_incorrect'] = 1
df['is_predicted'] = df.is_correct df.is_incorrect
df['result'] = df.sign_pred * df.y
df['edge'] = df.result.mean()
output_errors = df[['edge']]
output_errors.to_numpy()
return np.average(output_errors)
edge = make_scorer(edge_score)