I have several lists to iterate over, and one of them is a list of strings:
['model_1', 'model_2', 'model_3']
When I call the function with the following code
for n, m, f, p, s in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
It throws the following error
only integer scalar arrays can be converted to a scalar index
How should I deal with a list of strings in that case?
I read about enumerate(1, len(list)), but I guess it's not an option inside zip().
Full code and error:
def plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
    """
    Draw calibration curves and a histogram of predicted probabilities.

    All five arguments are iterated in lockstep with zip(), so each one is
    expected to hold one entry per model.

    :param model_name: name of a model to place on a plot
    :param mean_predicted_value: mean predicted values of a model from calibration_curve()
    :param fraction_of_positives: fraction of positive values of a model from calibration_curve()
    :param prob_pos: predicted probabilities for positive label from a model
    :param score: model's Brier score value
    """
    plt.figure(figsize=(10, 10))
    # Upper axes span two of the three grid rows; lower axes take the last row.
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    # Dotted diagonal from (0, 0) to (1, 1) used as the reference line.
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    print(model_name)
    for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
        # NOTE: zip() yields the elements themselves (a name, a curve, a score),
        # not positions, yet they are used below as indices into the very lists
        # they came from. The reported TypeError ("only integer scalar arrays
        # can be converted to a scalar index") is raised on the next statement.
        ax1.plot(mean_predicted_value[mpv], fraction_of_positives[fop], "s-",
                 label="%s (%0.3f)" % (model_name[name], score[sc]))
        ax2.hist(prob_pos[pr], bins=10, label="%s" % (model_name[name],),
                 histtype="step", lw=2, )
    ax1.set_ylabel("Fraction of positives")
    # Small margin beyond [0, 1] on the y axis.
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots (reliability curve)")
    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)
    plt.tight_layout()
# Map a label to [score, model] for each of the three candidates.
# Presumably index 0 is the uncalibrated model and 1/2 are its sigmoid and
# isotonic calibrations -- verify against where `scores`/`models` are built.
models_dict = {'model': [scores[0], models[0]],
               'sigmoid': [scores[1], models[1]],
               'isotonic': [scores[2], models[2]]}
# Pick the model whose score (first element of each entry) is smallest.
best_model = min(models_dict.items(), key=lambda x: x[1][0])[1][1]
# NOTE(review): plot_calibration() declares mean_predicted_value before
# fraction_of_positives, but the positional order here is the reverse --
# confirm this is intended, otherwise the plotted axes are swapped.
plot_calibration(list(models_dict.keys()),
                 fraction_of_positives, # float list
                 mean_predicted_value, # float list
                 probs_pos, # float list
                 scores) # float list
error:
/tmp/ipykernel_11471/2935120748.py in probability_calibration(model, X_train, X_val, X_test, features_list, prefit)
95 mean_predicted_value,
96 probs_pos,
---> 97 scores)
98
99 return best_model
/tmp/ipykernel_11471/2935120748.py in plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score)
19 print(model_name)
20 for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
---> 21 ax1.plot(mean_predicted_value[mpv], fraction_of_positives[fop], "s-",
22 label="%s (%0.3f)" % (model_name[name], score[sc]))
23 ax2.hist(prob_pos[pr], bins=10, label="%s" % (model_name[name],),
TypeError: only integer scalar arrays can be converted to a scalar index
CodePudding user response:
I think your problem originates from indexing the lists with the values you are iterating over.
enumerate gives you an index, whereas your loop over zip() already gives you direct access to the elements.
I think it would be good to do the following (changes only in the ax1.plot and ax2.hist lines):
def plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
    """
    Plot calibration (reliability) curves and predicted-probability histograms.

    The five arguments are walked in lockstep with zip(), so each must hold one
    entry per model, in the same order. zip() stops at the shortest input, so a
    shorter sequence silently drops the remaining models from the figure.

    :param model_name: names of the models, used as legend labels
    :param mean_predicted_value: per-model mean predicted values from calibration_curve()
    :param fraction_of_positives: per-model fractions of positives from calibration_curve()
    :param prob_pos: per-model predicted probabilities for the positive label
    :param score: per-model Brier score, shown next to the name in the legend
    """
    plt.figure(figsize=(10, 10))
    # Upper axes (calibration curves) span two of the three grid rows;
    # lower axes (histograms) take the last row.
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    # Dotted diagonal from (0, 0) to (1, 1) used as the reference line.
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    # Echo the series names to stdout.
    print(model_name)
    for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
        # zip() hands over the elements themselves, so they are used directly
        # rather than as indices into the input sequences.
        ax1.plot(mpv, fop, "s-", label="%s (%0.3f)" % (name, sc))
        # One-element tuple keeps %-formatting correct even if `name` is a tuple.
        ax2.hist(pr, bins=10, label="%s" % (name,), histtype="step", lw=2)
    ax1.set_ylabel("Fraction of positives")
    # Small margin beyond [0, 1] on the y axis.
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots (reliability curve)")
    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)
    plt.tight_layout()
# Map a label to [score, model] for each of the three candidates.
# Presumably index 0 is the uncalibrated model and 1/2 are its sigmoid and
# isotonic calibrations -- verify against where `scores`/`models` are built.
models_dict = {
    'model': [scores[0], models[0]],
    'sigmoid': [scores[1], models[1]],
    'isotonic': [scores[2], models[2]],
}
# The entry with the smallest score wins; keep only its model.
best_model = min(models_dict.values(), key=lambda entry: entry[0])[1]
# Bind every series by keyword: plot_calibration() declares
# mean_predicted_value before fraction_of_positives, so passing them
# positionally in the opposite order would swap the plotted axes.
plot_calibration(
    model_name=list(models_dict),
    mean_predicted_value=mean_predicted_value,
    fraction_of_positives=fraction_of_positives,
    prob_pos=probs_pos,
    score=scores,
)
In this code you directly use the elements you iterate over, so there is no need to index into the lists.