Use list of strings in zip() as iterator index-CodePudding

I have several lists to iterate over, and one of them is a list of strings

['model_1', 'model_2', 'model_3']

When I call the function with the following code

for n, m, f, p, s in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):

It throws the following error

only integer scalar arrays can be converted to a scalar index

How should I deal with a list of strings in that case?

I read about enumerate(1, len(list)), but I guess it's not an option inside zip()

Full-code and error:



def plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
    """
    Draw calibration curves on an upper axis and probability histograms on a lower axis.

    All five arguments are iterated in lockstep with zip(), so each is expected
    to be a sequence (presumably one entry per model - confirm against caller).

    :param model_name: names of the models to place on the plot
    :param mean_predicted_value:  mean predicted values of the models from calibration_curve()
    :param fraction_of_positives:  fraction of positive values of the models from calibration_curve()
    :param prob_pos: predicted probabilities for positive label from the models
    :param score: the models' Brier score values
    """
    plt.figure(figsize=(10, 10))
    # 3x1 grid: ax1 spans the top two rows, ax2 takes the bottom row.
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    # Dotted diagonal used as the reference line in the legend.
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    
    print(model_name)
    for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
        # NOTE(review): zip() already yields the elements themselves (name, mpv,
        # fop, pr, sc), yet the lines below use them as indices into the original
        # sequences. This is the statement the reported TypeError points at.
        ax1.plot(mean_predicted_value[mpv], fraction_of_positives[fop], "s-",
                 label="%s (%0.3f)" % (model_name[name], score[sc]))
        ax2.hist(prob_pos[pr], bins=10, label="%s" % (model_name[name],),
                 histtype="step", lw=2, )

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots  (reliability curve)")

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)
    plt.tight_layout()


# Map each variant name to a [score, model] pair.
models_dict = {'model': [scores[0], models[0]],
               'sigmoid': [scores[1], models[1]],
               'isotonic': [scores[2], models[2]]}

# Keep the model stored alongside the smallest score.
best_model = min(models_dict.items(), key=lambda x: x[1][0])[1][1]

# NOTE(review): plot_calibration() declares mean_predicted_value before
# fraction_of_positives, but the two are passed in the opposite order here;
# confirm which order is intended.
plot_calibration(list(models_dict.keys()), 
                 fraction_of_positives, # float list
                 mean_predicted_value, # float list
                 probs_pos, # float list
                 scores) # float list

error:

/tmp/ipykernel_11471/2935120748.py in probability_calibration(model, X_train, X_val, X_test, features_list, prefit)
     95                      mean_predicted_value,
     96                      probs_pos,
---> 97                      scores)
     98 
     99     return best_model

/tmp/ipykernel_11471/2935120748.py in plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score)
     19     print(model_name)
     20     for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
---> 21         ax1.plot(mean_predicted_value[mpv], fraction_of_positives[fop], "s-",
     22                  label="%s (%0.3f)" % (model_name[name], score[sc]))
     23         ax2.hist(prob_pos[pr], bins=10, label="%s" % (model_name[name],),

TypeError: only integer scalar arrays can be converted to a scalar index

CodePudding user response：

I think your problem originates from indexing the names list with the name itself:

enumerate() gives you an index, whereas zip() in your code already gives direct access to the elements.

I think it would be good to do the following (changes only in the ax1.plot and ax2.hist lines):



def plot_calibration(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
    """
    Draw calibration curves on an upper axis and probability histograms on a lower axis.

    The five arguments are parallel sequences iterated together with zip():
    the i-th entry of each one describes the same model. If the sequences
    differ in length, zip() stops at the shortest one.

    :param model_name: names of the models, used as legend labels
    :param mean_predicted_value:  per-model mean predicted values from calibration_curve()
    :param fraction_of_positives:  per-model fraction of positive values from calibration_curve()
    :param prob_pos: per-model predicted probabilities for the positive label
    :param score: per-model Brier score values, shown next to each name in the legend
    """
    plt.figure(figsize=(10, 10))
    # 3x1 grid: ax1 spans the top two rows, ax2 takes the bottom row.
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    # Dotted diagonal used as the reference line in the legend.
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    print(model_name)
    # zip() yields the elements themselves, so they are used directly rather
    # than as indices back into the input sequences.
    for name, mpv, fop, pr, sc in zip(model_name, mean_predicted_value, fraction_of_positives, prob_pos, score):
        ax1.plot(mpv, fop, "s-",
                 label="%s (%0.3f)" % (name, sc))
        # One-element tuple: "%s" % (name) would unpack `name` if it were a tuple.
        ax2.hist(pr, bins=10, label="%s" % (name,),
                 histtype="step", lw=2, )

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots  (reliability curve)")

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)
    plt.tight_layout()


# Map each variant name to a [score, model] pair. The dict keeps insertion
# order, so its keys line up index-for-index with `scores` and `models`.
models_dict = {'model': [scores[0], models[0]],
               'sigmoid': [scores[1], models[1]],
               'isotonic': [scores[2], models[2]]}

# Keep the model stored alongside the smallest score.
best_model = min(models_dict.values(), key=lambda entry: entry[0])[1]

# Arguments follow the order of plot_calibration()'s signature:
# mean_predicted_value (x axis) comes before fraction_of_positives (y axis).
plot_calibration(list(models_dict),
                 mean_predicted_value,
                 fraction_of_positives,
                 probs_pos,
                 scores)

In this code you directly use the elements you iterate over, so there is no need to index the lists.