How can I customize the error bars in matplotlib? - CodePudding

I want to show all the relevant statistics on my plot (the MAE, the MSE, the mean error, and the standard deviation of the error) and also label each point so that I can tell which data point it refers to.

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
import numpy as np
import matplotlib.pyplot as plt

For the sake of simplicity, let's assume I only have three points.

# Ground-truth targets and model predictions, each a (3, 1) column vector.
# The rows must be comma-separated: NumPy's printed output (which omits the
# commas) is not a valid array literal when pasted back into code.
true = np.array([[1047.],
                 [953.],
                 [1073.]])
pred = np.array([[-69.921265],
                 [-907.8611],
                 [208.98877]])

# Aggregate error metrics computed by scikit-learn.
my_mse = mse(true, pred)  # mean squared error
my_mae = mae(true, pred)  # mean absolute error

# Absolute error of each point, followed by its summary statistics.
err = np.abs(true - pred)
mean_err = err.mean()  # mean of the per-point absolute errors
sd_err = err.std()  # standard deviation of the per-point absolute errors

Then, I plot my error bar.

dy = 100  # the same +/- extent is used for every error bar

# Black circular markers with thick red error bars and no end caps.
plt.errorbar(
    true,
    pred,
    yerr=dy,
    fmt='o',
    color='black',
    ecolor='red',
    elinewidth=3,
    capsize=0,
);

First, I'd like to somehow reference each error bar to see which data point it references. Second, I'd like to add all four pieces of information to the plot. I'd appreciate any help.

CodePudding user response：

Here you go. If this solves your problem, please consider accepting the answer:

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
import numpy as np
import matplotlib.pyplot as plt

# Observed values and predictions, each stored as a 3x1 column vector.
true = np.array([1047., 953., 1073.]).reshape(-1, 1)
pred = np.array([-69.921265, -907.8611, 208.98877]).reshape(-1, 1)

# Summary metrics from scikit-learn.
my_mae = mae(true, pred)  # mean absolute error
my_mse = mse(true, pred)  # mean squared error

# Per-point absolute error and the statistics derived from it.
err = np.abs(true - pred)
sd_err = err.std()  # standard deviation of the absolute errors
mean_err = err.mean()  # mean of the absolute errors

dy = 100  # the same +/- extent is used for every error bar

# Flatten to 1-D so each coordinate handed to matplotlib is a plain scalar.
xs, ys = np.ravel(true), np.ravel(pred)

# Draw all error bars in a single call. The call does not depend on the
# point being labelled, so repeating it inside the annotation loop would
# only redraw identical artists on top of each other.
plt.errorbar(xs, ys, yerr=dy, fmt='o', color='black', ecolor='red',
             elinewidth=3, capsize=0, zorder=3)

# Label every point with its 1-based index so it can be referenced.
for i, (x, y) in enumerate(zip(xs, ys), start=1):
    plt.annotate(str(i), (x, y), fontsize=20, color='blue')

# Show the statistics in the legend: empty plots add a legend entry
# without drawing any data.
label_1 = ['my_mae', 'my_mse', 'mean_err', 'sd_err']
label_2 = [my_mae, my_mse, mean_err, sd_err]

for stat_name, stat_value in zip(label_1, label_2):
    plt.plot([], [], 'o', label=f'{stat_name}: {stat_value}')

plt.legend(loc='lower right')

This is what you should get:

CodePudding user response：

I replaced the data with random values to simulate your 400 rows of data. Plotting all 400 data points takes roughly 0.1 s.

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
import numpy as np
import matplotlib.pyplot as plt
import time

# Synthetic stand-in for 400 rows of data.
# `pred` is kept 1-D so that it has the same shape as `true`: a (400,)
# array combined with a (400, 1) array would broadcast to (400, 400),
# turning any per-point difference into an all-pairs difference.
true = np.arange(400)
pred = np.random.rand(400) * 1000
number = np.arange(1, 401)  # 1-based index of each point

# Aggregate error metrics computed by scikit-learn.
my_mse = mse(true, pred)  # mean squared error
my_mae = mae(true, pred)  # mean absolute error

# Absolute difference between `true` and `pred`, then its summary statistics.
# NOTE(review): this subtraction follows NumPy broadcasting, so it is only a
# per-point error when `true` and `pred` have the same shape - confirm.
err = np.abs(true - pred)
mean_err = err.mean()
sd_err = err.std()

dy = 100  # the same +/- extent is used for every error bar

plt.close()  # close the current figure, if there is one
fig, ax = plt.subplots(figsize=(13, 5))
ax.set_xlim(0, 400)
ax.grid()

# Flatten to 1-D so each coordinate handed to matplotlib is a plain scalar.
xs, ys = np.ravel(true), np.ravel(pred)

# Time only the drawing calls. perf_counter() is the clock intended for
# measuring short intervals; time.time() can jump if the system clock changes.
t1 = time.perf_counter()
ax.errorbar(xs, ys, yerr=dy, fmt='o', color='black', ecolor='red',
            elinewidth=3, capsize=0)
# Label every point with its 1-based index, shifted 10 data units to the
# right so the text does not sit on top of the marker.
for i, (x, y) in enumerate(zip(xs, ys), start=1):
    ax.annotate(f'{i}', (x + 10, y))
# Summary statistics, placed in data coordinates.
ax.annotate(f'mae = {round(my_mae, 2)}, mse = {round(my_mse, 2)}, mean error = {round(mean_err, 2)}, standard deviation = {round(sd_err, 2)}', (190, -130))
t2 = time.perf_counter()

plt.tight_layout()
print('time needed:', t2-t1, 's')

Output:

time needed: 0.11669778823852539 s