after running this code I keep getting the same error:
note:(the data is in excel file (Heights : 16 column) and (Wights:16 column)
I tried to change the epochs_num and it keeps giving the same problem...
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the dataset
data = pd.read_csv('heights_weights.csv')
# Plot the data distribution
plt.scatter(data['Height'], data['Weight'], color='b')
plt.xlabel('Height')
plt.ylabel('Weight')
plt.title('Height vs. Weight')
plt.show()
# Define the linear regression model
def linearRegression_model(X, weights):
y_pred = np.dot(X, weights)
return y_pred
# Define the update weights function
def linearRegression_update_weights(X, y, weights, learning_rate):
y_pred = linearRegression_model(X, weights)
weights_delta = np.dot(X.T, y_pred - y)
m = len(y)
weights -= (learning_rate/m) * weights_delta
return weights
# Define the train function
def linearRegression_train(X, y, learning_rate, num_epochs):
# Initialize weights and bias
weights = np.zeros(X.shape[1])
for epoch in range(num_epochs):
weights = linearRegression_update_weights(X, y, weights, learning_rate)
if (epoch % 100 == 0):
print('epoch: %s, weights: %s' % (epoch, weights))
return weights
# Define the predict function
def linearRegression_predict(X, weights):
y_pred = linearRegression_model(X, weights)
return y_pred
# Define the mean squared error function
def mean_squared_error(y_true, y_pred):
mse = np.mean(np.power(y_true-y_pred, 2))
return mse
# Prepare the data
X = data['Height'].values.reshape(-1, 1)
y = data['Weight'].values.reshape(-1, 1)
# Train the model
lr = 0.01
n_epochs = 1000
weights = linearRegression_train(X, y, lr, n_epochs)
# Predict
y_pred = linearRegression_predict(X, weights)
# Evaluate the model
mse = mean_squared_error(y, y_pred)
print('Mean Squared Error: %s' % mse)
# Plot the regression line
plt.scatter(data['Height'], data['Weight'], color='b')
plt.plot(X, y_pred, color='k')
plt.xlabel('Height')
plt.ylabel('Weight')
plt.title('Height vs. Weight')
plt.show()
# Plot the predicted and actual values
plt.scatter(data['Height'], y, color='b', label='Actual')
plt.scatter(data['Height'], y_pred, color='r', label='Predicted')
plt.xlabel('Height')
plt.ylabel('Weight')
plt.title('Actual vs. Predicted')
plt.legend()
plt.show()
i try the same code to run step by step in google colab and i also change the epochs to 62 and run it many times but still the same :
ValueError Traceback (most recent call last)
<ipython-input-23-98703406a0a3> in <module>
2 learning_rate = 0.01
3 num_epochs = 62
----> 4 weights = linearRegression_train(X, y, learning_rate, num_epochs)
1 frames
<ipython-input-12-8f66dacdd5fc> in linearRegression_update_weights(X, y, weights, learning_rate)
4 weights_delta = np.dot(X.T, y_pred - y)
5 m = len(y)
----> 6 weights -= (learning_rate/m) * weights_delta
7 return weights
ValueError: non-broadcastable output operand with shape (1,) doesn't match the broadcast shape (1,15)
CodePudding user response:
I can reproduce the error message with
In [5]: x=np.array([1])
In [6]: x =np.ones((1,5),int)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 x =np.ones((1,5),int)
ValueError: non-broadcastable output operand with shape (1,) doesn't match the broadcast shape (1,5)
CodePudding user response:
In linearRegression_update_weights
, weights.shape == (1,)
but weights_delta.shape == (1, 15)
so the in-place subtraction fails. The shape of weights_delta
is wrong because y_pred.shape == (15,)
but y.shape == (15, 1)
so (y_pred - y).shape == (15, 15)
because of broadcasting. This results in the wrong shape of weights_delta
after multiplied by X.T
. The fix is to ensure y
is a 1-D array to match the shape of y_pred
, preventing broadcasting:
y = data['Weight'].values.reshape(-1)