I wanna implement the backward propagation concept in python with the next code
class MLP(object):
def __init__(self, num_inputs=3, hidden_layers=[3, 3], num_outputs=2):
self.num_inputs = num_inputs
self.hidden_layers = hidden_layers
self.num_outputs = num_outputs
layers = [num_inputs] hidden_layers [num_outputs]
weights = []
bias = []
for i in range(len(layers) - 1):
w = np.random.rand(layers[i], layers[i 1])
b=np.random.randn(layers[i 1]).reshape(1, layers[i 1])
weights.append(w)
bias.append(b)
self.weights = weights
self.bias = bias
activations = []
for i in range(len(layers)):
a = np.zeros(layers[i])
activations.append(a)
self.activations = activations
def forward_propagate(self, inputs):
activations = inputs
self.activations[0] = activations
for i, w in enumerate(self.weights):
for j, b in enumerate(self.bias):
net_inputs = self._sigmoid((np.dot(activations, w) b))
self.activations[i 1] = net_inputs
return activations
def train(self, inputs, targets, epochs, learning_rate):
for i in range(epochs):
sum_errors = 0
for j, input in enumerate(inputs):
target = targets[j]
output = self.forward_propagate(input)
def _sigmoid(self, x):
y = 1.0 / (1 np.exp(-x))
return y
So I created the next dummy data in order to verify everything is correct
items = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
targets = np.array([[i[0] i[1]] for i in items])
mlp = MLP(2, [5], 1)
mlp.train(items, targets, 2, 0.1)
but when I run the code I have the next error
ValueError: shapes (2,) and (5,1) not aligned: 2 (dim 0) != 5 (dim 0)
I understand the error, but how to solve it?
CodePudding user response:
a couple of major problems with forward_propagate
:
- change
net_inputs
toactivations
- otherwise you always compute and return the activations from the first layer - remove
for j, b in enumerate(self.bias):
- biases from other layers have no business here - use
matmul
instead ofdot
so, something like
for i, w in enumerate(self.weights):
activations = self._sigmoid((np.matmul(activations, w) self.bias[i]))
self.activations[i 1] = activations
return activations
Also, be careful to note that this method receives 1D array, which converts to a matrix after the first matmul
. Matrixes are stored in self.activations
and a matrix is returned from the method.
This might or might not be what you want.