Error Derivative in Backpropagation neural network from scracth-CodePudding

First, I created the next forward propagate function

# save activations per layer
activations = []
for i in range(len(layers)):
   a = np.zeros(layers[i])
   activations.append(a)
self.activations = activations

def forward_propagate(self, inputs):
   # the input layer activation is just the input itself
   activations = inputs

   # save the activations for backpropogation
   self.activations[0] = activations

   # iterate through the network layers
   for b, w in zip(self.bias,self.weights):
      for i in range(len(self.pesos)-1):
        net_inputs = self._sigmoid(np.dot(activations[i-1],w) b)
        self.activations[i 1]=net_inputs 
   return activaciones

After that, I'd like to train the newtork so I design the next funcion

def train(self, inputs, targets, epochs, learning_rate):
   # now enter the training loop
   for i in range(epochs):
      sum_errors = 0

      # iterate through all the training data
      for j, input in enumerate(inputs):
         target = targets[j]

         # activate the network!
         output = self.forward_propagate(input)

         error = (2*(target - output))/len(output) #derivative of MSE

         self.back_propagate(error)

Finally. I created the backpropagate function that received the error as an input, where the error es the derivative of the MSE, after that the first derivate I wanna calculate is dE/dW_[i] as you can see bellow

dW=[]
db=[]
for i in range(len(layers)-1):
   derW=np.zeros((layers[i], layers[i 1]))
   derb=np.zeros((layers[i 1])).reshape(1, layers[i 1])
   dW.append(derW)  
   db.append(derb)
self.dW=dW  
self.db=db

def back_propagate(self, error):
    #dE/dW_i=2(y-a_[i 1])/m*s'(h_[i 1])a_[i] donnde (y-a_[i 1]) derivative of MSE
    #s'(h_[i 1])=s(h_[i 1])*(1-s(h_[i 1])) derivative of sigmoid activacion function
    #s(h_[i 1])=a_[i 1]
    #delta=2(y-a_[i 1])/m*s'(h_[i 1])
    #dE/dW_[i-1]=(y-a_[i 1])s'(h_[i 1])*W_[i]*s'(h_[i])a_[i]
    for i in reversed(range(len(self.dW))): 
      # get activation for previous layer
      activations = self.activations[i 1]

      # apply sigmoid derivative function
      delta = error*self._sigmoid_derivative(activations)

      # reshape delta as to have it as a 2d array
      delta_re=delta.reshape(delta.shape[0], -1).T

      # get activations for current layer
      current_activations = self.activations[i]

      # reshape activations as to have them as a 2d column matrix
      current_activations = current_activations.reshape(current_activations.shape[0],-1)

      # save derivative after applying matrix multiplication
      self.dW[i] = np.dot(current_activations, delta_re)

      # backpropogate the next error
      error = np.dot(delta, self.weights[i].T)

but when I run the code with the next data

# create a dataset to train a network for the sum operation
items = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
targets = np.array([[i[0]   i[1]] for i in items])

# create a Multilayer Perceptron with one hidden layer
mlp = MLP(2, [5], 1)

# train network
mlp.train(items, targets, 50, 0.1)

I have the next error

---> 69       error = np.dot(delta, self.weights[i].T)
shapes (2,) and (1,5) not aligned: 2 (dim 0) != 1 (dim 0)

I understand the error but not how to fix it. Any help?

CodePudding user response：

The error is occurring because you are trying to multiply a vector of shape (2, ) with a matrix of shape (1,5). In order for matrix multiplication to be defined, the number of columns in the first matrix must equal the number of rows in the second matrix, hence the error.

CodePudding user response：

I think I can get a solution with the next code

def back_propagate(self,error):
   for i in reversed(range(len(self.dW))):
      activations=self.activations[i 1]
      delta = np.multiply(self._sigmoid_derivative(activactions), error)
      m=delta.shape[0]
      delta_re=delta.reshape(delta.shape[0], -1)
      current_activations=self.activations[i]
      self.dW[i] = 1/m*np.dot(delta_re, current_activations.T)
      self.db[i] = 1/m*np.sum(delta_re, axis=1, keepdims=True)
      error_prev = np.dot(self.weights[i-1].T, delta)
   return error_prev

But i'm not sure 'cause when i change the name of "error_prev" to "error" i get the next mistake

---> 76       self.dW[i] = 1/m*np.dot(delta_re, activaciones_actuales.T)
shapes (5,5) and (2,) not aligned: 5 (dim 1) != 2 (dim 0)

Can anyone explain why when I change the name I get that mistake?