I'm studying neural networks and I'm trying to develop a simple multilayer perceptron neural network to approximate a function using Python.
The problem is that the neural network returns the mean value of the training set for every input. I've been looking at the code for days trying to find out what is wrong, but I can't see it.
Could someone shed some light on this?
Thank you in advance!
This is my code:
import pandas as pd
import numpy as np
class pmc_3_layers:
    """Three-layer multilayer perceptron (input, hidden, output) trained with
    online backpropagation and sigmoid activations.

    NOTE(review): this is the question's original (buggy) version,
    reconstructed from garbled markup — the stripped '+' operators, the
    email-obfuscated '@' matrix products, and the lost indentation have been
    restored.  The question's own logic defects are flagged inline but
    deliberately NOT fixed, since the corrected version follows in the answer.
    """

    def __init__(self, n1, n2, n3):
        # Number of neurons in each layer: n1 inputs, n2 hidden, n3 outputs.
        self.n1 = n1
        self.n2 = n2
        self.n3 = n3
        # Initialize weights with random values in [-2.4/n1, 2.4/n1]
        # (LeCun's heuristic).  The extra column is the bias weight, fed by a
        # fixed -1 input prepended to each layer's input vector.
        self.w = []
        self.w.append(np.random.default_rng().uniform(-(2.4/n1), (2.4/n1), (n2, n1 + 1)))
        self.w.append(np.random.default_rng().uniform(-(2.4/n1), (2.4/n1), (n3, n2 + 1)))

    def forward(self, variables_updated):
        """Propagate one sample (already prefixed with the -1 bias input)
        through both layers; return pre-activations and activations."""
        # Sigmoid activation function.
        gfunc = np.vectorize(lambda a: 1/(1 + np.exp(-a)))
        # First (hidden) layer.
        i1 = self.w[0]@variables_updated
        y1 = gfunc(i1)
        # Prepend the fixed -1 bias input for the next layer.
        y1 = np.insert(y1, 0, -1, axis=0)
        # Second (output) layer.
        i2 = self.w[1]@y1
        y2 = gfunc(i2)
        return i1, y1, i2, y2

    def backward(self, variable, classe, i1, y1, i2, y2):
        """One online gradient step for sample `variable` with target `classe`."""
        # Derivative of the sigmoid.
        glinhafunc = np.vectorize(lambda a: np.exp(-a)/((1 + np.exp(-a))**2))
        # Output-layer local gradient (delta rule: error * sigmoid').
        glinha2 = glinhafunc(i2)
        grad2 = (classe - y2)*glinha2
        if y1.ndim <= 1:
            self.w[1] = self.w[1] + self.taxa_aprendizado*grad2@y1.reshape(1, -1)
        else:
            self.w[1] = self.w[1] + self.taxa_aprendizado*grad2@y1.T
        # Hidden-layer local gradient, backpropagated through w[1]
        # (bias column excluded).
        # NOTE(review): the leading minus sign and the use of the ALREADY
        # UPDATED w[1] look wrong — likely part of why training stalls at the
        # mean output; the answer's version drops the minus sign.
        glinha1 = glinhafunc(i1)
        if glinha1.ndim <= 1:
            grad1 = -glinha1.reshape(-1, 1)@grad2*self.w[1][:, 1:]
        else:
            grad1 = -glinha1.T@grad2*self.w[1][:, 1:]
        if grad1.ndim <= 1:
            self.w[0] = self.w[0] + self.taxa_aprendizado*grad1.reshape(-1, 1)@variable.reshape(1, -1)
        else:
            self.w[0] = self.w[0] + self.taxa_aprendizado*grad1@variable.reshape(1, -1)

    def eqm(self):
        """Mean squared error over the training set (accumulated with a
        negative sign; train() only compares absolute differences)."""
        eqm = 0
        for i in range(len(self.variables_updated)):
            i1, y1, i2, y2 = self.forward(self.variables_updated[i])
            for j in range(self.n3):
                # NOTE(review): bug — indexes the targets by output unit j
                # instead of sample i (should be self.output[i][j]);
                # the answer's version fixes this.
                eqm = eqm - (((self.output[j] - y2[j])**2)/2)
        eqm = eqm/len(self.variables_updated)
        return eqm

    def train(self, variables, output, taxa_aprendizado, precision):
        """Online backpropagation until the epoch-to-epoch EQM change
        drops below `precision`."""
        self.output = output
        self.variables = variables
        # Prepend the fixed -1 bias input to every training sample.
        self.variables_updated = np.insert(self.variables, 0, np.full((1, len(self.variables)), -1), axis=1)
        self.taxa_aprendizado = taxa_aprendizado
        self.precision = precision
        self.epoch = 1
        print('The inicial weight matrices are:')
        print(self.w)
        print('\n')
        while True:
            print('\n')
            print('######################################')
            print('Starting a new epohc number %i' % (self.epoch))
            print('\n')
            previous_eqm = self.eqm()
            # One full pass of per-sample (online) updates.
            for i in range(len(self.variables_updated)):
                i1, y1, i2, y2 = self.forward(self.variables_updated[i])
                self.backward(self.variables_updated[i], output[i], i1, y1, i2, y2)
            current_eqm = self.eqm()
            self.current_eqm = current_eqm
            if (abs(current_eqm - previous_eqm) <= precision):
                print("Training finished in %s epochs" % (self.epoch))
                print('\n')
                print("The final eqm was %f" % (self.current_eqm))
                print('\n')
                print('The final weight matrices are:')
                print(self.w)
                break
            self.epoch = self.epoch + 1

    def predict(self, variables):
        """Run forward on each row of `variables` (bias prepended) and
        return an (n_samples, n3) array of network outputs."""
        variables_updated = np.insert(variables, 0, np.full((1, len(variables)), -1), axis=1)
        resultados = np.zeros((len(variables_updated), self.n3))
        for i in range(len(variables_updated)):
            i1, y1, i2, y2 = self.forward(variables_updated[i])
            for j in range(len(y2)):
                resultados[i][j] = y2[j]
        return resultados
CodePudding user response:
Made some changes and the code now works:
class pmc_3_layers:
    """Three-layer multilayer perceptron (input, hidden, output) trained with
    online backpropagation and sigmoid activations.

    This is the answer's corrected version: everything is kept 2-D via
    np.atleast_2d so the matrix products are unambiguous, the hidden-layer
    gradient no longer carries a spurious minus sign, and eqm() indexes the
    targets by sample.  Garbled markup ('+' operators, obfuscated '@'
    products, lost indentation) has been reconstructed.
    """

    def __init__(self, n1, n2, n3):
        # Number of neurons in each layer: n1 inputs, n2 hidden, n3 outputs.
        self.n1 = n1
        self.n2 = n2
        self.n3 = n3
        # Initialize weights with random values in [-2.4/n1, 2.4/n1]
        # (LeCun's heuristic); the extra column is the bias weight.
        self.w = []
        self.w.append(np.random.default_rng().uniform(-(2.4/n1), (2.4/n1), (n2, n1 + 1)))
        self.w.append(np.random.default_rng().uniform(-(2.4/n1), (2.4/n1), (n3, n2 + 1)))

    def forward(self, variables_updated):
        """Propagate one sample (already prefixed with the -1 bias input).

        Returns 2-D arrays: i1/y1 with shape (1, n2)/(1, n2+1),
        i2/y2 with shape (n3, 1)."""
        # Sigmoid activation function.
        gfunc = np.vectorize(lambda a: 1/(1 + np.exp(-a)))
        # First (hidden) layer; keep everything 2-D so later matmuls are
        # well-defined regardless of the input's dimensionality.
        i1 = np.atleast_2d(self.w[0]@variables_updated)
        y1 = np.atleast_2d(gfunc(i1))
        # Prepend the fixed -1 bias input (as a column) for the next layer.
        y1 = np.insert(y1, 0, -1, axis=1)
        # Second (output) layer.
        i2 = np.atleast_2d(self.w[1]@y1.T)
        y2 = np.atleast_2d(gfunc(i2))
        return i1, y1, i2, y2

    def backward(self, variable, classe, i1, y1, i2, y2):
        """One online gradient step for sample `variable` with target `classe`."""
        variable = np.atleast_2d(variable)
        classe = np.atleast_2d(classe)
        # Derivative of the sigmoid.
        glinhafunc = np.vectorize(lambda a: np.exp(-a)/((1 + np.exp(-a))**2))
        # Output-layer local gradient (delta rule: error * sigmoid').
        glinha2 = np.atleast_2d(glinhafunc(i2))
        grad2 = np.atleast_2d((classe.T - y2)*glinha2)
        self.w[1] = np.atleast_2d(self.w[1] + self.taxa_aprendizado*grad2@y1)
        # Hidden-layer local gradient, backpropagated through w[1]
        # (bias column excluded).
        glinha1 = np.atleast_2d(glinhafunc(i1))
        grad1 = np.atleast_2d((grad2.T@self.w[1][:, 1:])*glinha1)
        self.w[0] = np.atleast_2d(self.w[0] + self.taxa_aprendizado*grad1.T@variable)

    def eqm(self):
        """Mean squared error over the training set (accumulated with a
        negative sign; train() only compares absolute differences)."""
        eqm = 0
        for i in range(len(self.variables_updated)):
            i1, y1, i2, y2 = self.forward(self.variables_updated[i])
            eqm = eqm - (((self.output[i] - y2)**2)/2).sum()
        eqm = eqm/len(self.variables_updated)
        return eqm

    def train(self, variables, output, taxa_aprendizado, precision):
        """Online backpropagation until the epoch-to-epoch EQM change
        drops below `precision`; progress is printed periodically."""
        self.output = output
        self.variables = variables
        # Prepend the fixed -1 bias input to every training sample.
        self.variables_updated = np.insert(self.variables, 0, np.full((1, len(self.variables)), -1), axis=1)
        self.taxa_aprendizado = taxa_aprendizado
        self.precision = precision
        self.epoch = 1
        print('The inicial weight matrices are:')
        print(self.w)
        print('\n')
        while True:
            # NOTE(review): the markup lost the modulus expression here;
            # assuming "% 100" (log every 100 epochs) — confirm against the
            # original post.
            if (self.epoch == 1) | ((self.epoch % 100) == 0):
                print('\n')
                print('######################################')
                print('Starting a new epoch number %i' % (self.epoch))
                print('\n')
            previous_eqm = self.eqm()
            # One full pass of per-sample (online) updates.
            for i in range(len(self.variables_updated)):
                i1, y1, i2, y2 = self.forward(self.variables_updated[i])
                self.backward(self.variables_updated[i], output[i], i1, y1, i2, y2)
            current_eqm = self.eqm()
            self.current_eqm = current_eqm
            if (self.epoch == 1) | ((self.epoch % 100) == 0):
                print('EQM difference:')
                print(current_eqm - previous_eqm)
            if (abs(current_eqm - previous_eqm) <= precision):
                print("Training finished in %s epochs" % (self.epoch))
                print('\n')
                print("The final eqm was %f" % (self.current_eqm))
                print('\n')
                print('The final weight matrices are:')
                print(self.w)
                break
            self.epoch = self.epoch + 1

    def predict(self, variables):
        """Run forward on each row of `variables` (bias prepended) and
        return an (n_samples, n3) array of network outputs."""
        variables_updated = np.insert(variables, 0, np.full((1, len(variables)), -1), axis=1)
        resultados = np.zeros((len(variables_updated), self.n3))
        for i in range(len(variables_updated)):
            i1, y1, i2, y2 = self.forward(variables_updated[i])
            for j in range(len(y2)):
                resultados[i][j] = y2[j]
        return resultados