I've been trying figure out what I have done wrong for many hours, but just can't figure out. I've even looked at other basic Neural Network libraries to make sure that my gradient descent algorithms were correct, but it still isn't working.
I'm trying to teach it XOR but it outputs -
input (0 0) | 0.011441891321516094
input (1 0) | 0.6558508610135193
input (0 1) | 0.6558003273099053
input (1 1) | 0.6563021185296245
after 1000 trainings, so clearly there's something wrong.
The code is written in lua and I created the Neural Network from raw data so you can easily understand how the data is formatted.
- Training code -
math.randomseed(os.time())
local nn = require("NeuralNetwork")
local network = nn.newFromRawData({
["activationFunction"] = "sigmoid",
["learningRate"] = 0.3,
["net"] = {
[1] = {
[1] = {
["value"] = 0
},
[2] = {
["value"] = 0
}
},
[2] = {
[1] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[2] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[3] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
},
[4] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1
}
}
},
[3] = {
[1] = {
["bias"] = 1,
["netInput"] = 0,
["value"] = 0,
["weights"] = {
[1] = 1,
[2] = 1,
[3] = 1,
[4] = 1
}
}
}
}
})
attempts = 1000
for i = 1,attempts do
network:backPropagate({0,0},{0})
network:backPropagate({1,0},{1})
network:backPropagate({0,1},{1})
network:backPropagate({1,1},{0})
end
print("Results:")
print("input (0 0) | "..network:feedForward({0,0})[1])
print("input (1 0) | "..network:feedForward({1,0})[1])
print("input (0 1) | "..network:feedForward({0,1})[1])
print("input (1 1) | "..network:feedForward({1,1})[1])
- Library -
local nn = {}
nn.__index = nn
nn.ActivationFunctions = {
sigmoid = function(x) return 1/(1 math.exp(-x/1)) end,
ReLu = function(x) return math.max(0, x) end,
}
nn.Derivatives = {
sigmoid = function(x) return x * (1 - x) end,
ReLu = function(x) return x > 0 and 1 or 0 end,
}
nn.CostFunctions = {
MSE = function(outputs, expected)
local sum = 0
for i = 1, #outputs do
sum = 1/2*(expected[i] - outputs[i])^2
end
return sum/#outputs
end,
}
function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
local self = setmetatable({}, nn)
self.learningRate = learningRate or .3
self.activationFunction = activationFunction or "ReLu"
self.net = {}
local net = self.net
local layers = hiddenLayers 2
for i = 1, layers do
net[i] = {}
end
for i = 1, inputs do
net[1][i] = {value = 0}
end
for i = 2, layers-1 do
for x = 1, neurons do
net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
for z = 1, #net[i-1] do
net[i][x].weights[z] = math.random()*2-1
end
end
end
for i = 1, outputs do
net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
for z = 1, #net[layers-1] do
net[layers][i].weights[z] = math.random()*2-1
end
end
return self
end
function nn.newFromRawData(data)
return setmetatable(data, nn)
end
function nn:feedForward(inputs)
local net = self.net
local activation = self.activationFunction
local layers = #net
local inputLayer = net[1]
local outputLayer = net[layers]
for i = 1, #inputLayer do
inputLayer[i].value = inputs[i]
end
for i = 2, layers do
local layer = net[i]
for x = 1, #layer do
local sum = layer[x].bias
for z = 1, #net[i-1] do
sum = net[i-1][z].value * layer[x].weights[z]
end
layer[x].netInput = sum
layer[x].value = nn.ActivationFunctions[activation](sum)
end
end
local outputs = {}
for i = 1, #outputLayer do
table.insert(outputs, outputLayer[i].value)
end
return outputs
end
function nn:backPropagate(inputs, expected)
local outputs = self:feedForward(inputs)
local net = self.net
local activation = self.activationFunction
local layers = #net
local lr = self.learningRate
local inputLayer = net[1]
local outputLayer = net[layers]
for i = 1, #outputLayer do
local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](net[layers][i].value)
outputLayer[i].delta = delta
end
for i = layers-1, 2, -1 do
local layer = net[i]
local nextLayer = net[i 1]
for x = 1, #layer do
local delta = 0
for z = 1, #nextLayer do
delta = nextLayer[z].delta * nextLayer[z].weights[x]
end
layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
end
end
for i = 2, layers do
local lastLayer = net[i-1]
for x = 1, #net[i] do
net[i][x].bias -= lr * net[i][x].delta
for z = 1, #lastLayer do
net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
end
end
end
end
return nn
Any help would be highly appreciated, thanks!
CodePudding user response:
- All initial weights must be DIFFERENT numbers, otherwise backpropagation will not work. For example, you can replace
1
withmath.random()
- Increase number of attempts to
10000
With these modifications, your code works fine:
Results:
input (0 0) | 0.028138230938126
input (1 0) | 0.97809448578087
input (0 1) | 0.97785000216126
input (1 1) | 0.023128477689456