What is the problem with my Gradient Descent Algorithm or how its applied?-CodePudding

I've been trying figure out what I have done wrong for many hours, but just can't figure out. I've even looked at other basic Neural Network libraries to make sure that my gradient descent algorithms were correct, but it still isn't working.

I'm trying to teach it XOR but it outputs -

input (0 0) | 0.011441891321516094
input (1 0) | 0.6558508610135193
input (0 1) | 0.6558003273099053
input (1 1) | 0.6563021185296245

after 1000 trainings, so clearly there's something wrong.

The code is written in lua and I created the Neural Network from raw data so you can easily understand how the data is formatted.

- Training code -

math.randomseed(os.time())

local nn =  require("NeuralNetwork")

local network = nn.newFromRawData({
    ["activationFunction"] = "sigmoid",
    ["learningRate"] = 0.3,
    ["net"] = {
        [1] = {
            [1] = {
                ["value"] = 0
            },
            [2] = {
                ["value"] = 0
            }
        },
        [2] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [2] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [3] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            },
            [4] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1
                }
            }
        },
        [3] = {
            [1] = {
                ["bias"] = 1,
                ["netInput"] = 0,
                ["value"] = 0,
                ["weights"] = {
                    [1] = 1,
                    [2] = 1,
                    [3] = 1,
                    [4] = 1
                }
            }
        }
    }
})

attempts = 1000
for i = 1,attempts do
    network:backPropagate({0,0},{0}) 
    network:backPropagate({1,0},{1})
    network:backPropagate({0,1},{1})
    network:backPropagate({1,1},{0})
end

print("Results:")
print("input (0 0) | "..network:feedForward({0,0})[1])
print("input (1 0) | "..network:feedForward({1,0})[1])
print("input (0 1) | "..network:feedForward({0,1})[1])
print("input (1 1) | "..network:feedForward({1,1})[1])

- Library -

local nn = {}
nn.__index = nn

nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1 math.exp(-x/1)) end,
    ReLu = function(x) return math.max(0, x) end,
}
nn.Derivatives = {
    sigmoid = function(x) return x * (1 - x) end,
    ReLu = function(x) return x > 0 and 1 or 0 end,
}
nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum  = 1/2*(expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}

function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    
    local self = setmetatable({}, nn)
    
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLu"
    self.net = {}
    
    local net = self.net
    local layers = hiddenLayers 2
    
    for i = 1, layers do
        net[i] = {}
    end
    
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    
    return self
    
end

function nn.newFromRawData(data)
    
    return setmetatable(data, nn)
    
end

function nn:feedForward(inputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum  = net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end 
    end
    
    local outputs = {}
    
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    
    return outputs
    
end

function nn:backPropagate(inputs, expected)
    
    local outputs = self:feedForward(inputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](net[layers][i].value)
        outputLayer[i].delta = delta    
    end
    
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i 1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta  = nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias -= lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value       
            end
        end
    end
    
end

return nn

Any help would be highly appreciated, thanks!

CodePudding user response：

All initial weights must be DIFFERENT numbers, otherwise backpropagation will not work. For example, you can replace 1 with math.random()
Increase number of attempts to 10000

With these modifications, your code works fine:

Results:
input (0 0) | 0.028138230938126
input (1 0) | 0.97809448578087
input (0 1) | 0.97785000216126
input (1 1) | 0.023128477689456