I need help writing my own logistic regression model. The input data (generated via sklearn.datasets.make_classification) looks like this (first 10 rows shown):
[[ 0.74186571 -1.69239663 2.06965145]
[-1.80076727 0.59700581 -1.57159523]
[ 1.0328198 0.62274582 0.90241322]
[-0.63972474 2.12054103 1.30124807]
[ 1.04275475 -0.86879077 1.08399317]
[-1.12772782 0.26396098 -1.68130012]
[ 0.92281318 -1.15431326 0.23868389]
[-0.37260971 -0.97979894 1.65890322]
[ 0.4513904 0.30502349 2.46449598]
[-2.79502998 0.05500871 -2.47725562]]
Output like this:
[0. 0. 1. 1. 0. 1. 0. 0. 0. 0.]
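For reference, the CSVs were produced roughly along these lines (a sketch only; the exact make_classification parameters below are placeholders, not the ones actually used, apart from the 1000 samples implied by the shape comments further down):
# rough sketch of how the CSVs could have been generated; the
# make_classification arguments here are placeholders, not the real ones
import numpy
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=3, n_informative=2,
                           n_redundant=1, random_state=0)
numpy.savetxt("x_logregdata.csv", X, delimiter=",")
numpy.savetxt("y_logregdata.csv", y, delimiter=",")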
Here is my code:
import numpy
import math
# import data
path = r"C:\Users\felix\sciebo2\Atom_working_dir\ML_sandbox\\" # put in global path here
x_logregdata = numpy.genfromtxt(path + "x_logregdata.csv", delimiter=",")
y_logregdata = numpy.genfromtxt(path + "y_logregdata.csv", delimiter=",")
class MyLogReg:
    def __init__(self, n):
        self._numinputs = n
        self._weights = numpy.random.rand(n)
        self._bias = 0  # or = numpy.random.rand(1)
        self._lam = 0.01  # lambda parameter for regularization

    def pred(self, x):
        return sigmoid(numpy.matmul(x, self._weights) + self._bias)
# -----------------------------------------------------------------------------
# functions
def logloss(net, x, y):  # p = prediction, y = target
    p = sigmoid(numpy.matmul(x, net._weights) + net._bias)  # (1000,3) x (3,) -> (1000,)
    logloss_term = (numpy.matmul(-y.T, numpy.log(p)) - numpy.matmul((1 - y.T), numpy.log(1 - p))) / len(p)  # (1000,) . (1000,) -> scalar
    regularization_term = numpy.mean(net._weights**2 * net._lam / 2)
    J = logloss_term + regularization_term
    return J

def sigmoid(z):
    # z = b + w1*x1 + w2*x2 + ... + wn*xn
    return 1 / (1 + numpy.exp(-z))
def train(net, x, t, epochs, lr):
    for epoch in range(epochs):
        # x comes in the shape (n_samples, n_features)
        p = net.pred(x)  # make predictions
        e = logloss(net, x, t)
        # gradient of the log loss w.r.t. the weights, plus the L2 term
        grad_w = (numpy.matmul(x.T, (p - t)) + net._lam * net._weights) / len(p)  # shape: (3,)
        grad_b = numpy.mean(p - t)
        if (epoch + 1) % 100 == 0:
            print(f"Epoch {epoch + 1} | Error: {e} with weights {net._weights} and bias {net._bias}")
            print(f"grad_w: {grad_w} --- grad_b: {grad_b}")
        # update weights and bias (gradient descent step)
        net._weights += lr * (-grad_w)
        net._bias += lr * (-grad_b)
NN = MyLogReg(3)
# # -----------------------------------------------------------------------------
# # training process
print(80*"--")
print("Training process started!\n")
print(numpy.shape(x_logregdata), numpy.shape(y_logregdata))
train(NN, x_logregdata, y_logregdata, 1000, 0.5)
Training output at last epoch:
Epoch 1000 | Error: 0.6919551095870804 with weights [ 0.01013472 0.04960763 -0.06680454] and bias -0.012293556999268884
grad_w: [-4.62077479e-18 6.62621000e-18 7.92638524e-18] --- grad_b: -1.3322676295501879e-18
After training on the entire data set, predictions look random and are all close to 0.5, even though gradients have become very small:
for i in range(10):
    print((NN.pred(x_logregdata[i, :]), y_logregdata[i]))
OUT:
(0.4434940888958877, 0.0)
(0.5259920670963649, 0.0)
(0.49219604260551786, 1.0)
(0.4998723781629637, 1.0)
(0.4707235000047709, 0.0)
(0.525400671366715, 1.0)
(0.48097184157406053, 0.0)
(0.4562378027506945, 0.0)
(0.46077410720140005, 0.0)
(0.531856873614567, 0.0)
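(To rule out a mistake in the analytic gradient, one quick sanity check is a central finite-difference estimate of logloss; this is a minimal sketch assuming the logloss and MyLogReg definitions above. Note that logloss averages the L2 term over the weights while grad_w divides it by the sample count, so the two only agree up to that small regularization contribution.)
# sketch of a finite-difference gradient check (not part of the original script):
# perturb each weight in turn and compare the slope of logloss against grad_w
def numerical_grad_w(net, x, y, eps=1e-6):
    grad = numpy.zeros_like(net._weights)
    for i in range(len(net._weights)):
        net._weights[i] += eps
        j_plus = logloss(net, x, y)
        net._weights[i] -= 2 * eps
        j_minus = logloss(net, x, y)
        net._weights[i] += eps  # restore the original weight
        grad[i] = (j_plus - j_minus) / (2 * eps)
    return grad

print(numerical_grad_w(NN, x_logregdata, y_logregdata))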
Github repo is here: Click.
CodePudding user response:
It's unclear why you want to initialize your weights with random values; if you pass equal weights instead, it converges pretty well within 100 epochs:
class MyLogReg:
    def __init__(self, n):
        self._numinputs = n
        self._weights = numpy.repeat(1.0, n)
        self._bias = 0  # or = numpy.random.rand(1)
        self._lam = 0.01  # lambda parameter for regularization

    def pred(self, x):
        return sigmoid(numpy.matmul(x, self._weights) + self._bias)
import numpy as np
from sklearn.datasets import make_classification
X, y = make_classification(n_features=3, n_redundant=1, n_informative=2,
                           class_sep=0.7, random_state=22)
NN = MyLogReg(3)
train(NN, X, y, 100, 0.5)
Epoch 100 | Error: 0.3930719987698364 with weights [0.08555415 1.38852617 1.65479616] and bias 0.27656565660444704
grad_w: [ 7.85974808e-05 -8.63304471e-04 -1.02206054e-03] --- grad_b: -0.00022828688162301436
NN.pred(X)[:20]
array([0.04147539, 0.75444612, 0.92599311, 0.92906995, 0.330026 ,
0.96483765, 0.90184527, 0.21597661, 0.43732915, 0.17307697,
0.98569769, 0.11334725, 0.90186428, 0.96431985, 0.27836055,
0.05338276, 0.02682678, 0.96073064, 0.32182455, 0.57531559])
We can check the training accuracy:
from sklearn.metrics import confusion_matrix
confusion_matrix(y,(NN.pred(X)>0.5).astype(int))
array([[43, 7],
[ 6, 44]])
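That comes out to (43 + 44)/100 = 87% training accuracy; equivalently:
from sklearn.metrics import accuracy_score
print(accuracy_score(y, (NN.pred(X) > 0.5).astype(int)))  # 0.87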
If you really do want to initialize the weights from a random uniform distribution, I suspect you need to increase the learning rate for it to converge.
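For example, a rough sketch of that idea (the learning rate of 5.0 and the 1000 epochs below are illustrative guesses, not tuned values):
# illustrative only: retry the random uniform initialization with a
# larger learning rate and more epochs; these values are not tuned
NN2 = MyLogReg(3)
NN2._weights = numpy.random.rand(3)  # back to random uniform init
train(NN2, X, y, 1000, 5.0)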