I need help writing my own logistic regression model. The input data (generated via sklearn.datasets.make_classification) looks like this (first 10 entries printed):
[[ 0.74186571 -1.69239663 2.06965145]
[-1.80076727 0.59700581 -1.57159523]
[ 1.0328198 0.62274582 0.90241322]
[-0.63972474 2.12054103 1.30124807]
[ 1.04275475 -0.86879077 1.08399317]
[-1.12772782 0.26396098 -1.68130012]
[ 0.92281318 -1.15431326 0.23868389]
[-0.37260971 -0.97979894 1.65890322]
[ 0.4513904 0.30502349 2.46449598]
[-2.79502998 0.05500871 -2.47725562]]
Output like this:
[0. 0. 1. 1. 0. 1. 0. 0. 0. 0.]
Here is my code:
import numpy
import math
# Load the feature matrix and the target vector from CSV files.
# `path` is a directory prefix that already ends with a separator, so the
# file name is appended with plain string concatenation.
path = r"C:\Users\felix\sciebo2\Atom_working_dir\ML_sandbox\\" # put in global path here
x_logregdata = numpy.genfromtxt(path + "x_logregdata.csv", delimiter=",")
y_logregdata = numpy.genfromtxt(path + "y_logregdata.csv", delimiter=",")
class MyLogReg:
    """Minimal logistic-regression model: a weight vector plus a scalar bias."""

    def __init__(self, n):
        """Create a model for inputs with ``n`` features.

        Args:
            n: Number of input features (length of the weight vector).
        """
        self._numinputs = n
        # Weights start as uniform random floats drawn from [0, 1).
        self._weights = numpy.random.rand(n)
        self._bias = 0  # or = numpy.random.rand(1)
        self._lam = 0.01  # lambda parameter for regularization

    def pred(self, x):
        """Return the predicted probability ``sigmoid(x . w + b)``.

        Args:
            x: A single feature vector of length ``n`` or a matrix of shape
                (n_samples, n_features).

        Returns:
            The sigmoid of the linear score, one value per sample.
        """
        return sigmoid(numpy.matmul(x, self._weights) + self._bias)
# -----------------------------------------------------------------------------
# functions
def logloss(net, x, y):
    """Return the regularized mean log loss of ``net`` on ``(x, y)``.

    Args:
        net: Model exposing ``_weights``, ``_bias`` and ``_lam``.
        x: Feature matrix, shape (n_samples, n_features).
        y: Target vector of 0/1 labels, one per sample.

    Returns:
        The mean binary cross-entropy plus ``mean(lam / 2 * w**2)``.
    """
    # (n_samples, n_features) x (n_features,) -> one probability per sample
    p = sigmoid(numpy.matmul(x, net._weights) + net._bias)
    # Keep probabilities strictly inside (0, 1) so log() never receives 0.
    eps = 1e-15
    p = numpy.clip(p, eps, 1 - eps)
    # The dot products sum the per-sample terms; dividing by len(p) averages.
    logloss_term = (
        numpy.matmul(-y.T, numpy.log(p))
        - numpy.matmul((1 - y.T), numpy.log(1 - p))
    ) / len(p)
    regularization_term = numpy.mean(net._weights**2 * net._lam / 2)
    J = logloss_term + regularization_term
    return J
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise.

    Args:
        z: Linear score ``b + w1*x1 + w2*x2 + ... + wn*xn`` (scalar or array).
    """
    return 1 / (1 + numpy.exp(-z))
def train(net, x, t, epochs, lr):
    """Fit ``net`` to ``(x, t)`` with full-batch gradient descent.

    Each epoch moves ``net._weights`` and ``net._bias`` one step against the
    gradient; a progress report is printed every 100 epochs.

    Args:
        net: Model exposing ``pred``, ``_weights``, ``_bias`` and ``_lam``.
        x: Feature matrix, shape (n_samples, n_features).
        t: Target vector of 0/1 labels, one per sample.
        epochs: Number of gradient-descent steps to perform.
        lr: Learning rate (step size).
    """
    for epoch in range(epochs):
        # x comes in the shape of {n_samples, n_features}
        p = net.pred(x)  # make predictions
        # NOTE(review): the loss averages the penalty over the features, while
        # this gradient divides the penalty by the number of samples; confirm
        # which scaling is intended.
        grad_w = (numpy.matmul(x.T, (p - t)) + net._lam * net._weights) / len(p)
        grad_b = numpy.mean(p - t)
        if (epoch + 1) % 100 == 0:
            # The loss is only needed for reporting, so compute it on demand.
            e = logloss(net, x, t)
            print(f"Epoch {epoch + 1} | Error: {e} with weights {net._weights} and bias {net._bias}")
            print(f"grad_w: {grad_w} --- grad_b: {grad_b}")
        # Update the model that was passed in (not a global) by stepping
        # against the gradient; the step must be accumulated, not assigned.
        net._weights = net._weights - lr * grad_w
        net._bias = net._bias - lr * grad_b
NN = MyLogReg(3)  # one weight per input column
# -----------------------------------------------------------------------------
# training process
print("Training process started!\n")
train(net=NN, x=x_logregdata, t=y_logregdata, epochs=100, lr=0.5)
Training output at last epoch:
Epoch 1000 | Error: 0.6919551095870804 with weights [ 0.01013472 0.04960763 -0.06680454] and bias -0.012293556999268884
grad_w: [-4.62077479e-18 6.62621000e-18 7.92638524e-18] --- grad_b: -1.3322676295501879e-18
After training on the entire data set, predictions look random and are all close to 0.5, even though gradients have become very small:
# Print (predicted probability, true label) for the first 10 samples.
for features, label in zip(x_logregdata[:10], y_logregdata[:10]):
    print((NN.pred(features), label))
(0.4434940888958877, 0.0)
(0.5259920670963649, 0.0)
(0.49219604260551786, 1.0)
(0.4998723781629637, 1.0)
(0.4707235000047709, 0.0)
(0.525400671366715, 1.0)
(0.48097184157406053, 0.0)
(0.4562378027506945, 0.0)
(0.46077410720140005, 0.0)
(0.531856873614567, 0.0)
Github repo is here: Click.
CodePudding user response:
It's unclear why you want to initialize the weights randomly. If you give every feature the same initial weight, the model converges reasonably well within 100 epochs:
class MyLogReg:
    """Logistic-regression model whose weights all start at the same value."""

    def __init__(self, n):
        """Create a model for inputs with ``n`` features.

        Args:
            n: Number of input features (length of the weight vector).
        """
        self._numinputs = n
        # Every weight starts at 1.0 instead of a random value.
        self._weights = numpy.repeat(1.0, n)
        self._bias = 0  # or = numpy.random.rand(1)
        self._lam = 0.01  # lambda parameter for regularization

    def pred(self, x):
        """Return the predicted probability ``sigmoid(x . w + b)``.

        Args:
            x: A single feature vector of length ``n`` or a matrix of shape
                (n_samples, n_features).

        Returns:
            The sigmoid of the linear score, one value per sample.
        """
        return sigmoid(numpy.matmul(x, self._weights) + self._bias)
import numpy as np
from sklearn.datasets import make_classification
# NOTE(review): the original call was cut off after a trailing comma, so any
# further keyword arguments are unknown; everything else uses the defaults.
X, y = make_classification(n_features=3, n_redundant=1, n_informative=2)
NN = MyLogReg(3)
train(NN, X, y, 100, 0.5)
Epoch 100 | Error: 0.3930719987698364 with weights [0.08555415 1.38852617 1.65479616] and bias 0.27656565660444704
grad_w: [ 7.85974808e-05 -8.63304471e-04 -1.02206054e-03] --- grad_b: -0.00022828688162301436
array([0.04147539, 0.75444612, 0.92599311, 0.92906995, 0.330026 ,
0.96483765, 0.90184527, 0.21597661, 0.43732915, 0.17307697,
0.98569769, 0.11334725, 0.90186428, 0.96431985, 0.27836055,
0.05338276, 0.02682678, 0.96073064, 0.32182455, 0.57531559])
We can check the training accuracy:
from sklearn.metrics import confusion_matrix
array([[43, 7],
[ 6, 44]])
If you really want to initialize the weights from a random uniform distribution, I suspect you need to increase the learning rate for it to converge.