PyTorch NN not as good as sklearn MLP


I am comparing the accuracy of sklearn's MLPRegressor with an equivalent net in PyTorch, but the PyTorch model is always much worse and I can't figure out why. Here is my code for both:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from tqdm import tqdm

poly = PolynomialFeatures(2,interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X),y.ravel(),
                                                    test_size=0.15, 
                                                    random_state=0,shuffle=True)       
#print(X_train)
layers = (78,22,8,3,3,1)
regr_nn = MLPRegressor(hidden_layer_sizes=layers,random_state=0, max_iter=20000,
                       solver='lbfgs',
                       activation='tanh',alpha=1e-5)
regr_nn.fit(X_train, y_train)
y_predict_test_nn = regr_nn.predict(X_test)
y_predict_train_nn = regr_nn.predict(X_train)
test_score = regr_nn.score(X_test, y_test)
train_score = regr_nn.score(X_train, y_train)
poly = PolynomialFeatures(2,interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), 
                                                    y.ravel(),test_size=0.15,
                                                    random_state=0)
# convert the numpy arrays to torch tensors
x_test, y_test = torch.from_numpy(X_test.astype('float')), torch.from_numpy(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))
x_train, y_train = torch.from_numpy(X_train.astype('float')), torch.from_numpy(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))
class Train_set(torch.utils.data.Dataset):
  def __init__(self, X, y):
    if not torch.is_tensor(X) and not torch.is_tensor(y):
      self.X = torch.from_numpy(X)
      self.y = torch.from_numpy(y)
    else:
      self.X = X
      self.y = y

  def __len__(self):
      return len(self.X)

  def __getitem__(self, i):
      return self.X[i], self.y[i]


class Net(torch.nn.Module):
    def __init__(self, n_feature):
      super(Net, self).__init__()
      self.regress = nn.Sequential(nn.Linear(n_feature,78),nn.Tanh(),
          nn.Linear(78, 22),nn.Tanh(), 
          nn.Linear(22, 8),nn.Tanh(), nn.Linear(8, 3),nn.Tanh(),
          nn.Linear(3,3), nn.Tanh(),nn.Linear(3, 1))

    def forward(self, x):
      return self.regress(x.float())      # cast to float and run the stack of linear + tanh layers


cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Net(n_feature=x_train.size(1))
net.to(cuda)

# print(net)  # net architecture
optimizer = torch.optim.LBFGS(net.parameters(), max_iter=20000, lr=1e-5,
                              tolerance_grad=1e-07,tolerance_change=1e-05)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss    
train_set = Train_set(x_train,y_train)
trainloader = DataLoader(train_set, batch_size=10, shuffle=True)
CL = []
# train the network
for t in tqdm(range(10)):
  for i, data in enumerate(trainloader, 0):
    def closure():
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      inputs, targets = inputs.to(cuda), targets.to(cuda)
      targets = targets.reshape((targets.shape[0], 1))
      # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = net(inputs)
      # Compute loss
      loss = loss_func(outputs, targets)
      # Perform backward pass
      loss.backward()
      return loss
    optimizer.step(closure)     # apply gradients
with torch.no_grad():
  prediction_train = net(x_train.to(cuda))
  prediction_test = net(x_test.to(cuda))
train_score = r2_score(y_train.numpy(), prediction_train.cpu().numpy())
test_score = r2_score(y_test.numpy(), prediction_test.cpu().numpy())

The R^2 score from sklearn is above 0.9 and the parity plot looks like a line, but the scores from PyTorch are close to zero and the parity plot looks awful (parity plots of the sklearn and PyTorch results omitted here). I would really appreciate any help. Thank you!

CodePudding user response:

I think your closure function needs to be inside the trainloader loop:

for t in tqdm(range(10)):
  for i, data in enumerate(trainloader, 0):
    def closure():
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      inputs, targets = inputs.to(cuda), targets.to(cuda)
      targets = targets.reshape((targets.shape[0], 1))
      # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = net(inputs)
      # Compute loss
      loss = loss_func(outputs, targets)
      # Perform backward pass
      loss.backward()
      return loss
    optimizer.step(closure)     # <<< called right after the END of `closure`, once per batch

I can't say for sure as I haven't used LBFGS too much, but I believe your current method will only step once per epoch.
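
For reference, here is a minimal, self-contained sketch of the usual LBFGS closure pattern; the toy data, layer sizes, and learning rate are made up for illustration and are not your setup. The point is that optimizer.step(closure) sits inside the loop right after the closure is defined, and LBFGS may evaluate the closure several times within a single step, which the counter below makes visible.

import torch
import torch.nn as nn

# Toy regression data (sizes are illustrative only)
X = torch.randn(64, 5)
y = X.sum(dim=1, keepdim=True)

model = nn.Sequential(nn.Linear(5, 8), nn.Tanh(), nn.Linear(8, 1))
loss_func = nn.MSELoss()
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1, max_iter=20)

n_evals = [0]  # counts how many times LBFGS evaluates the closure

for epoch in range(5):
    def closure():
        n_evals[0] += 1
        optimizer.zero_grad()           # clear old gradients
        loss = loss_func(model(X), y)   # forward pass
        loss.backward()                 # backward pass
        return loss                     # LBFGS needs the loss returned
    optimizer.step(closure)             # step taken here, inside the loop

print(f"closure evaluated {n_evals[0]} times over 5 optimizer steps")
print("final loss:", loss_func(model(X), y).item())

This is only a sketch of the calling pattern, not a tuned training setup.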
