I am comparing the accuracy of sklearn's MLPRegressor with an equivalent network in PyTorch, but the PyTorch model is always much worse and I can't figure out why. My code for both is below.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), y.ravel(),
                                                     test_size=0.15,
                                                     random_state=0, shuffle=True)
# print(X_train)
layers = (78, 22, 8, 3, 3, 1)
regr_nn = MLPRegressor(hidden_layer_sizes=layers, random_state=0, max_iter=20000,
                       solver='lbfgs',
                       activation='tanh', alpha=1e-5)
regr_nn.fit(X_train, y_train)
y_predict_test_nn = regr_nn.predict(X_test)    # predict on the features, not the targets
y_predict_train_nn = regr_nn.predict(X_train)
test_score = regr_nn.score(X_test, y_test)
train_score = regr_nn.score(X_train, y_train)

import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import r2_score

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X),
                                                     y.ravel(), test_size=0.15,
                                                     random_state=0)
# convert the numpy arrays to torch tensors
x_test, y_test = torch.from_numpy(X_test.astype('float')), torch.from_numpy(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))
x_train, y_train = torch.from_numpy(X_train.astype('float')), torch.from_numpy(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))
class Train_set(torch.utils.data.Dataset):
    def __init__(self, X, y):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
        else:
            self.X = X
            self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
class Net(torch.nn.Module):
    def __init__(self, n_feature):
        super(Net, self).__init__()
        self.regress = nn.Sequential(nn.Linear(n_feature, 78), nn.Tanh(),
                                     nn.Linear(78, 22), nn.Tanh(),
                                     nn.Linear(22, 8), nn.Tanh(),
                                     nn.Linear(8, 3), nn.Tanh(),
                                     nn.Linear(3, 3), nn.Tanh(),
                                     nn.Linear(3, 1))

    def forward(self, x):
        # cast inputs to float and run them through the stack of Tanh hidden layers
        return self.regress(x.float())
cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device was not defined in the posted snippet

net = Net(n_feature=x_train.size(1))
net.to(cuda)
# print(net)  # net architecture

optimizer = torch.optim.LBFGS(net.parameters(), max_iter=20000, lr=1e-5,
                              tolerance_grad=1e-07, tolerance_change=1e-05)
loss_func = torch.nn.MSELoss()  # mean squared error loss for regression

train_set = Train_set(x_train, y_train)
trainloader = DataLoader(train_set, batch_size=10, shuffle=True)
CL = []
# train the network
for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(cuda), targets.to(cuda)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
    optimizer.step(closure)  # apply gradients
prediction_train = net(x_train.to(cuda))
prediction_test = net(x_test.to(cuda))
train_score = r2_score(y_train.data.numpy(), prediction_train.data.cpu().numpy())
test_score = r2_score(y_test.data.numpy(), prediction_test.data.cpu().numpy())
The R^2 score from sklearn is above 0.9 and the parity plot looks like a line, but the scores from PyTorch are close to zero and the parity plot looks awful (parity plots of the sklearn and PyTorch results omitted). I would really appreciate any help. Thank you!
CodePudding user response:
I think your closure function needs to be inside the trainloader loop:
for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(cuda), targets.to(cuda)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
        optimizer.step(closure)  # <<< now called inside the loop, right after the end of `closure`
I can't say for sure, as I haven't used LBFGS much, but I believe your current method will only step the optimizer once per epoch.
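To illustrate the difference, here is a minimal instrumentation sketch, not part of the original code: it reuses `trainloader`, `net`, `optimizer`, `loss_func`, and `cuda` from the question and adds a hypothetical `counts` dict. With `optimizer.step(closure)` inside the batch loop it reports one step per batch; dedent it to the epoch level and you would see only one step per epoch. Note that LBFGS may evaluate the closure several times for a single step.

counts = {"steps": 0, "closure_evals": 0}  # hypothetical counters, for illustration only

for t in range(10):
    for i, data in enumerate(trainloader, 0):
        def closure():
            counts["closure_evals"] += 1                 # LBFGS can call this several times per step
            inputs, targets = data
            inputs, targets = inputs.float().to(cuda), targets.float().to(cuda)
            targets = targets.reshape((targets.shape[0], 1))
            optimizer.zero_grad()
            loss = loss_func(net(inputs), targets)
            loss.backward()
            return loss
        optimizer.step(closure)                          # one optimization step per batch
        counts["steps"] += 1

print(f"steps: {counts['steps']}, closure evaluations: {counts['closure_evals']}")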