Defining my own gradient function for pytorch to use


I want to feed pytorch gradients manually. In my real problem, I have my own adjoint function that does not use tensors. Is there any way I can define my own gradient function for pytorch to use during optimization?

import numpy as np
import torch

# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
   return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2

def jac(x):
   dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
   dx2 = 2 * b * (x[1] - x[0] ** 2)
   return np.array([dx1, dx2])

# create stochastic rosenbrock function and gradient
# (the crude analogy is that I have predefined stochastic
#  forward and backward functions)
def f_rand(x):
   return f(x) * np.random.uniform(0.5, 1.5)

def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)


x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)

# here, closure is fed f_rand to compute the gradient.
# I need to feed closure the gradient directly from jac_rand
def closure():
   optimizer.zero_grad()
   loss = f_rand(x_tensor)
   loss.backward() # jac_rand(x)
   return loss

for ii in range(200):
   optimizer.step(closure) 

print(x_tensor, f(x_tensor))
  # tensor([1.0000, 1.0000], dtype=torch.float64, requires_grad=True) tensor(4.5799e-09, dtype=torch.float64, grad_fn=<AddBackward0>)
  # ( this is the right answer, E[f(1, 1)] = 0 )

I've tried defining a custom function, but I can't get it to work. This is my best attempt so far:

import numpy as np
import torch

# define rosenbrock function and gradient

x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
   return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2

def jac(x):
   dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
   dx2 = 2 * b * (x[1] - x[0] ** 2)
   return np.array([dx1, dx2])

# create stochastic rosenbrock function and gradient
def f_rand(x):
   return f(x) * np.random.uniform(0.5, 1.5)

def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)

class custom_function(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input):
       ctx.save_for_backward(input)
       return f_rand(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * jac_rand(input)

x_tensor = torch.tensor(x0, requires_grad=False)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)

for ii in range(200):
   print('x_tensor ', x_tensor)
   optimizer.step(custom_function())

print(x_tensor, f(x_tensor))

It says:

RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)

CodePudding user response:

Not quite sure if this is exactly what you want, but loss.backward() computes gradients through PyTorch's computational graph and stores them in the parameter tensors themselves (in your case, in x_tensor), where they can be read back via x_tensor.grad. If you don't want PyTorch to compute the gradient for you with loss.backward(), you can write your own gradient into the tensor's .grad attribute manually, for example:

def closure():
    optimizer.zero_grad()
    with torch.no_grad():
        x_np = x_tensor.detach().numpy()   # hand plain numpy values to f_rand / jac_rand
        loss = f_rand(x_np)
        # write the externally computed gradient straight into .grad
        x_tensor.grad = torch.from_numpy(jac_rand(x_np))
    return loss
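
For completeness, here is a minimal driver loop for that closure (my own sketch, assuming the x0, f_rand and jac_rand definitions from the question): Adam only reads whatever is stored in x_tensor.grad, so no autograd graph is ever built.

x_tensor = torch.tensor(x0, requires_grad=False)  # autograd is not needed for this approach
optimizer = torch.optim.Adam([x_tensor], lr=0.1)

for ii in range(200):
    optimizer.step(closure)   # Adam uses the gradient the closure wrote into x_tensor.grad

print(x_tensor, f(x_tensor.numpy()))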

CodePudding user response:

I made some modifications, mainly the learning rate and the number of iterations. You will see the loss goes to zero as the tensor approaches (a, a²).

import numpy as np
import torch

# define rosenbrock function and gradient
np.random.seed(0)
x0 = np.array([0.1, 0.1])
a = 2
b = 100
def f(x):
   return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
   
def jac(x):
   dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
   dx2 = 2 * b * (x[1] - x[0] ** 2)
   return np.array([dx1, dx2])

# create stochastic rosenbrock function and gradient
def f_rand(x):
    #return f(x)
    return f(x) * np.random.uniform(0.5, 1.5)

def jac_rand(x):
    #return jac(x)
    return jac(x) * np.random.uniform(0.5, 1.5)

class CustomFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
       ctx.save_for_backward(input)
       return f_rand(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * jac_rand(input)
custom_function = CustomFunction.apply
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.0001)
print('x_tensor ', x_tensor)
for ii in range(100000):
   optimizer.zero_grad()                # clear the gradient left from the previous step
   output = custom_function(x_tensor)
   loss = round(output.item(), 4)
   if loss < 0.0001:
       print('loss: ', loss)
       break
   print('loss: ', loss)
   output.backward()                    # calls CustomFunction.backward, i.e. jac_rand
   optimizer.step()

print(x_tensor, f(x_tensor))
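
If you want to convince yourself that the custom backward really is handing your Jacobian back to the optimizer, one quick check (my own sketch, not part of the answer above; DeterministicFunction is just an illustrative name) is to strip out the randomness and compare the gradient stored in .grad against the analytic jac directly:

# Sanity check: with the noise removed, the gradient produced by the
# custom backward should equal the analytic jac() exactly.
import numpy as np
import torch

a, b = 2, 100

def f(x):
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2

def jac(x):
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])

class DeterministicFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return f(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        # hand the analytic gradient back to autograd
        return grad_output * torch.from_numpy(jac(input.detach().numpy()))

x = torch.tensor([0.1, 0.1], dtype=torch.float64, requires_grad=True)
DeterministicFunction.apply(x).backward()
print(x.grad.numpy())           # gradient from the custom backward
print(jac(x.detach().numpy()))  # analytic gradient -- should match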