TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tenso-CodePudding

I have the error being displayed whilst trying to plot the graph...

I am sharing the code in the following link: https://colab.research.google.com/drive/1PooWIPVhm67iZquqZvxz3mdfmd6rv-3d#scrollTo=qSM7mNrKhBOt

I think I'm missing 'tensor.cpu()' somewhere but I can't really pinpoint it.. Everything else works :/ Can anyone help please?

def train_epoch(
  model,
  data_loader,
  loss_fn,
  optimizer,
  device,
  scheduler,
  n_examples
):
  model = model.train()
  losses = []
  correct_predictions = 0
  for d in data_loader:
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask
    )
    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    correct_predictions  = torch.sum(preds == targets)
    losses.append(loss.item())
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
  return correct_predictions.double() / n_examples, np.mean(losses)

def eval_model(model, data_loader, loss_fn, device, n_examples):
  model = model.eval()
  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)
      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
      )
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)
      correct_predictions  = torch.sum(preds == targets)
      losses.append(loss.item())
  return correct_predictions.double() / n_examples, np.mean(losses)

%%time
history = defaultdict(list)
best_accuracy = 0
for epoch in range(EPOCHS):
  print(f'Epoch {epoch   1}/{EPOCHS}')
  print('-' * 10)
  train_acc, train_loss = train_epoch(
    model,
    train_data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    len(df_train)
  )
  print(f'Train loss {train_loss} accuracy {train_acc}')
  val_acc, val_loss = eval_model(
    model,
    val_data_loader,
    loss_fn,
    device,
    len(df_val)
  )
  print(f'Val   loss {val_loss} accuracy {val_acc}')
  print()
  history['train_acc'].append(train_acc)
  history['train_loss'].append(train_loss)
  history['val_acc'].append(val_acc)
  history['val_loss'].append(val_loss)
  if val_acc > best_accuracy:
    torch.save(model.state_dict(), 'best_model_state.bin')
    best_accuracy = val_acc

plt.plot(history['train_acc'], label='train accuracy')
plt.plot(history['val_acc'], label='validation accuracy')

plt.title('Training history')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.ylim([0, 1]);

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py in index_of(y)
   1626     try:
-> 1627         return y.index.values, y.values
   1628     except AttributeError:

AttributeError: 'builtin_function_or_method' object has no attribute 'values'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
8 frames
<__array_function__ internals> in atleast_1d(*args, **kwargs)

/usr/local/lib/python3.7/dist-packages/torch/_tensor.py in __array__(self, dtype)
    730             return handle_torch_function(Tensor.__array__, (self,), self, dtype=dtype)
    731         if dtype is None:
--> 732             return self.numpy()
    733         else:
    734             return self.numpy().astype(dtype, copy=False)

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

CodePudding user response：

When you are computing the number of correct predictions: correct_predictions = torch.sum(preds == targets), both preds and targets are CUDA tensors, which matplotlib knows nothing about.

In this case, we should detach the tensor (to stop Autograd tracking it), push the data from GPU to CPU, and convert it to numpy elements, like so: torch.sum(preds == targets).detach().cpu().numpy().

Further, since the number of correct predictions is just a single number, we can just do torch.sum(preds == targets).item() which is a shorthand for the above, but only if the tensor is a singleton.

This way, correct_predictions is a Python integer, & you can return float(correct_predictions) / n_examples from your methods and pass them onto matplotlib.

For further reading: