Home > Enterprise >  How do I use a pt file in Pytorch to predict the label of a new data?
How do I use a pt file in Pytorch to predict the label of a new data?

Time:09-14

This is my training model run.py, my data is a one-dimensional matrix with one row and one category.

import numpy as np # linear algebra
import pandas as pd
import os
for dirname, _, filenames in os.walk('./kaggle'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import torch
from torch.utils.data import DataLoader
from torch import nn,optim
import sys
from tqdm import tqdm
import io
import torch.utils.model_zoo as model_zoo
import torch.onnx

def my_DataLoader(train_root,test_root,batch_size = 100, val_split_factor = 0.2):

    train_df = pd.read_csv(train_root, header=None)
    test_df = pd.read_csv(test_root, header=None)

    train_data = train_df.to_numpy()
    test_data = test_df.to_numpy()

    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data[:, :-1]).float(),
                                                   torch.from_numpy(train_data[:, -1]).long(),)#
    test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :-1]).float(),
                                                  torch.from_numpy(test_data[:, -1]).long())

    train_len = train_data.shape[0]
    val_len = int(train_len * val_split_factor)
    train_len -= val_len

    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_len, val_len])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, val_loader, test_loader
class  conv_net(nn.Module):

    def __init__(self, num_of_class):
        super(conv_net, self).__init__()

        self.model = nn.Sequential(
            #nn.Conv1d(1, 16, kernel_size=5, stride=1, padding=2),
            #nn.Conv1d(1, 16, kernel_size=1, stride=1),
            nn.Conv1d(1, 16, kernel_size=1, stride=1),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2),

            nn.Conv1d(16, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )

        #self.relu = nn.ReLU()
        self.linear = nn.Sequential(
            #nn.Linear(5120,32),
            nn.Linear(5120,32),
            nn.LeakyReLU(inplace=True),
            nn.Linear(32, num_of_class),
        )

    def forward(self,x):
        #org = x
        x = x.unsqueeze(1)
        x = self.model(x)
        #x = self.relu(x)
        # print(x.shape)
        x = x.view(x.size(0), -1)
        #x [b, 2944]
        # print(x.shape)
        x = self.linear(x)
        return x
batch_size=32
lr = 3e-3
epochs = 150
torch.manual_seed(1234)
#device = torch.device("cpu:0 cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

def evalute(model, loader):
    model.eval()

    correct = 0
    total = len(loader.dataset)
    val_bar = tqdm(loader, file=sys.stdout)
    for x, y in val_bar:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            logits = model(x)
            pred = logits.argmax(dim=1)
        correct  = torch.eq(pred, y).sum().float().item()


    return correct / total
def main():

    train_loader, val_loader, test_loader = my_DataLoader('./kaggle/train.csv',
                                                          './kaggle/test.csv',
                                                          batch_size=batch_size,
                                                          val_split_factor=0.2)

    model = conv_net(8).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss()
    # Print model's state_dict
    print(model)

    best_acc, best_epoch = 0, 0
    global_step = 0

    for epoch in range(epochs):

        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, (x, y) in enumerate(train_bar):
            # x: [b, 187], y: [b]
            x, y = x.to(device), y.to(device)

            model.train()
            logits = model(x)
            loss = criteon(logits, y)

            optimizer.zero_grad()
            loss.backward()

            # for param in model.parameters():
            #     print(param.grad)

            optimizer.step()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch   1,
                                                                     epochs,
                                                                     loss)

            global_step  = 1
        if epoch % 1 == 0:  # You can change the validation frequency as you wish

            val_acc = evalute(model, val_loader)
            print('val_acc = ',val_acc)
            if val_acc > best_acc:
                best_epoch = epoch
                best_acc = val_acc
                # Export the model
                name_pt = 'best3.pt'
                torch.save(model.state_dict(), name_pt)
    print('best acc:', best_acc, 'best epoch:', best_epoch)
    model.load_state_dict(torch.load(name_pt))
    print('loaded from ckpt!')
    
    test_acc = evalute(model, test_loader)
    print('test acc:', test_acc)

if __name__ == '__main__':
    main()

Then I try to make predictions and modify with reference to other people's code

import torch
from torchvision.transforms import transforms
import pandas as pd
from PIL import Image
from run import conv_net
from pathlib import Path

name_pt = 'best3.pt'
model = conv_net(8)
checkpoint = torch.load(name_pt)
model.load_state_dict(checkpoint)

testdata = './kaggle/onedata.csv'
test_df = pd.read_csv(testdata, header=None)
test_data = test_df.to_numpy()
csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())

output = model(csv)

prediction = int(torch.max(output.data, 1)[1].numpy())
print(prediction)

if (prediction == 0):
    print ('other')
if (prediction == 1):
    print ('100%PET')
if (prediction == 2):
    print ('100% Cotton')
if (prediction == 3):
    print ('100% Nylon')
if (prediction == 4):
    print ('>70% PET')
if (prediction == 5):
    print ('<70% PET')
if (prediction == 6):
    print ('Spandex/PET Spandex<5%')
if (prediction == 7):
    print ('Spandex/PET Spandex>5%')

Something went wrong

  File "C:\Users\54-0461100-01\Desktop\for_spec_train\run.py", line 70, in forward
    x = x.unsqueeze(1)
AttributeError: 'TensorDataset' object has no attribute 'unsqueeze'

Most of the questions are for images, not found on CSV files.Any help is appreciated if you have any suggestions.

By the way this is my data format.

LJ column are labels,train and test set are same format enter image description here onedata format enter image description here

CodePudding user response:

When calling output = model(csv) you are passing the model a 'TensorDataset' object as the input instead of a tensor. You can access the tensors in this object by indexing it. https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#TensorDataset

Additionally, you can avoid the TensorDataset object all together by replacing

csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())

with

csv = torch.from_numpy(test_data[:, :]).float()
  • Related