This is my training model run.py
, my data is a one-dimensional matrix with one row and one category.
import numpy as np # linear algebra
import pandas as pd
import os
for dirname, _, filenames in os.walk('./kaggle'):
for filename in filenames:
print(os.path.join(dirname, filename))
import torch
from torch.utils.data import DataLoader
from torch import nn,optim
import sys
from tqdm import tqdm
import io
import torch.utils.model_zoo as model_zoo
import torch.onnx
def my_DataLoader(train_root,test_root,batch_size = 100, val_split_factor = 0.2):
train_df = pd.read_csv(train_root, header=None)
test_df = pd.read_csv(test_root, header=None)
train_data = train_df.to_numpy()
test_data = test_df.to_numpy()
train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data[:, :-1]).float(),
torch.from_numpy(train_data[:, -1]).long(),)#
test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :-1]).float(),
torch.from_numpy(test_data[:, -1]).long())
train_len = train_data.shape[0]
val_len = int(train_len * val_split_factor)
train_len -= val_len
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_len, val_len])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
return train_loader, val_loader, test_loader
class conv_net(nn.Module):
def __init__(self, num_of_class):
super(conv_net, self).__init__()
self.model = nn.Sequential(
#nn.Conv1d(1, 16, kernel_size=5, stride=1, padding=2),
#nn.Conv1d(1, 16, kernel_size=1, stride=1),
nn.Conv1d(1, 16, kernel_size=1, stride=1),
nn.BatchNorm1d(16),
nn.ReLU(),
nn.MaxPool1d(2),
nn.Conv1d(16, 64, kernel_size=5, stride=1, padding=2),
nn.BatchNorm1d(64),
nn.ReLU(),
nn.MaxPool1d(2),
)
#self.relu = nn.ReLU()
self.linear = nn.Sequential(
#nn.Linear(5120,32),
nn.Linear(5120,32),
nn.LeakyReLU(inplace=True),
nn.Linear(32, num_of_class),
)
def forward(self,x):
#org = x
x = x.unsqueeze(1)
x = self.model(x)
#x = self.relu(x)
# print(x.shape)
x = x.view(x.size(0), -1)
#x [b, 2944]
# print(x.shape)
x = self.linear(x)
return x
batch_size=32
lr = 3e-3
epochs = 150
torch.manual_seed(1234)
#device = torch.device("cpu:0 cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
def evalute(model, loader):
model.eval()
correct = 0
total = len(loader.dataset)
val_bar = tqdm(loader, file=sys.stdout)
for x, y in val_bar:
x, y = x.to(device), y.to(device)
with torch.no_grad():
logits = model(x)
pred = logits.argmax(dim=1)
correct = torch.eq(pred, y).sum().float().item()
return correct / total
def main():
train_loader, val_loader, test_loader = my_DataLoader('./kaggle/train.csv',
'./kaggle/test.csv',
batch_size=batch_size,
val_split_factor=0.2)
model = conv_net(8).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criteon = nn.CrossEntropyLoss()
# Print model's state_dict
print(model)
best_acc, best_epoch = 0, 0
global_step = 0
for epoch in range(epochs):
train_bar = tqdm(train_loader, file=sys.stdout)
for step, (x, y) in enumerate(train_bar):
# x: [b, 187], y: [b]
x, y = x.to(device), y.to(device)
model.train()
logits = model(x)
loss = criteon(logits, y)
optimizer.zero_grad()
loss.backward()
# for param in model.parameters():
# print(param.grad)
optimizer.step()
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch 1,
epochs,
loss)
global_step = 1
if epoch % 1 == 0: # You can change the validation frequency as you wish
val_acc = evalute(model, val_loader)
print('val_acc = ',val_acc)
if val_acc > best_acc:
best_epoch = epoch
best_acc = val_acc
# Export the model
name_pt = 'best3.pt'
torch.save(model.state_dict(), name_pt)
print('best acc:', best_acc, 'best epoch:', best_epoch)
model.load_state_dict(torch.load(name_pt))
print('loaded from ckpt!')
test_acc = evalute(model, test_loader)
print('test acc:', test_acc)
if __name__ == '__main__':
main()
Then I try to make predictions and modify with reference to other people's code
import torch
from torchvision.transforms import transforms
import pandas as pd
from PIL import Image
from run import conv_net
from pathlib import Path
name_pt = 'best3.pt'
model = conv_net(8)
checkpoint = torch.load(name_pt)
model.load_state_dict(checkpoint)
testdata = './kaggle/onedata.csv'
test_df = pd.read_csv(testdata, header=None)
test_data = test_df.to_numpy()
csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())
output = model(csv)
prediction = int(torch.max(output.data, 1)[1].numpy())
print(prediction)
if (prediction == 0):
print ('other')
if (prediction == 1):
print ('100%PET')
if (prediction == 2):
print ('100% Cotton')
if (prediction == 3):
print ('100% Nylon')
if (prediction == 4):
print ('>70% PET')
if (prediction == 5):
print ('<70% PET')
if (prediction == 6):
print ('Spandex/PET Spandex<5%')
if (prediction == 7):
print ('Spandex/PET Spandex>5%')
Something went wrong
File "C:\Users\54-0461100-01\Desktop\for_spec_train\run.py", line 70, in forward
x = x.unsqueeze(1)
AttributeError: 'TensorDataset' object has no attribute 'unsqueeze'
Most of the questions are for images, not found on CSV files.Any help is appreciated if you have any suggestions.
By the way this is my data format.
LJ column are labels,train and test set are same format enter image description here onedata format enter image description here
CodePudding user response:
When calling output = model(csv)
you are passing the model a 'TensorDataset' object as the input instead of a tensor. You can access the tensors in this object by indexing it. https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#TensorDataset
Additionally, you can avoid the TensorDataset object all together by replacing
csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())
with
csv = torch.from_numpy(test_data[:, :]).float()