I have a convolutional network that takes images as input, but each image also gets a colored border that feeds additional information into the network. Now I want to calculate the loss, but the usual loss function will also take the predicted border into account. The border is completely random and is just an input to the system; I don't want the model to think it has performed badly when it predicted the wrong border color. The border is added in the dataset's __getitem__:
def __getitem__(self, index):
    path = self.input_data[index]
    imgs_path = sorted(glob.glob(path + '/*.png'))
    # read light conditions
    lightConditions = []
    with open(path + "/lightConditions.json", 'r') as file:
        lightConditions = json.load(file)
    # shift light conditions
    lightConditions.pop(0)
    lightConditions.append(False)
    frameNumber = 0
    imgs = []
    for img_path in imgs_path:
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im_pil = Image.fromarray(img)
        #img = cv2.resize(img, (256, 448))
        if not lightConditions[frameNumber]:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='black')
        else:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='orange')
        img = np.asarray(imgBorder)
        img = cv2.resize(img, (256, 448))
        #img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC) #has been 0.5 for official data, new is fx=2.63 and fy=2.84
        img_tensor = ToTensor()(img).float()
        imgs.append(img_tensor)
        frameNumber += 1
    imgs = torch.stack(imgs, dim=0)
    return imgs
And then this is done in training:
for idx_epoch in range(startEpoch, nEpochs):
    # set the epoch in the sampler so shuffling differs between epochs
    val_loader.sampler.set_epoch(idx_epoch)
    # remember the start time for displaying the time per epoch
    startTimeEpoch = datetime.now()
    i = 0
    if processGPU == 0:
        running_loss = 0
        beenValuated = False
    for index, data_sr in enumerate(train_loader):
        # transfer data to the GPU, but don't block other processes because this only affects this single process
        data_sr = data_sr.cuda(processGPU, non_blocking=True)
        startTimeIteration = time.time()
        # remove all dimensions of size 1
        data_sr = data_sr.squeeze()
        # calculate the indices of the input images and the GT images
        num_f = len(data_sr)
        # if model_type is 0 -> only calculate one frame, which is marked as gt
        if cfg.model_type == 0:
            idx_start = random.randint(-2, 2)
            idx_all = list(np.arange(idx_start, idx_start + num_f).clip(0, num_f - 1))
            idx_gt = [idx_all.pop(int(num_f / 2))]
            idx_input = idx_all
        # else, when model_type is 1, feed the first four frames as input and predict the next cfg.dec_frames frames; all frames to be predicted become the 'gt' images
        else:
            idx_all = np.arange(0, num_f)
            idx_input = list(idx_all[0:4])
            idx_gt = list(idx_all[4:4 + cfg.dec_frames])
        imgs_input = data_sr[idx_input]
        imgs_gt = data_sr[idx_gt]
        # get predicted result
        imgs_pred = model(imgs_input)
I use cfg.model_type = 1. The model outputs new images that also have a colored border, and normally the loss calculation would follow here:
loss = criterion_mse(imgs_pred, imgs_gt)
But I can no longer use this as-is. Does anyone know how to write a custom loss function that only takes certain parts of the tensor into account, or how to tell which parts of the tensor represent which image regions?
CodePudding user response:
You can slice tensors the same way as in NumPy. Image batches have the dimensions NCHW (batch, channels, height, width). If b is your border size and the border is symmetric on all sides, just crop the tensors:
loss = criterion_mse(imgs_pred[:, :, b:-b, b:-b], imgs_gt[:, :, b:-b, b:-b])
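One caveat from the __getitem__ above: the 6 px border is added before cv2.resize, so in the final 448x256 tensors the effective border width is scaled by the resize ratio and is likely no longer 6 pixels (roughly 6 * 256 / (W + 12) horizontally and 6 * 448 / (H + 12) vertically for original H x W frames). Here is a minimal sketch that wraps the crop into a reusable criterion, assuming a symmetric border; the class name and the border value of 3 are placeholders to replace with your effective border width:

import torch
import torch.nn as nn

class BorderCroppedMSELoss(nn.Module):
    """MSE loss that ignores a symmetric border of `border` pixels.

    Works on NCHW tensors (and any layout whose last two dims are H, W).
    """
    def __init__(self, border):
        super().__init__()
        self.border = border
        self.mse = nn.MSELoss()

    def forward(self, pred, target):
        b = self.border
        # drop b pixels from each side of the height and width dimensions
        return self.mse(pred[..., b:-b, b:-b], target[..., b:-b, b:-b])

# hypothetical usage, with border=3 standing in for the effective width:
# criterion_mse = BorderCroppedMSELoss(border=3)
# loss = criterion_mse(imgs_pred, imgs_gt)

If the horizontal and vertical border widths differ after resizing, use separate crop amounts per dimension, or multiply both tensors by a binary mask that zeroes the border region before taking the MSE.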