In [1]:
## Import
from __future__ import print_function
# from utils.dataset import BasicDataset
# from utils.patient import Patient
from utils.data import RegressorDataset
import torch
from torch.utils.data import DataLoader
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from unet.unet_model import RegressorUNet
from unet.unet_parts import SegmentationRegressionLoss
import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
## Define Dataset

import os

imgs_dir = "/Data/ContijochLab/projects/autoseg/train/img/"
segs_dir = "/Data/ContijochLab/projects/autoseg/train/seg/"
boundbox_path = "/Data/ContijochLab/projects/autoseg/data/boundbox.csv"

if torch.cuda.is_available():
    print('Using GPU')
else:
    print('Using CPU')

dataset = RegressorDataset(imgs_dir,segs_dir,boundbox_path,data_device='cuda:1' if torch.cuda.is_available() else 'cpu')

Using GPU


In [3]:
## Define Data Loader
batch_size = 5
loader = DataLoader(dataset,batch_size = batch_size)

In [4]:
## Parameters
n_channels = 1
n_regress = 2
n_bottle = 16
n_hidden = 1024
n_classes = dataset.__nclass__()
img_size = 512
lr = 0.001
wd = 1e-8
mm = 0.9
ep = 100

In [5]:
## Define Net
net = RegressorUNet(n_channels=n_channels, n_classes=n_classes+1,n_regress=n_regress,n_hidden=n_hidden,n_bottle=n_bottle,img_size=img_size)
net.to(device=dataset.dev)

RegressorUNet(
  (inc): DoubleConv(
    (double_conv): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (down1): Down(
    (maxpool_conv): Sequential(
      (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (1): DoubleConv(
        (double_conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (4): BatchNorm2d(128, eps=1e-0

In [6]:
## Set up Optimizer

optimizer  = optim.RMSprop(net.parameters(),lr = lr, weight_decay = wd, momentum = mm)
critereon  = SegmentationRegressionLoss()  
lossweight = .0001 

In [None]:
## Train

for epoch in range(ep):
    net.train()
    epoch_loss = 0
    for batch in loader:
        imgs = batch['img']
        segs_gt = batch['seg']
        regs_gt = batch['org']

        imgs = imgs.to(device=dataset.dev, dtype=torch.float32)
        segs_gt = segs_gt.to(device=dataset.dev, dtype = torch.long)
        regs_gt = regs_gt.to(device=dataset.dev, dtype = torch.float32)

        segs_pr,regs_pr = net(imgs)

        loss = critereon(segs_pr,regs_pr,segs_gt,regs_gt,lossweight)
        epoch_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_value_(net.parameters(),0.1)
        optimizer.step()

#         print(loss.to(device='cpu').detach().numpy())

CE:  tensor(2.3026, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(9732.4756, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(3.2759, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(1.5897, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(8988.6367, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(2.4885, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(1.2425, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(2317.8767, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(1.4743, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.9744, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(82632.6641, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(9.2376, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.8560, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49062.3242, device='cuda:1', grad_fn=<MseLossBackward>)
L

CE:  tensor(0.4637, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(845.8984, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5482, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4078, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(1340.2177, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5418, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4158, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(847.0192, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5005, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4388, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(382.3444, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4770, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5479, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(1041.2465, device='cuda:1', grad_fn=<MseLossBackward>)
L:   t

CE:  tensor(0.3749, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(101.5890, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3850, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5087, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(130.0391, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5217, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3537, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(34.1350, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3571, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3252, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(80.4532, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3333, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.6881, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(82.5386, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor

CE:  tensor(0.2825, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(40.4789, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2866, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.6334, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49.1587, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.6383, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5567, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(37.0001, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5604, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3390, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(45.6252, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3436, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3121, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(46.1636, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0

CE:  tensor(0.3005, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(22.3677, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3027, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2562, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(54.3884, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2616, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2294, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(17.8018, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2312, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2804, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(31.2154, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2835, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3856, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(96.9475, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0

CE:  tensor(0.2551, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(41.2909, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2592, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3724, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(146.5898, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3871, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1887, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(456.5268, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2344, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1783, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(249.7240, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2033, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5365, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(107.7434, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tens

CE:  tensor(0.1595, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(26.0978, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1621, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5348, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(116.2695, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5464, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4745, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(50.7514, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4796, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2503, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(27.9415, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2531, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2072, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(66.8679, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(

CE:  tensor(0.2439, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(15.8169, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2455, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1927, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(25.5747, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1952, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1695, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(8.6039, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1704, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2255, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(27.9988, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2283, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3326, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(44.2004, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.

CE:  tensor(0.2067, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(25.5182, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2092, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3061, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(29.4242, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3090, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1329, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(23.1551, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1352, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1329, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(24.9117, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1354, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5112, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(26.1842, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0

CE:  tensor(0.1279, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49.7762, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1329, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5280, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(226.6966, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5507, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4676, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(92.4073, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4768, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2017, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(50.9476, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2068, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1654, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(21.2146, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(

CE:  tensor(0.2378, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(56.1305, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2435, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1869, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(89.6491, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1959, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1606, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(227.3422, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1833, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2006, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(241.3618, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2247, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3270, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(108.1199, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tenso

CE:  tensor(0.1797, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(22.1892, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1819, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2873, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(21.2445, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2894, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1307, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(5.5251, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1313, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1236, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(8.9714, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1245, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4824, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(74.8595, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4

CE:  tensor(0.1074, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(8.1335, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1082, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5029, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(23.1068, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.5053, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4299, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(43.4009, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4342, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1980, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(29.5953, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2009, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1701, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(98.0720, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.

CE:  tensor(0.2054, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(76.7781, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2131, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1736, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(114.4843, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1851, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1505, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(65.6738, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1571, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1733, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(46.9641, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1780, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3034, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(42.2950, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(

CE:  tensor(0.1709, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(29.7583, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1739, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2841, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(47.4207, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2888, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1153, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(25.8252, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1178, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1032, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(21.6282, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1054, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.5301, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(124.3592, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(

CE:  tensor(0.0894, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(15.6885, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0909, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4203, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(6.7509, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4210, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4010, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(64.7449, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4075, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1732, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49.8818, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1782, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1462, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(74.5152, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.

CE:  tensor(0.1726, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(18.6861, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1744, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1458, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(21.2881, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1479, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0939, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(0.5491, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0939, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1362, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(4.8985, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1367, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2893, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(30.4317, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2

CE:  tensor(0.1238, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49.7916, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1287, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2872, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(83.2067, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.2955, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0852, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(53.9810, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0906, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0837, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(16.0015, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0853, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3533, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(22.6342, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0

CE:  tensor(0.0841, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(4.2830, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0846, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.4190, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(17.3960, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.4207, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3296, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(49.5421, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3345, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1806, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(26.9418, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1833, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1354, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(28.1467, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.

CE:  tensor(0.1622, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(37.3182, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1659, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1293, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(23.7252, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1317, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0876, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(1.0733, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0877, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.1239, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(15.9380, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1255, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.2786, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(45.8370, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.

CE:  tensor(0.1883, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(15.3220, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.1898, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3043, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(35.0780, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.3078, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0847, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(38.9772, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0886, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.0817, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(19.1049, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0.0836, device='cuda:1', dtype=torch.float64, grad_fn=<CopyBackwards>)
CE:  tensor(0.3609, device='cuda:1', grad_fn=<NllLoss2DBackward>)
MSE: tensor(34.4753, device='cuda:1', grad_fn=<MseLossBackward>)
L:   tensor(0