# Check training

In [1]:
import sys

sys.path.insert(0, "../code")

In [2]:
from utils import get_train_test_loaders

In [3]:
# Build the labelled / unlabelled training loaders and the test loader for
# CIFAR10 with 4000 labelled samples.
(train_labelled_loader,
 train_unlabelled_loader,
 test_loader) = get_train_test_loaders(
    dataset_name="CIFAR10",
    num_labelled_samples=4000,
    path="../input/cifar10",
    batch_size=16,
    unlabelled_batch_size=32,
    num_workers=12,
)


Files already downloaded and verified


In [4]:
from utils import get_model

In [5]:
# Run-wide settings used by the cells below.
device = "cuda"
batch_size = 64
num_epochs = 200

# Training configuration. `lr_milestones_values` holds (epoch, value) pairs:
# the value ramps from 0.0 to 1.0 over the first 5 epochs and then goes back
# to 0.0 at the last epoch.
config = dict(
    dataset="CIFAR10",
    data_path=".",

    model="fastresnet",

    momentum=0.9,
    weight_decay=1e-4,
    batch_size=batch_size,
    unlabelled_batch_size=320,
    num_workers=10,

    num_epochs=num_epochs,

    lr_milestones_values=[(0, 0.0), (5, 1.0), (num_epochs, 0.0)],

    num_labelled_samples=4000,
)


In [6]:
# Instantiate the configured architecture and move it to the target device.
model = get_model(config['model']).to(device)


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim


In [8]:
# Losses: cross-entropy for the labelled batch, MSE between the two
# predictions of the unlabelled batch (consistency term).
criterion = nn.CrossEntropyLoss()
consistency_criterion = nn.MSELoss()

# The learning rate starts at 0.0; the "lr" parameter is driven by the
# PiecewiseLinear scheduler attached to this optimizer further down.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.0,
    momentum=config['momentum'],
    weight_decay=config['weight_decay'],
    nesterov=True,
)


In [12]:
from ignite.contrib.handlers import PiecewiseLinear


# Number of iterations in one pass over the labelled loader.
le = len(train_labelled_loader)

# Convert the (epoch, value) milestones from the config into
# (iteration, value) milestones.
milestones_values = [
    (le * epoch, value)
    for epoch, value in config['lr_milestones_values']
]

scheduler = PiecewiseLinear(
    optimizer,
    "lr",
    milestones_values=milestones_values,
)

def _prepare_batch(batch, device, non_blocking):
    """Unpack a two-element batch and move both parts to ``device``.

    Args:
        batch: a pair ``(x, y)``.
        device: target device forwarded to ``convert_tensor``.
        non_blocking: forwarded to ``convert_tensor`` unchanged.

    Returns:
        Tuple ``(x, y)`` with both elements passed through ``convert_tensor``.
    """
    # NOTE: `convert_tensor` is looked up when this function is called, so it
    # must be imported before the first call.
    inputs, targets = batch
    inputs = convert_tensor(inputs, device=device, non_blocking=non_blocking)
    targets = convert_tensor(targets, device=device, non_blocking=non_blocking)
    return inputs, targets

def cycle(iterable):
    """Yield the items of ``iterable`` over and over again.

    The iterable is re-iterated on every pass (nothing is cached), so an
    object that produces a different order on each iteration is re-drawn on
    every pass.

    If a complete pass yields no item (an empty iterable, or a one-shot
    iterator that is already exhausted) the generator stops instead of
    spinning forever without ever yielding.

    Args:
        iterable: any object supporting ``iter()``.

    Yields:
        The items of ``iterable``, pass after pass.
    """
    while True:
        yielded_any = False
        for item in iterable:
            yielded_any = True
            yield item
        if not yielded_any:
            # Nothing left to produce: another pass would busy-loop forever.
            return

# Endless iterators over both training loaders, so batches can be pulled
# with `next()` without worrying about the end of an epoch.
train_unlabelled_loader_iter, train_labelled_loader_iter = (
    cycle(train_unlabelled_loader),
    cycle(train_labelled_loader),
)

In [13]:
# Pull one batch from each endless iterator.
# The unlabelled batch unpacks into two tensors; presumably the original
# images and their augmented counterparts (verify against the loader).
unsup_x, unsup_aug_x = next(train_unlabelled_loader_iter)
labelled_batch = next(train_labelled_loader_iter)

In [15]:
from ignite.utils import convert_tensor


# Move the labelled pair and both unlabelled tensors to the target device.
x, y = _prepare_batch(labelled_batch, device=device, non_blocking=True)
unsup_x, unsup_aug_x = (
    convert_tensor(tensor, device=device, non_blocking=True)
    for tensor in (unsup_x, unsup_aug_x)
)


In [16]:
# Put the model in training mode before the forward pass.
model.train()
# Supervised part: forward the labelled batch through the model.
y_pred = model(x)

In [17]:
# Display the model outputs for the labelled batch (one row per sample).
y_pred

tensor([[-0.1528, -0.1141, -0.0512, -0.1139, -0.0534,  0.1363,  0.0660, -0.2587,
         -0.1338,  0.1013],
        [-0.0366, -0.0968, -0.0015, -0.0371, -0.0577,  0.1379,  0.0684, -0.1793,
         -0.1218,  0.0842],
        [-0.0623, -0.0666,  0.0254, -0.1177, -0.0480,  0.1450,  0.0931, -0.1944,
         -0.2085,  0.0915],
        [-0.0821, -0.0944,  0.0107, -0.0844,  0.0015,  0.1568,  0.1070, -0.2398,
         -0.1294,  0.1367],
        [-0.1292, -0.0873,  0.0641, -0.0344, -0.0791,  0.1556,  0.1828, -0.2433,
         -0.1491,  0.1579],
        [-0.0876, -0.1499,  0.0490, -0.1176, -0.0755,  0.1589,  0.1416, -0.1794,
         -0.2034,  0.1096],
        [-0.1641, -0.0606,  0.0983, -0.0849, -0.1453,  0.1362,  0.1545, -0.3549,
         -0.2024,  0.1620],
        [-0.0421, -0.1143, -0.0493, -0.0796,  0.0319,  0.0496,  0.0486, -0.1292,
         -0.1355,  0.0986],
        [-0.1042, -0.0773,  0.0064, -0.0575, -0.0168,  0.1594,  0.0885, -0.2316,
         -0.1100,  0.1103],
        [-0.0978, -

In [18]:
# Threshold schedule indexed by training step: values linearly spaced from
# 0.1 to 1.0, one per iteration of the whole run (iterations per pass over
# the labelled loader times the number of epochs).
thresholds = torch.linspace(0.1, 1.0, steps=len(train_labelled_loader) * config['num_epochs'])

In [20]:
# Threshold used at the very first training step. It is defined explicitly
# here so the cell also works on a fresh kernel (Restart & Run All).
t = thresholds[0].item()
t

0.10000000149011612

In [24]:
import warnings


def foo(y_pred, y, step):
    """Keep only the samples whose true-class probability is below a threshold.

    The threshold is read from the module-level ``thresholds`` tensor at index
    ``step``. Predictions are turned into probabilities with a softmax over
    dim 1, and the probability of each sample's target class is compared with
    the threshold. The intermediate probabilities and the mask are printed.

    Args:
        y_pred: model outputs, indexed as (sample, class).
        y: integer target class per sample (used as a gather index).
        step: index into ``thresholds``.

    Returns:
        ``(y_pred[mask], y[mask])`` for the samples under the threshold. If no
        sample is under the threshold, a warning is emitted and the inputs
        ``(y_pred, y)`` are returned unfiltered.
    """
    threshold = thresholds[step].item()
    probabilities = torch.softmax(y_pred, dim=1)
    # Probability assigned to the target class of each sample, one column.
    target_probs = probabilities.gather(dim=1, index=y.unsqueeze(dim=1))
    print(target_probs)
    keep = (target_probs < threshold).squeeze(dim=1)
    print(keep)

    if keep.sum() <= 0:
        # Nothing falls under the threshold: warn and fall back to the full batch.
        warnings.warn("Threshold {} is too low, all predictions are discarded.\n".format(threshold) +
                      "y_pred.min/max: {}, {}".format(target_probs.min(), target_probs.max()))
        return y_pred, y

    return y_pred[keep], y[keep]
    

In [25]:
# Apply the threshold filter with the step-0 threshold and show what is kept.
foo(y_pred, y, 0)

tensor([[0.1164],
        [0.1018],
        [0.0846],
        [0.0891],
        [0.0922],
        [0.0941],
        [0.1183],
        [0.1081],
        [0.0807],
        [0.1120],
        [0.1134],
        [0.0976],
        [0.1022],
        [0.0896],
        [0.0941],
        [0.1043]], device='cuda:0', grad_fn=<GatherBackward>)
tensor([0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0], device='cuda:0',
       dtype=torch.uint8)


(tensor([[0.0966, 0.0962, 0.1054, 0.0914, 0.0980, 0.1188, 0.1128, 0.0846, 0.0835,
          0.1127],
         [0.0935, 0.0923, 0.1025, 0.0932, 0.1016, 0.1187, 0.1129, 0.0798, 0.0891,
          0.1163],
         [0.0884, 0.0922, 0.1073, 0.0972, 0.0930, 0.1176, 0.1208, 0.0789, 0.0867,
          0.1179],
         [0.0941, 0.0884, 0.1079, 0.0913, 0.0952, 0.1204, 0.1183, 0.0859, 0.0838,
          0.1146],
         [0.0916, 0.0941, 0.1024, 0.0960, 0.1000, 0.1193, 0.1111, 0.0807, 0.0911,
          0.1136],
         [0.0935, 0.0976, 0.1041, 0.0964, 0.0947, 0.1148, 0.1140, 0.0825, 0.0896,
          0.1128],
         [0.0978, 0.0952, 0.1013, 0.0932, 0.1047, 0.1134, 0.1046, 0.0896, 0.0903,
          0.1099],
         [0.0909, 0.0941, 0.1089, 0.0926, 0.0992, 0.1108, 0.1113, 0.0891, 0.0880,
          0.1150]], device='cuda:0', grad_fn=<IndexBackward>),
 tensor([7, 8, 1, 0, 7, 1, 7, 1], device='cuda:0'))

In [None]:

# Draft of a single training iteration: supervised loss on the filtered
# labelled batch plus a weighted consistency loss on the unlabelled batch.

# Inside an ignite engine this would be `engine.state.iteration - 1`; there
# is no engine in this notebook, so the first step is used.
step = 0
# Weight of the consistency term. Placeholder value; TODO confirm the value
# intended for training.
lam = 1.0

# Supervised part: filter the labelled predictions with the threshold
# function defined above (`foo`). New names are used so that `y_pred` and
# `y` are not overwritten by running this cell.
y_pred_filtered, y_filtered = foo(y_pred, y, step=step)

loss = criterion(y_pred_filtered, y_filtered)

# Unsupervised part: predictions on the unlabelled batch and on its second
# view should agree.
y_pred_a = model(unsup_x)
y_pred_b = model(unsup_aug_x)
consistency_loss = consistency_criterion(y_pred_a, y_pred_b)

final_loss = loss + lam * consistency_loss

optimizer.zero_grad()
final_loss.backward()
optimizer.step()
