In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import datasets, transforms

from tqdm import tqdm
import random, time, os, sys, json

In [5]:
## Try: Train a model with random gradient backprop
## Find: if the (rand-grad with and without softmax) or any method work as semi-supervised model.

In [6]:
device = torch.device("cuda:1")
# device = torch.device("cpu")

## For FMNIST dataset

In [7]:
train_transform = transforms.Compose([
            transforms.ToTensor(),
        ])
test_transform = transforms.Compose([
            transforms.ToTensor(),
        ])

train_dataset = datasets.FashionMNIST(root="../../_Datasets/FMNIST/", train=True, download=True, transform=train_transform)
test_dataset = datasets.FashionMNIST(root="../../_Datasets/FMNIST/", train=False, download=True, transform=test_transform)

In [8]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=50, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=50, shuffle=False, num_workers=2)

In [11]:
## demo of train loader
for xx, yy in train_loader:
    print(xx.shape)
    break

torch.Size([50, 1, 28, 28])


## Final Model

In [60]:
class FMNIST_ConvNet(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, stride=1),
        )
        
    def forward(self, x):
        bs = x.shape[0]
        x = self.backbone(x).view(bs, -1)
        return x

In [61]:
model = FMNIST_ConvNet()
# print("number of params: ", sum(p.numel() for p in model.parameters()))
model

FMNIST_ConvNet(
  (backbone): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(5, 5), stride=(2, 2))
    (3): ReLU()
    (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
  )
)

In [62]:
# model

In [63]:
model(xx).shape

torch.Size([50, 1024])

In [64]:
# asdfasdf

In [65]:
model = model.to(device)

In [66]:
print("number of params: ", sum(p.numel() for p in model.parameters()))

number of params:  9152


## Training

In [67]:
 ## debugging to find if random gradient makes does better than random initialized model
model_name = 'random_gradient_pretrain'

In [68]:
EPOCHS = 20
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# NORMALIZE = True
# RAND_GRAD = True

In [69]:
## Following is copied from 
### https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

# Training
def train(epoch, model, optimizer, NORMALIZE = True, RAND_GRAD = True):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
#         loss = criterion(outputs, targets)
        
        if NORMALIZE: 
            outputs.register_hook(lambda grad: grad/torch.norm(grad, dim=1, keepdim=True))

        if RAND_GRAD:
            grad = torch.randn_like(outputs)
            outputs.backward(gradient=grad)
            loss = -1
        else:
            loss = criterion(outputs,targets)
            loss.backward()

        optimizer.step()

    print(f"[Train] {epoch} Loss: {loss:.3f}")
    return loss

In [70]:
def train_final_layer(epoch, model, optimizer, NORMALIZE = False):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        
        if NORMALIZE: 
            outputs.register_hook(lambda grad: grad/torch.norm(grad, dim=1, keepdim=True))

        loss = criterion(outputs,targets)
        loss.backward()

        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    
    loss = train_loss/(batch_idx+1)
    acc = 100.*correct/total
    print(f"[Train - FC] {epoch} Loss: {loss:.3f} | Acc: {acc:.3f} {correct}/{total}")
    return loss, acc

In [71]:
# best_acc = -1
def test(epoch, model, optimizer, best_acc, model_name):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    latency = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            
            start = time.time()
            outputs = model(inputs)
            ttaken = time.time()-start
                
            loss = criterion(outputs, targets)
            
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            latency.append(ttaken)
    
    loss = test_loss/(batch_idx+1)
    acc = 100.*correct/total
    print(f"[Test] {epoch} Loss: {test_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f} {correct}/{total}")
    
    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'model': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('models'):
            os.mkdir('models')
        torch.save(state, f'./models/{model_name}.pth')
        best_acc = acc
        
    return loss, acc, best_acc, latency

In [72]:
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
resume = False

if resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('./models'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load(f'./models/{model_name}.pth')
    model.load_state_dict(checkpoint['model'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

In [74]:
# # ### Train the whole damn thing

best_acc = -1
for epoch in range(start_epoch, start_epoch+EPOCHS): ## for 200 epochs
    trloss = train(epoch, model, optimizer, NORMALIZE=True, RAND_GRAD=True)
    
    sup_model = nn.Sequential(
        model,
        nn.Linear(1024, 10))
    
    opt = optim.Adam(sup_model[-1].parameters(), lr=0.001)
    for i in range(2):
        print(f"Finetuening the model epoch {i+1}")
        trloss, tracc = train_final_layer(i, model, opt, NORMALIZE=False)
    
    teloss, teacc, best_acc, latency = test(epoch, model, optimizer, best_acc, model_name)
    scheduler.step()

100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 473.41it/s]


[Train] 0 Loss: -1.000
Finetuening the model epoch 1


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 446.23it/s]


[Train - FC] 0 Loss: 8.651 | Acc: 0.000 0/60000
Finetuening the model epoch 2


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 469.36it/s]


[Train - FC] 1 Loss: 8.651 | Acc: 0.000 0/60000


100%|██████████████████████████████████████████████████| 200/200 [00:00<00:00, 420.90it/s]


[Test] 0 Loss: 8.646 | Acc: 0.000 0/10000
Saving..


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 463.14it/s]


[Train] 1 Loss: -1.000
Finetuening the model epoch 1


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 465.24it/s]


[Train - FC] 0 Loss: 8.203 | Acc: 0.000 0/60000
Finetuening the model epoch 2


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 468.39it/s]


[Train - FC] 1 Loss: 8.203 | Acc: 0.000 0/60000


100%|██████████████████████████████████████████████████| 200/200 [00:00<00:00, 437.09it/s]


[Test] 1 Loss: 8.194 | Acc: 0.000 0/10000


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 473.49it/s]


[Train] 2 Loss: -1.000
Finetuening the model epoch 1


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 477.19it/s]


[Train - FC] 0 Loss: 7.338 | Acc: 0.000 0/60000
Finetuening the model epoch 2


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 471.23it/s]


[Train - FC] 1 Loss: 7.338 | Acc: 0.000 0/60000


100%|██████████████████████████████████████████████████| 200/200 [00:00<00:00, 424.12it/s]


[Test] 2 Loss: 7.335 | Acc: 0.000 0/10000


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 459.39it/s]


[Train] 3 Loss: -1.000
Finetuening the model epoch 1


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 462.28it/s]


[Train - FC] 0 Loss: 8.929 | Acc: 0.000 0/60000
Finetuening the model epoch 2


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 461.17it/s]


[Train - FC] 1 Loss: 8.929 | Acc: 0.000 0/60000


100%|██████████████████████████████████████████████████| 200/200 [00:00<00:00, 430.16it/s]


[Test] 3 Loss: 8.915 | Acc: 0.000 0/10000


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 461.46it/s]


[Train] 4 Loss: -1.000
Finetuening the model epoch 1


100%|████████████████████████████████████████████████| 1200/1200 [00:02<00:00, 477.43it/s]


[Train - FC] 0 Loss: 10.353 | Acc: 0.000 0/60000
Finetuening the model epoch 2


 25%|████████████▎                                    | 301/1200 [00:00<00:02, 437.45it/s]


KeyboardInterrupt: 

In [51]:
# ! mkdir models