# 9 Transfer Learning 

In [1]:
from matplotlib import pyplot
%matplotlib inline
import numpy as np
from pathlib import Path
import requests
import pickle
import gzip
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DNN(nn.Module):
    def __init__(self,n_inputs, n_hidden, n_outputs, activation):
        super(DNN, self).__init__()
        if type(activation) == nn.ReLU():
            self.model = nn.Sequential(
            nn.Linear(n_inputs, n_hidden), 
            nn.ReLU(), 
            nn.Linear(n_hidden, n_hidden), 
            nn.ReLU(), 
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_outputs))
            
        elif type(activation) == nn.LeakyReLU():
            self.model = nn.Sequential(
            nn.Linear(n_inputs, n_hidden), 
            nn.LeakyReLU(), 
            nn.Linear(n_hidden, n_hidden), 
            nn.LeakyReLU(), 
            nn.Linear(n_hidden, n_hidden),
            nn.LeakyReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.LeakyReLU(),
            nn.Linear(n_hidden, n_outputs))
            
        else:
            self.model = nn.Sequential(
            nn.Linear(n_inputs, n_hidden), 
            nn.ELU(), 
            nn.Linear(n_hidden, n_hidden), 
            nn.ELU(), 
            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.Linear(n_hidden, n_outputs))
    
    def forward(self, x):
        return self.model(x)

## A

In [61]:
n_inputs = 28*28
n_hidden = 100
n_outputs = 5
activation = nn.ELU()
model = DNN(n_inputs, n_hidden, n_outputs, activation)
optimizer = optim.Adam(model.parameters(), lr=0.01)
PATH = "./vannilla.pth"
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
model.state_dict()

OrderedDict([('model.0.weight',
              tensor([[ 0.0216, -0.0265,  0.0138,  ...,  0.0306, -0.0132,  0.0301],
                      [ 0.0286, -0.0122, -0.0140,  ..., -0.0076,  0.0306, -0.0084],
                      [-0.0238, -0.0222, -0.0069,  ..., -0.0004, -0.0245,  0.0232],
                      ...,
                      [-0.0308, -0.0029, -0.0316,  ...,  0.0262, -0.0231,  0.0315],
                      [ 0.0353,  0.0127,  0.0205,  ...,  0.0135,  0.0322,  0.0239],
                      [-0.0011,  0.0204, -0.0049,  ..., -0.0205,  0.0117,  0.0125]])),
             ('model.0.bias',
              tensor([-0.5509, -0.6463, -0.4328, -0.7736, -0.8765, -0.7604, -0.6699, -0.3786,
                      -0.5138, -1.0142,  0.1629, -0.5110, -0.3658, -0.7885, -1.0391, -0.5711,
                       1.4764,  1.3322, -1.4863, -0.4116, -0.4133, -0.9404, -1.3470, -0.0628,
                      -0.4701, -0.1700, -0.7294, -0.7105, -0.6562, -0.3478, -1.2315, -0.9590,
                      -1.327

In [62]:
count = 0
for param in model.parameters():
    if count < 3:
        param.requires_grad = False
    count += 1
extraLayer = nn.Linear(5,5)
new_model = nn.Sequential(model, extraLayer)


## B

In [16]:
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"
if not (PATH / FILENAME).exists():
        content = requests.get(URL + FILENAME).content
        (PATH / FILENAME).open("wb").write(content)

In [31]:
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

In [32]:
def sample_n_instances_per_class(X, y, n=100):
    Xs, ys = [], []
    for label in np.unique(y):
        idx = (y == label)
        Xc = X[idx][:n]
        yc = y[idx][:n]
        Xs.append(Xc)
        ys.append(yc)
    return np.concatenate(Xs), np.concatenate(ys)

In [33]:
x_train = np.concatenate((x_train,x_valid[:5000]),axis=0)
y_train = np.concatenate((y_train,y_valid[:5000]),axis=0)
x_valid = x_valid[:5000]
y_valid = y_valid[:5000]

X_train_five_nine = x_train[y_train >= 5]
y_train_five_nine = y_train[y_train >= 5] - 5
X_valid_five_nine = x_valid[y_valid >= 5]
y_valid_five_nine = y_valid[y_valid >= 5] - 5

X_train_five_nineb, y_train_five_nineb = sample_n_instances_per_class(X_train_five_nine, y_train_five_nine, n=100)
X_valid_five_nineb, y_valid_five_nineb = sample_n_instances_per_class(X_valid_five_nine, y_valid_five_nine,n=30)


In [34]:
X_train_five_nineb, y_train_five_nineb, X_valid_five_nineb, y_valid_five_nineb = map(
    torch.tensor, (X_train_five_nineb, y_train_five_nineb, X_valid_five_nineb, y_valid_five_nineb)
)

In [35]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

In [43]:
def fit_early_stop(epochs, model, loss_func, opt, train_dl, valid_dl):
    bs = 20
    best_loss = np.infty
    epochs_without_progress = 0
    max_epochs_without_progress = 20
    PATH = './bestmodeltorch.pth'
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)
        
        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb_val, yb_val) for xb_val, yb_val in valid_dl]
            )
            
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        val_accuracy = accuracy(model(X_valid_five_nineb),y_valid_five_nineb)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation accuracy: {:.3f}%".format(val_accuracy * 100),
                  "\tLoss: {:.5f}".format(val_loss))

        if val_loss < best_loss:
            best_loss = val_loss
            torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': opt.state_dict(),
            'loss': loss_func,
            }, PATH)
            epochs_without_progress = 0
        else:
            epochs_without_progress += 1
            if epochs_without_progress > max_epochs_without_progress:
                print("Early stopping")
                break

In [37]:
n, c = X_train_five_nineb.shape
print (X_train_five_nineb.shape)
print (y_train_five_nineb.shape)
print (X_valid_five_nineb.shape)
print (y_valid_five_nineb.shape)

torch.Size([500, 784])
torch.Size([500])
torch.Size([150, 784])
torch.Size([150])


In [45]:
bs = 30
n_batches = int(np.ceil( n / bs))

train_ds = TensorDataset(X_train_five_nineb, y_train_five_nineb)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(X_valid_five_nineb, y_valid_five_nineb)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)


In [63]:
extraLayer = nn.Linear(5,5)
new_model = nn.Sequential(model, extraLayer)
epochs = 100
fit_early_stop(epochs, new_model, loss, optimizer, train_dl, valid_dl)

Epoch: 0 	Validation accuracy: 24.667% 	Loss: 1.93811
Epoch: 5 	Validation accuracy: 30.000% 	Loss: 1.54846
Epoch: 10 	Validation accuracy: 30.667% 	Loss: 1.61198
Epoch: 15 	Validation accuracy: 30.000% 	Loss: 1.57903
Epoch: 20 	Validation accuracy: 32.667% 	Loss: 1.77533
Epoch: 25 	Validation accuracy: 35.333% 	Loss: 1.68716
Epoch: 30 	Validation accuracy: 24.000% 	Loss: 1.59105
Early stopping


## C

In [55]:
for param in model.parameters():
    param.requires_grad = True
fit_early_stop(epochs, model, loss, optimizer, train_dl, valid_dl)

Epoch: 0 	Validation accuracy: 24.000% 	Loss: 1.91916
Epoch: 5 	Validation accuracy: 65.333% 	Loss: 0.97875
Epoch: 10 	Validation accuracy: 62.000% 	Loss: 1.21565
Epoch: 15 	Validation accuracy: 64.000% 	Loss: 0.87364
Epoch: 20 	Validation accuracy: 72.000% 	Loss: 0.86426
Epoch: 25 	Validation accuracy: 63.333% 	Loss: 1.21268
Epoch: 30 	Validation accuracy: 72.000% 	Loss: 1.07909
Epoch: 35 	Validation accuracy: 76.000% 	Loss: 1.14182
Epoch: 40 	Validation accuracy: 70.000% 	Loss: 1.28007
Early stopping


## D

In [64]:
count = 0
for param in model.parameters():
    if count == 4:
        param.requires_grad = False
    count += 1
extraLayer = nn.Softmax()
new_model = nn.Sequential(model, extraLayer)
epochs = 100
fit_early_stop(epochs, new_model, loss, optimizer, train_dl, valid_dl)

Epoch: 0 	Validation accuracy: 31.333% 	Loss: 1.55632
Epoch: 5 	Validation accuracy: 38.667% 	Loss: 1.52450
Epoch: 10 	Validation accuracy: 41.333% 	Loss: 1.51208
Epoch: 15 	Validation accuracy: 41.333% 	Loss: 1.50700
Epoch: 20 	Validation accuracy: 40.667% 	Loss: 1.50052
Epoch: 25 	Validation accuracy: 41.333% 	Loss: 1.51017
Epoch: 30 	Validation accuracy: 38.667% 	Loss: 1.52197
Epoch: 35 	Validation accuracy: 38.667% 	Loss: 1.51489
Early stopping


In [68]:
count = 0
for param in model.parameters():
    param.requires_grad = True
for param in model.parameters():
    if count > 1:
        param.requires_grad = False
    count += 1
    
extraLayer = nn.Softmax()
new_model = nn.Sequential(model, extraLayer)
epochs = 100
fit_early_stop(epochs, new_model, loss, optimizer, train_dl, valid_dl)

Epoch: 0 	Validation accuracy: 40.000% 	Loss: 1.50309
Epoch: 5 	Validation accuracy: 46.000% 	Loss: 1.42651
Epoch: 10 	Validation accuracy: 47.333% 	Loss: 1.42269
Epoch: 15 	Validation accuracy: 48.667% 	Loss: 1.41738
Epoch: 20 	Validation accuracy: 48.000% 	Loss: 1.41531
Epoch: 25 	Validation accuracy: 48.667% 	Loss: 1.41251
Epoch: 30 	Validation accuracy: 48.667% 	Loss: 1.40919
Epoch: 35 	Validation accuracy: 48.667% 	Loss: 1.40402
Epoch: 40 	Validation accuracy: 50.000% 	Loss: 1.40138
Epoch: 45 	Validation accuracy: 50.000% 	Loss: 1.39931
Epoch: 50 	Validation accuracy: 50.667% 	Loss: 1.39764
Epoch: 55 	Validation accuracy: 51.333% 	Loss: 1.39688
Epoch: 60 	Validation accuracy: 51.333% 	Loss: 1.39588
Epoch: 65 	Validation accuracy: 51.333% 	Loss: 1.39560
Epoch: 70 	Validation accuracy: 52.000% 	Loss: 1.39405
Epoch: 75 	Validation accuracy: 52.000% 	Loss: 1.39290
Epoch: 80 	Validation accuracy: 52.000% 	Loss: 1.39195
Epoch: 85 	Validation accuracy: 52.000% 	Loss: 1.39185
Epoch: 90 	V

In [69]:
for param in model.parameters():
    param.requires_grad = True

extraLayer = nn.Softmax()
new_model = nn.Sequential(model, extraLayer)
epochs = 100
fit_early_stop(epochs, new_model, loss, optimizer, train_dl, valid_dl)

Epoch: 0 	Validation accuracy: 52.000% 	Loss: 1.39068
Epoch: 5 	Validation accuracy: 51.333% 	Loss: 1.38851
Epoch: 10 	Validation accuracy: 50.000% 	Loss: 1.38884
Epoch: 15 	Validation accuracy: 50.667% 	Loss: 1.39127
Epoch: 20 	Validation accuracy: 51.333% 	Loss: 1.38992
Epoch: 25 	Validation accuracy: 50.667% 	Loss: 1.38936
Epoch: 30 	Validation accuracy: 51.333% 	Loss: 1.39187
Epoch: 35 	Validation accuracy: 51.333% 	Loss: 1.38797
Epoch: 40 	Validation accuracy: 50.667% 	Loss: 1.39090
Early stopping
