## How to write custom models with fast.ai

Here is an example of how to modify [fastai](https://github.com/fastai/fastai) to use a custom pretrained network.

In [4]:
from fastai.imports import *

In [5]:
# Notebook setup: (re)load the autoreload extension, set it to mode 2, and
# select the inline matplotlib backend.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [6]:
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [7]:
import pandas as pd
import numpy as np
# Root folder of the dataset. It is kept as a plain string WITH a trailing
# slash because later cells build file names by simple concatenation
# (e.g. f'{path}train.csv').
path = '/data2/yinterian/IMRT/'
# NOTE(review): model_path is not referenced anywhere else in the visible notebook.
model_path = path + "results/"

## Custom ConvnetBuilder 

This `ConvnetBuilderVGG` is similar to [ConvnetBuilder](https://github.com/fastai/fastai/blob/master/fastai/conv_learner.py), but it modifies VGG16. The default vgg16 in fastai "cuts" all the fully connected layers. In this example we keep all layers but the last one. I have a very small dataset, so my intention is to train only the last two fully connected layers.

In [40]:
# Override fastai's vgg16 function with the torchvision one
from torchvision.models import vgg16

# Creates a ConvnetBuilder with all pretrained layers from vgg16 but the last fully connected layer
class ConvnetBuilderVGG():
    """Builds a VGG16-based network whose last fully connected layer is replaced.

    The layers of ``vgg.features``, a ``Flatten`` layer and the first five
    modules of ``vgg.classifier`` are kept; a new linear layer with 4096 inputs
    and ``c`` outputs (optionally preceded by dropout) is appended as the head.

    Arguments:
        c (int): size of the last layer
        is_multi (bool): is multilabel classification
        is_reg (bool): is a regression
        ps (float): dropout probability applied right before the last layer.
            ``None`` selects the default of 0.5; ``0`` disables the dropout
            layer altogether.

    Attributes set by the constructor:
        top_model: the kept VGG16 layers only (everything but the new head).
        fc_model: the new head (optional dropout followed by the linear layer).
        model: the ``top_model`` layers followed by the ``fc_model`` layers.
        n_fc: number of layers in the head.
        lr_cut: index at which ``get_layer_groups`` splits the kept layers.
    """

    def __init__(self, c, is_multi, is_reg, ps=None):
        self.c,self.is_multi,self.is_reg = c,is_multi,is_reg
        # An explicit None check is required: `ps or 0.5` would silently turn
        # a deliberate ps=0.0 ("no dropout") into 0.5.
        self.ps = 0.5 if ps is None else ps
        # The positional True is presumably torchvision's `pretrained` flag — confirm
        # against the installed torchvision version.
        vgg = vgg16(True)
        self.lr_cut = 30
        layers = children(vgg.features)
        # Keep the classifier except for its trailing modules; the final
        # fully connected layer is replaced by the head built below.
        layers += [Flatten()] + children(vgg.classifier)[:5]
        # here top model is everything but the last layer
        self.top_model = nn.Sequential(*layers)

        # Honour the documented `ps` argument (it used to be ignored because
        # p=None was hard-coded here).
        fc_layers = self.create_fc_layer(4096, c, p=self.ps)
        self.n_fc = len(fc_layers)
        self.fc_model = to_gpu(nn.Sequential(*fc_layers))
        apply_init(self.fc_model, kaiming_normal)
        # The full model reuses the very same layer objects as top_model/fc_model.
        self.model = to_gpu(nn.Sequential(*(layers+fc_layers)))

    def create_fc_layer(self, ni, nf, p, actn=None):
        """Return the list of layers forming one fully connected block.

        Arguments:
            ni (int): number of input features
            nf (int): number of output features
            p (float): dropout probability; a falsy value (None or 0) means
                that no dropout layer is added
            actn (callable): optional factory for an activation layer that is
                appended after the linear layer
        Returns:
            list: ``[Dropout?, Linear, activation?]``
        """
        res=[]
        if p: res.append(nn.Dropout(p=p))
        res.append(nn.Linear(in_features=ni, out_features=nf))
        if actn: res.append(actn())
        return res

    @property
    def name(self):
        """Fixed identifier of the architecture."""
        return "vgg16"

    def get_layer_groups(self, do_fc=False):
        """Split the layers into groups via ``split_by_idxs``.

        Arguments:
            do_fc (bool): if True only the head (``fc_model``) is considered
                and no split indices are passed; otherwise the full model is
                split at ``lr_cut`` and at the start of the head.
        Returns:
            list: the layer groups produced by ``split_by_idxs``
        """
        if do_fc:
            m,idxs = self.fc_model,[]
        else:
            m,idxs = self.model,[self.lr_cut,-self.n_fc]
        return list(split_by_idxs(children(m),idxs))

In [29]:
# Build the data object and the custom model once, only to inspect the
# resulting architecture (the cell ends by displaying models.model).
bs=32; sz=224
f_model = vgg16
n = 443  # passed to get_cv_idxs; presumably the number of rows in train.csv — confirm
val_idxs = get_cv_idxs(n, 0, val_pct=0.2)
tfms = tfms_from_model(f_model, sz)
data = ImageClassifierData.from_csv(path, 'train', f'{path}train.csv', bs, tfms, val_idxs=val_idxs, continuous=True)
# The builder is instantiated directly here (no learner is created); ps is left at its default.
models = ConvnetBuilderVGG(data.c, data.is_multi, data.is_reg)
models.model

Sequential (
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU (inplace)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU (inplace)
  (4): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU (inplace)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU (inplace)
  (9): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU (inplace)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU (inplace)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU (inplace)
  (16): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU (inplace)
  (19): Conv2d(512,

In [41]:
class ConvLearnerVGG(ConvLearner):
    """ConvLearner whose model is built by ``ConvnetBuilderVGG``.

    ``freeze`` leaves only the last layer of the model trainable and
    ``unfreeze_prev_layer`` additionally unlocks the closest preceding layer
    that owns parameters.
    """

    # Overrides `pretrained` so that the custom builder is used.
    @classmethod
    def pretrained(cls, data, ps=None, **kwargs):
        """Create a learner for ``data`` on top of ``ConvnetBuilderVGG``.

        Arguments:
            data: data object providing ``c``, ``is_multi`` and ``is_reg``
            ps (float): forwarded to ``ConvnetBuilderVGG``
            **kwargs: forwarded to the learner constructor
        Returns:
            an instance of ``cls``
        """
        models = ConvnetBuilderVGG(data.c, data.is_multi, data.is_reg, ps=ps)
        return cls(data, models, **kwargs)

    @staticmethod
    def _mark_layer_trainable(layer, flag):
        """Set ``trainable`` on ``layer`` and ``requires_grad`` on its parameters."""
        layer.trainable = flag
        for p in layer.parameters(): p.requires_grad = flag

    # Redefines freeze to freeze everything but the last layer.
    def freeze(self):
        """Make every layer of the model non-trainable except the last one."""
        layers = children(self.model)
        for l in layers: self._mark_layer_trainable(l, False)
        self._mark_layer_trainable(layers[-1], True)

    def unfreeze_prev_layer(self):
        """Unfreeze the last parameterised layer that precedes the final layer.

        The layer is located by searching backwards instead of relying on a
        fixed position (the index 35 used previously), so the method keeps
        working if parameter-free layers such as dropout are inserted before
        the head. Nothing happens when no such layer exists.
        """
        layers = children(self.model)
        for l in reversed(layers[:-1]):
            # Layers without parameters (ReLU, Dropout, Flatten, ...) are skipped.
            if next(iter(l.parameters()), None) is not None:
                self._mark_layer_trainable(l, True)
                return
        

In [31]:
# Settings shared by the following cells: batch size, image size, the
# architecture handed to tfms_from_model, and the validation indices.
bs=32; sz=224
f_model = vgg16
n = 443  # passed to get_cv_idxs; presumably the number of rows in train.csv — confirm
val_idxs = get_cv_idxs(n, 0, val_pct=0.2)
tfms = tfms_from_model(f_model, sz)

In [32]:
# Images come from the 'train' folder under `path`, labels from train.csv;
# continuous=True presumably selects regression targets — confirm in fastai.
data = ImageClassifierData.from_csv(path, 'train', f'{path}train.csv', bs, tfms, val_idxs=val_idxs, continuous=True)

In [33]:
# ps is forwarded to ConvnetBuilderVGG; precompute travels through **kwargs
# to the learner constructor.
learn = ConvLearnerVGG.pretrained(data, ps=0.0, precompute=False)

In [34]:
# Display what trainable_params_ reports for the freshly created learner.
# In the saved output this is a 1x4096 weight and a bias of size 1.
m = learn.models.model
trainable_params_(m)

[Parameter containing:
 1.00000e-02 *
 -2.5014  2.9711 -0.7128  ...  -0.2647 -0.6239  1.1922
 [torch.cuda.FloatTensor of size 1x4096 (GPU 0)], Parameter containing:
  0
 [torch.cuda.FloatTensor of size 1 (GPU 0)]]

In [35]:
# After unfreezing the previous layer, the saved output additionally lists a
# 4096x4096 weight and its bias of size 4096.
learn.unfreeze_prev_layer()
trainable_params_(m)

[Parameter containing:
 -1.1262e-02  1.0421e-02 -1.6899e-03  ...  -1.6088e-02  1.2137e-02  6.5078e-03
 -5.4509e-04 -7.8270e-03  7.1184e-03  ...  -4.0817e-03  9.8776e-03 -1.1085e-02
 -1.0933e-02 -5.1533e-03  1.6766e-02  ...  -3.6180e-03  3.5386e-03 -2.2417e-02
                 ...                   ⋱                   ...                
 -1.0725e-02 -7.2678e-03 -3.8252e-03  ...  -2.4693e-03  8.3481e-03 -5.4105e-03
  5.4018e-03  8.1430e-03 -1.3569e-02  ...   4.0841e-03 -4.1793e-04 -2.2802e-03
  2.8788e-02  6.5824e-03  4.8993e-03  ...   2.4367e-02  6.5563e-03 -7.2610e-03
 [torch.cuda.FloatTensor of size 4096x4096 (GPU 0)], Parameter containing:
  0.0332
  0.0616
  0.0307
    ⋮   
  0.0456
  0.0442
  0.0588
 [torch.cuda.FloatTensor of size 4096 (GPU 0)], Parameter containing:
 1.00000e-02 *
 -2.5014  2.9711 -0.7128  ...  -0.2647 -0.6239  1.1922
 [torch.cuda.FloatTensor of size 1x4096 (GPU 0)], Parameter containing:
  0
 [torch.cuda.FloatTensor of size 1 (GPU 0)]]

## Cross validation

In [36]:
# Cross-validation settings: sample count, batch size and image size.
n = 443
bs=32; sz=224

# Augmentations used for every fold. An earlier variant,
# [RandomRotateXY(10), RandomDihedralXY()], was assigned first and then
# immediately overwritten by this list, so it never took effect.
transforms_basic = [RandomRotateXY(10)]

Here is code to do cross-validation

In [42]:
def get_model_i(i=0):
    """Create a fresh learner for cross-validation fold ``i``.

    Relies on the notebook globals ``n``, ``f_model``, ``sz``,
    ``transforms_basic``, ``path`` and ``bs``.

    Arguments:
        i (int): fold index handed to ``get_cv_idxs``
    Returns:
        the learner returned by ``ConvLearnerVGG.pretrained``
    """
    fold_val_idxs = get_cv_idxs(n, i, val_pct=0.1)
    fold_tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_basic, max_zoom=1.05)
    labels_csv = f'{path}train.csv'
    fold_data = ImageClassifierData.from_csv(
        path, 'train', labels_csv, bs, fold_tfms,
        val_idxs=fold_val_idxs, continuous=True)
    return ConvLearnerVGG.pretrained(fold_data, ps=0.0, precompute=False)

The training schedule below is still a work in progress.

In [43]:
def fit_and_predict(learn):
    """Train ``learn`` with a fixed schedule and return its TTA result.

    Two fits are run first (1e-3 for 3, then 1e-4 for 4), then the previous
    layer is unfrozen and a final fit (1e-5 for 3) follows.

    Arguments:
        learn: object providing ``fit``, ``unfreeze_prev_layer`` and ``TTA``
    Returns:
        whatever ``learn.TTA()`` returns
    """
    initial_schedule = ((1e-3, 3), (1e-4, 4))
    for lr, n_cycles in initial_schedule:
        learn.fit(lr, n_cycles)

    print("unfreezing")
    learn.unfreeze_prev_layer()
    # Alternative kept from the original for reference:
    # learn.fit(1e-5, 3, cycle_len=1, cycle_mult=2)
    learn.fit(1e-5, 3)

    tta_result = learn.TTA()
    return tta_result

Predictions

In [None]:
# Train one model per fold and collect the TTA result of each.
# 11 iterations with val_pct=0.1 on n=443: presumably ten folds of 44 samples
# plus an eleventh holding the 3 leftover samples — TODO confirm against get_cv_idxs.
preds = []
for i in range(11):
    print("iteration ", i)
    learn = get_model_i(i)
    preds.append(fit_and_predict(learn))

iteration  0


A Jupyter Widget

[ 0.       1.6685   0.94829]                              
[ 1.       1.51883  0.85784]                              
[ 2.       1.44265  0.79279]                              



A Jupyter Widget

[ 0.       1.23457  0.79815]                              
[ 1.       1.26057  0.80246]                              
[ 2.       1.27221  0.79776]                              
[ 3.       1.26376  0.81702]                              

unfreezing


A Jupyter Widget

[ 0.       1.24258  0.80606]                              
[ 1.       1.28194  0.79753]                              
[ 2.       1.28094  0.79773]                              

iteration  1                                 


A Jupyter Widget

[ 0.       1.51842  1.65887]                              
[ 1.       1.44008  1.64367]                              
[ 2.       1.38457  1.65654]                              



A Jupyter Widget

[ 0.       1.27069  1.58639]                              
[ 1.       1.24826  1.57199]                              
[ 2.       1.21741  1.57445]                              
[ 3.       1.19158  1.56302]                              

unfreezing


A Jupyter Widget

[ 0.       1.17024  1.56515]                              
[ 1.       1.20153  1.57576]                              
[ 2.       1.20192  1.57898]                              

iteration  2                                 


A Jupyter Widget

[ 0.       1.51707  2.20786]                              
[ 1.       1.40498  2.22756]                              
[ 2.       1.39592  2.09034]                              



A Jupyter Widget

[ 0.       1.21499  2.11624]                              
[ 1.       1.21059  2.0987 ]                              
[ 2.       1.20059  2.10542]                              
[ 3.       1.18926  2.08794]                              

unfreezing


A Jupyter Widget

[ 0.       1.23297  2.0862 ]                              
[ 1.       1.19877  2.07747]                              
[ 2.       1.19339  2.06899]                              

iteration  3                                 


A Jupyter Widget

[ 0.       1.7207   1.25394]                              
[ 1.       1.5707   1.25557]                              
[ 2.       1.47394  1.30329]                              



A Jupyter Widget

[ 0.       1.23216  1.16033]                              
[ 1.       1.26298  1.22676]                              
[ 2.       1.2541   1.18198]                              
[ 3.       1.21853  1.20137]                              

unfreezing


A Jupyter Widget

[ 0.       1.22858  1.19656]                              
[ 1.       1.2255   1.19418]                              
[ 2.       1.26325  1.18925]                              

iteration  4                                 


A Jupyter Widget

[ 0.       1.60452  1.55931]                              
[ 1.       1.50887  1.47854]                              
[ 2.       1.41685  1.46606]                              



A Jupyter Widget

[ 0.       1.21113  1.38692]                              
[ 1.       1.23493  1.39054]                              
[ 2.       1.2237   1.38537]                              
[ 3.       1.21578  1.39961]                              

unfreezing


A Jupyter Widget

[ 0.       1.17393  1.38887]                              
[ 1.       1.17992  1.38784]                              
[ 2.       1.19518  1.38557]                              

iteration  5                                 


A Jupyter Widget

[ 0.       1.49682  1.14853]                              
[ 1.       1.47694  0.91438]                              
[ 2.       1.40468  0.91854]                              



A Jupyter Widget

[ 0.       1.26226  0.88909]                              
[ 1.       1.26846  0.85841]                              
[ 2.       1.25931  0.86771]                              
[ 3.       1.23911  0.8545 ]                              

unfreezing


A Jupyter Widget

[ 0.       1.26749  0.85512]                              
  0%|          | 0/13 [00:00<?, ?it/s]

In [26]:
def reshape_preds(preds):
    """Stack the per-fold results and report the mean absolute error.

    Every fold in ``preds`` is used, whatever their number (the previous
    version indexed exactly 11 entries and failed on shorter lists).

    Arguments:
        preds: sequence with one ``(predictions, targets)`` pair per fold;
            both items must be stackable with ``np.vstack``
    Returns:
        float: mean absolute error between the clipped predictions and the
        targets (the value is also printed)
    Raises:
        ValueError: if ``preds`` is empty
    """
    if len(preds) == 0:
        raise ValueError("preds is empty: there is nothing to evaluate")
    pp = np.vstack([fold[0] for fold in preds])
    yy = np.vstack([fold[1] for fold in preds])
    print(yy.shape)
    # Negative predictions are clipped to zero before scoring
    # (presumably the targets are non-negative — confirm).
    pp = np.maximum(pp, 0.0)
    err = np.abs(pp - yy).mean()
    print("err", err)
    return err

In [None]:
# Score the results collected in `preds` by the cross-validation loop.
reshape_preds(preds)