<a href="https://colab.research.google.com/github/vadim0x60/netology-cv-2019/blob/master/day7/007_Transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Пример transfer learning

**Source:** https://gist.github.com/L0SG/2f6d81e4ad119c4f798ab81fa8d62d3f

In [0]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim

# Объявление модели

In [0]:
class Net(nn.Module):
    """Toy feed-forward network: three stacked linear layers (10 -> 5 -> 5 -> 1).

    No activation is applied between the layers, so the network as a whole
    is a composition of affine maps.
    """

    def __init__(self):
        super().__init__()

        # The attribute names double as the state_dict key prefixes
        # (fc1.*, fc2.*, fc3.*), so they must stay as they are.
        self.fc1 = nn.Linear(in_features=10, out_features=5)
        self.fc2 = nn.Linear(in_features=5, out_features=5)
        self.fc3 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Feed ``x`` through fc1, fc2 and fc3 (in that order) and return the result.

        ``x`` must have 10 features in its last dimension; the output has 1.
        """
        for layer in (self.fc1, self.fc2, self.fc3):
            x = layer(x)
        return x

# Обучение

In [0]:
# Synthetic regression data: 100 samples with 10 standard-normal features each.
# A plain tensor is used here; the torch.autograd.Variable wrapper has been a
# deprecated no-op since PyTorch 0.4.
random_input = torch.randn(100, 10)

# The target is the sum of squared features of each sample. keepdim=True keeps
# the trailing dimension so the target has shape (100, 1), matching the
# network output expected by the loss.
random_target = (random_input ** 2).sum(dim=1, keepdim=True)

print(random_input.shape)
print(random_target.shape)

In [0]:
# Build a fresh, randomly initialised network.
net = Net()

# Show fc2 before any optimisation step so it can be compared with the
# values printed after training.
print('fc2 weight before train:', net.fc2.weight, sep='\n')

# Full-batch training: 100 plain SGD steps on a mean-squared-error objective.
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
for i in range(100):
    # Clear gradients left over from the previous step before the new backward pass.
    net.zero_grad()
    output = net(random_input)
    loss = criterion(output, random_target)
    loss.backward()
    optimizer.step()

# Show fc2 again: every layer was trainable, so these values differ from the ones above.
print('fc2 weight after train:', net.fc2.weight, sep='\n')


# Переопределение модели

In [0]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(net.state_dict(), 'model')

# Throw the trained instance away and build a fresh, randomly initialised one.
del net
net = Net()

# Restore the trained parameters into the fresh instance.
net.load_state_dict(torch.load('model'))

# print the pre-trained fc2 weight
print('fc2 pretrained weight (same as the one above):')
print(net.fc2.weight)

# New random data with the same synthetic target (sum of squared features).
# Plain tensors are used; the torch.autograd.Variable wrapper is a deprecated no-op.
random_input = torch.randn(100, 10)
random_target = (random_input ** 2).sum(dim=1, keepdim=True)

# Freeze fc2 this time: only fc1 and fc3 are supposed to be trained.
for param in net.fc2.parameters():
    param.requires_grad = False

# Snapshot the frozen parameters so the freeze can be verified after training.
fc2_weight_frozen = net.fc2.weight.detach().clone()
fc2_bias_frozen = net.fc2.bias.detach().clone()

# train again
criterion = nn.MSELoss()

# NOTE: the optimizer is deliberately given ALL parameters, including the
# frozen fc2 ones. Frozen parameters never receive a gradient, and
# optimizer.step() skips every parameter whose .grad is None, so fc2 is left
# untouched. Keeping fc2 registered means it can be trained later simply by
# turning requires_grad back on.
# The ValueError ("optimizing a parameter that doesn't require gradients")
# discussed in https://github.com/pytorch/pytorch/issues/679 concerns old
# PyTorch releases; there the parameters had to be filtered first:
#   optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=0.001)
optimizer = optim.Adam(net.parameters(), lr=0.001)

for i in range(100):
    net.zero_grad()
    output = net(random_input)
    loss = criterion(output, random_target)
    loss.backward()
    optimizer.step()

# Verify the claim instead of relying on eyeballing printed tensors.
assert torch.equal(net.fc2.weight.detach(), fc2_weight_frozen), 'frozen fc2.weight was modified'
assert torch.equal(net.fc2.bias.detach(), fc2_bias_frozen), 'frozen fc2.bias was modified'


# Сравнение моделей

In [0]:

# Print fc2 after the retraining run in which it was frozen: the values are the
# same as before that run, only fc1 and fc3 changed.
print('fc2 weight (frozen) after retrain:')
print(net.fc2.weight)

# Unfreeze fc2 for extra fine-tuning.
for param in net.fc2.parameters():
    param.requires_grad = True

# Make sure the optimizer actually tracks the fc2 parameters. If it was built
# from every parameter of the net, nothing has to be added (adding a parameter
# twice raises ValueError); if it was built only from the trainable ones, fc2
# is registered here as a new parameter group.
tracked_ids = {id(p) for group in optimizer.param_groups for p in group['params']}
untracked = [p for p in net.fc2.parameters() if id(p) not in tracked_ids]
if untracked:
    optimizer.add_param_group({'params': untracked})

# Snapshot fc2 so the effect of unfreezing can be verified after training.
fc2_weight_before_unfreeze = net.fc2.weight.detach().clone()

# re-retrain
for i in range(100):
    net.zero_grad()
    output = net(random_input)
    loss = criterion(output, random_target)
    loss.backward()
    optimizer.step()

# Print the re-retrained fc2 weight: this time fc2 was trainable, so it changed too.
print('fc2 weight (unfrozen) after re-retrain:')
print(net.fc2.weight)

# Verify the claim instead of relying on eyeballing printed tensors.
assert not torch.equal(net.fc2.weight.detach(), fc2_weight_before_unfreeze), \
    'fc2.weight did not change after unfreezing'