In [1]:
from __future__ import print_function
import torch as t
from torch import nn
from torch.autograd import Variable as V

In [6]:
class Net(nn.Module):
    """LeNet-style CNN built from two ``nn.Sequential`` stages.

    ``features`` applies conv -> ReLU -> 2x2 max-pool twice; ``classifier``
    is a three-layer MLP whose first layer expects 16*5*5 inputs. That is
    the feature count produced for a 3-channel 32x32 image
    (32 -> conv5 -> 28 -> pool -> 14 -> conv5 -> 10 -> pool -> 5).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional feature extractor: 3 -> 6 -> 16 channels.
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield
                16*5*5 features per sample.
        """
        x = self.features(x)
        # Flatten per sample and keep the batch dimension fixed. With
        # view(-1, 16*5*5) a wrong input size could be silently folded into
        # the batch dimension (e.g. 25 samples of 28x28 came out as 16 rows);
        # now the first Linear layer rejects the mismatch instead.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


net = Net()

In [15]:
# optimizer

from torch import optim
# One SGD step over every parameter of `net`, using a single learning rate.
optimizer = optim.SGD(net.parameters(), lr=1)
# The optimizer was built from all of net's parameters, so net.zero_grad()
# would clear the same gradients.
optimizer.zero_grad()

# A single random 3x32x32 sample stands in for real data.
input = V(t.randn(1, 3, 32, 32))
output = net(input)

# The output tensor itself is supplied as the upstream gradient.
output.backward(gradient=output)
optimizer.step()



In [17]:
from torch import optim
# Two parameter groups: `features` has no 'lr' entry and therefore uses the
# optimizer-wide default (1e-5); `classifier` overrides it with 1e-2.
optimizer = optim.SGD(
    [
        dict(params=net.features.parameters()),
        dict(params=net.classifier.parameters(), lr=1e-2),
    ],
    lr=1e-5,
)
# net.zero_grad() would clear the same gradients: the two groups are built
# from net.features and net.classifier.
optimizer.zero_grad()

# A single random 3x32x32 sample stands in for real data.
input = V(t.randn(1, 3, 32, 32))
output = net(input)

# The output tensor itself is supplied as the upstream gradient.
output.backward(gradient=output)
optimizer.step()

In [23]:
from torch import optim

# Give selected layers their own learning rate by splitting net's parameters
# into a "special" group and a "base" group, matched by object identity.
special_layers = nn.ModuleList([net.classifier[0], net.classifier[3]])
# NOTE(review): in the Net definition visible in this notebook,
# classifier[3] is an nn.ReLU, which has no parameters, so only
# classifier[0] actually ends up in the special group. Confirm whether a
# Linear layer (index 2 or 4) was intended.

# A set gives O(1) identity lookups while filtering.
special_layers_params = set(map(id, special_layers.parameters()))

# Materialise the base group as a list: a lazy filter() is a one-shot
# iterator on Python 3 and would be left exhausted once the optimizer has
# consumed it.
base_params = [p for p in net.parameters()
               if id(p) not in special_layers_params]

# Base parameters use the default lr (0.001); special layers use 0.01.
optimizer = optim.SGD([{'params': base_params},
                       {'params': special_layers.parameters(), 'lr': 0.01}],
                      lr=0.001)

# The two groups together cover every parameter of net, so net.zero_grad()
# would clear the same gradients.
optimizer.zero_grad()

# A single random 3x32x32 sample stands in for real data.
input = V(t.randn(1, 3, 32, 32))
output = net(input)

# The output tensor itself is supplied as the upstream gradient.
output.backward(output)

optimizer.step()

In [24]:
from torch import optim
# Build a fresh optimizer whose classifier learning rate is a previous
# value scaled down by a factor of ten.
old_lr = 0.1
optimizer = optim.SGD(
    [
        # No 'lr' entry: this group uses the optimizer-wide default below.
        dict(params=net.features.parameters()),
        dict(params=net.classifier.parameters(), lr=old_lr * 0.1),
    ],
    lr=1e-5,
)
# net.zero_grad() would clear the same gradients: the two groups are built
# from net.features and net.classifier.
optimizer.zero_grad()

# A single random 3x32x32 sample stands in for real data.
input = V(t.randn(1, 3, 32, 32))
output = net(input)

# The output tensor itself is supplied as the upstream gradient.
output.backward(gradient=output)
optimizer.step()


In [28]:
# nn.functional

# The module API and the functional API run the same computation: a layer's
# forward pass can be reproduced by handing its own parameters to the
# matching function in nn.functional.
input = V(t.randn(2, 3))
model = nn.Linear(3, 4)

# nn.Linear versus nn.functional.linear fed with the layer's weight and bias.
output1 = model(input)
output2 = nn.functional.linear(input, weight=model.weight, bias=model.bias)
print(output1 == output2)

# ReLU holds no parameters, so the functional call needs only the input.
b = nn.functional.relu(input)
b2 = nn.ReLU()(input)
print(b == b2)


tensor([[1, 1, 1, 1],
        [1, 1, 1, 1]], dtype=torch.uint8)
tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.uint8)


In [30]:
from torch.nn import functional as F

class Net(nn.Module):
    """LeNet-style CNN that keeps only parameterised layers as modules.

    The two convolutions and three linear layers are module attributes;
    the parameter-free operations (ReLU, max-pooling) are applied through
    ``torch.nn.functional`` inside ``forward``. ``fc1`` expects 16*5*5
    inputs, which is what a 3-channel 32x32 image produces
    (32 -> conv5 -> 28 -> pool -> 14 -> conv5 -> 10 -> pool -> 5).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield
                16*5*5 features per sample.
        """
        # torch.nn.functional has no `pool`; max_pool2d with kernel size 2
        # (stride defaults to the kernel size) is the 2x2 max-pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten per sample and keep the batch dimension fixed, so a wrong
        # input size is rejected by fc1 instead of being folded into the
        # batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [38]:
# init
from torch.nn import init
linear = nn.Linear(3, 4)

# First pass: let the library's Xavier-normal initialiser fill the weight,
# with the RNG pinned so the draw can be repeated.
t.manual_seed(1)
print(init.xavier_normal_(linear.weight))

import math

# Second pass: reset the RNG to the same seed and draw by hand from
# N(0, std) with std = sqrt(2 / (fan_in + fan_out)); for this 3 -> 4 layer
# fan_in + fan_out = 7. The recorded output shows both prints yield the
# same values.
t.manual_seed(1)
std = math.sqrt(2) / math.sqrt(7.)
print(linear.weight.data.normal_(0, std))

# Walk every named parameter of `net`, printing its name and value. The
# branches are placeholders for per-layer-type handling keyed on substrings
# of the parameter name; in the recorded output the names look like
# 'features.0.weight', which matches none of them.
for name, params in net.named_parameters():
    print(name, type(name), params, type(params))
    if 'linear' in name:
        params[0]
        params[1]
    elif 'conv' in name:
        pass
    elif 'norm' in name:
        pass


Parameter containing:
tensor([[ 0.3535,  0.1427,  0.0330],
        [ 0.3321, -0.2416, -0.0888],
        [-0.8140,  0.2040, -0.5493],
        [-0.3010, -0.4769, -0.0311]], requires_grad=True)
tensor([[ 0.3535,  0.1427,  0.0330],
        [ 0.3321, -0.2416, -0.0888],
        [-0.8140,  0.2040, -0.5493],
        [-0.3010, -0.4769, -0.0311]])
features.0.weight <type 'str'> Parameter containing:
tensor([[[[-0.0534, -0.0866,  0.0140, -0.0097,  0.0045],
          [-0.0859,  0.0663, -0.0899,  0.0496, -0.1067],
          [ 0.1091, -0.0783, -0.0811, -0.0806, -0.0452],
          [-0.0853,  0.0910, -0.0110,  0.0692, -0.1019],
          [-0.0962,  0.1045, -0.0720,  0.0369, -0.0397]],

         [[-0.0023,  0.0428,  0.0723, -0.0414,  0.0826],
          [-0.0467, -0.0145, -0.0549, -0.1008, -0.0979],
          [-0.0487,  0.0577, -0.1029,  0.0453,  0.0403],
          [ 0.0521, -0.0706, -0.0495,  0.0880, -0.0885],
          [ 0.0567,  0.0451, -0.0128, -0.0025, -0.0403]],

         [[ 0.0173, -0.1033, -0.0