In [2]:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

X = torch.rand(2, 20) # batch size: 2, imput dim: 20
net(X)

tensor([[-0.1115,  0.3322, -0.0031,  0.1038, -0.3966, -0.0660,  0.0443,  0.1595,
         -0.0461, -0.0394],
        [-0.2531,  0.2305,  0.0266,  0.0639, -0.3633, -0.0140, -0.0341,  0.0865,
          0.0382,  0.0827]], grad_fn=<AddmmBackward0>)

In [3]:
# every model should be a subclass of nn.Module
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)
        
    def forward(self, X):
        # F.relu is a function, nn.ReLU is a instance
        return self.out(F.relu(self.hidden(X)))

In [4]:
net = MLP()
net(X)

tensor([[-0.1468,  0.1570,  0.0811,  0.0460, -0.1833,  0.2108, -0.0878, -0.0079,
         -0.0353, -0.2835],
        [-0.1178,  0.0905,  0.0711,  0.1237, -0.1114,  0.1193, -0.1555, -0.1597,
          0.0274, -0.1431]], grad_fn=<AddmmBackward0>)

In [5]:
# nn.Sequential implementation
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for block in args:
            # self._modules is a dictionary in which each block represent a predefined layer
            self._modules[block] = block
    
    def forward(self, X):
        for block in self._modules.values():
            X = block(X)
        return X

net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)     

tensor([[-0.2961, -0.0485, -0.0401, -0.1405,  0.1125, -0.0393,  0.2325,  0.0815,
          0.1691, -0.0885],
        [-0.2495,  0.0345, -0.1021, -0.1119,  0.0808, -0.0775,  0.0459,  0.1309,
          0.0537, -0.1164]], grad_fn=<AddmmBackward0>)

In [6]:
# one hidden layer MLP
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size=(2, 4))
net(X)

tensor([[0.1833],
        [0.2056]], grad_fn=<AddmmBackward0>)

In [7]:
# access param (w, b)
print(net[2].state_dict())  # print w, b
print(type(net[2].bias))    # parameter object type
print(net[2].bias)          # access param object b
print(net[2].bias.data)     # access param value

OrderedDict([('weight', tensor([[-0.3082,  0.2965,  0.0948,  0.0971, -0.2206,  0.1080,  0.2011,  0.2131]])), ('bias', tensor([0.2803]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.2803], requires_grad=True)
tensor([0.2803])


In [8]:
# access params from nested block
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4))

def block2():
    net = nn.Sequential()
    # nested block1 4 times
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net

rgnet = nn.Sequential(block2(), nn.Linear(4,1))
rgnet(X)



tensor([[0.2224],
        [0.2223]], grad_fn=<AddmmBackward0>)

In [9]:
# show the block structure
print(rgnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [10]:
# initialize params
def init_normal(m):
    # filter out activition layer
    if type(m) ==nn.Linear:
        # normal distribution
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)

net.apply(init_normal)
net[0].weight.data[0], net[0].bias.data[0]

(tensor([-0.0102,  0.0014, -0.0075,  0.0076]), tensor(0.))

In [11]:
def init_constant(m):
    if type(m) ==nn.Linear:
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)

In [12]:
def xavier(m):
    if type(m) ==nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        
# use xavier method to initialize the first linear layer
net[0].apply(xavier)
print(net[0].weight.data[0])

tensor([-0.5645, -0.5712, -0.0088, -0.0739])


In [13]:
# share params
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))

# customize layer

In [14]:
# layer without params
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self, X):
        return X - X.mean()
    
layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [15]:
# layer with params
from turtle import forward
from pyparsing import Forward


class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        # add gradient using nn.Parameter function
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))
        
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)
    
dense = MyLinear(5, 3)
dense.weight

Parameter containing:
tensor([[ 0.6476, -0.0188, -0.3232],
        [-0.8189, -0.0265,  0.4040],
        [-1.5988, -0.4538,  0.0355],
        [-0.2962, -0.7797,  1.2394],
        [ 2.2594, -0.6998, -2.5297]], requires_grad=True)

In [16]:
# compute forward
dense(torch.rand(2, 5))

tensor([[1.4583, 0.0000, 0.0000],
        [0.0000, 0.0000, 1.1508]])

# read and write file

In [None]:
# save and load tensor
x = torch.arange(4)
torch.save(x, 'filename')

x2 = torch.load('filename')
# support list and dictionary

In [None]:
# save and load model -> save weight of all neuron as dict{neuron label: weight}
torch.save(net.state_dict(), 'filename')

clone = MLP()
clone.load_state_dict(torch.load('filename'))