# №5. Составление моделей нейронных сетей (НС)

In [1]:
import torch
import torch.nn as nn

## 1. nn.__Sequential__

In [2]:
# Build a 784 -> 128 -> 10 perceptron by passing the layers positionally.
# Sequential registers positional modules under the string indices "0", "1", "2",
# which is why the state_dict keys look like "0.weight" and "2.bias".
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

In [3]:
# Bare last expression: the notebook renders the module's repr (its layer tree).
model

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=10, bias=True)
)

In [6]:
# Forward a random batch of 16 inputs with 784 features each through the model.
x = torch.rand([16, 784], dtype=torch.float32)
out = model(x)
# Last expression of the cell: displays the shape of the model output.
out.shape

torch.Size([16, 10])

In [9]:
# List the names of the tensors stored in the state dict; the ReLU at index 1
# has no parameters, so only the Linear layers "0" and "2" appear.
model.state_dict().keys()

odict_keys(['0.weight', '0.bias', '2.weight', '2.bias'])

In [10]:
# Fetch a single tensor from the state dict by its key: the bias of layer "0".
model.state_dict()['0.bias']

tensor([-0.0176,  0.0242,  0.0034,  0.0239, -0.0010, -0.0163, -0.0325,  0.0056,
         0.0089,  0.0021, -0.0026,  0.0286,  0.0109, -0.0193,  0.0042, -0.0231,
         0.0170, -0.0015,  0.0313, -0.0140,  0.0313, -0.0032, -0.0175, -0.0288,
         0.0107,  0.0073, -0.0318,  0.0302, -0.0069, -0.0158, -0.0167, -0.0265,
         0.0356, -0.0316, -0.0189, -0.0193,  0.0335,  0.0308, -0.0039,  0.0271,
        -0.0023, -0.0159,  0.0323,  0.0074, -0.0220,  0.0336,  0.0289, -0.0253,
        -0.0342, -0.0353, -0.0002, -0.0336, -0.0349,  0.0052,  0.0269, -0.0111,
        -0.0254,  0.0012,  0.0189, -0.0256, -0.0296, -0.0144, -0.0146, -0.0248,
        -0.0098, -0.0203, -0.0271, -0.0251, -0.0168, -0.0155,  0.0158,  0.0340,
        -0.0074, -0.0034, -0.0224, -0.0349,  0.0060,  0.0004,  0.0157,  0.0325,
        -0.0113,  0.0055, -0.0187,  0.0026,  0.0156, -0.0265, -0.0288,  0.0259,
         0.0028, -0.0338, -0.0303,  0.0117, -0.0299,  0.0236, -0.0026, -0.0345,
         0.0218, -0.0289, -0.0329, -0.02

In [11]:
# parameters() yields the learnable tensors one by one; print only the first
# one (and its shape) and stop, to keep the output short.
for parameter in model.parameters():
    print(parameter)
    print(parameter.shape)
    break

Parameter containing:
tensor([[ 0.0113,  0.0204, -0.0318,  ...,  0.0058, -0.0090, -0.0023],
        [-0.0055,  0.0020,  0.0335,  ..., -0.0321,  0.0205, -0.0295],
        [ 0.0248, -0.0019,  0.0071,  ...,  0.0080, -0.0241,  0.0081],
        ...,
        [-0.0013,  0.0302, -0.0067,  ..., -0.0036,  0.0033,  0.0106],
        [-0.0045,  0.0159,  0.0110,  ..., -0.0349, -0.0175, -0.0061],
        [-0.0302,  0.0184, -0.0196,  ..., -0.0017, -0.0230,  0.0205]],
       requires_grad=True)
torch.Size([128, 784])


In [12]:
# train() switches the module to training mode and eval() to evaluation mode.
# The mode matters for layers such as Dropout and BatchNorm, which behave
# differently during training than at inference. This model has neither, so
# the two calls only demonstrate the API; the model is left in eval mode.
model.train()
model.eval()

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=10, bias=True)
)

## 2. model.__add_module__

In [13]:
# Start from an empty Sequential and register the layers one at a time under
# explicit names; the names replace the positional indices in the repr and in
# the state_dict keys.
model = nn.Sequential()
model.add_module('layer_1', nn.Linear(784, 128))
model.add_module('relu', nn.ReLU())
model.add_module('layer_2', nn.Linear(128, 10))

In [14]:
# Display the module tree; the layers are now listed under their given names.
model

Sequential(
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (layer_2): Linear(in_features=128, out_features=10, bias=True)
)

In [15]:
# Submodules registered by name are reachable as attributes of the container.
model.layer_1, model.relu

(Linear(in_features=784, out_features=128, bias=True), ReLU())

In [18]:
# The state_dict keys are prefixed with the layer names instead of indices.
model.state_dict().keys()

odict_keys(['layer_1.weight', 'layer_1.bias', 'layer_2.weight', 'layer_2.bias'])

## 3. __class__ MyModel

In [19]:
class MyModel(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear with 128 hidden units.

    Args:
        input: number of input features of the first linear layer.
        output: number of output features of the second linear layer.
    """

    def __init__(self, input, output):
        super().__init__()
        hidden_units = 128
        # Keep the registration order (layer_1, layer_2, act_func): it defines
        # the order of submodules in the repr and of the weight initialisation.
        self.layer_1 = nn.Linear(input, hidden_units)
        self.layer_2 = nn.Linear(hidden_units, output)
        self.act_func = nn.ReLU()

    def forward(self, x):
        """Apply layer_1, the activation and layer_2 to ``x`` and return the result."""
        hidden = self.act_func(self.layer_1(x))
        return self.layer_2(hidden)

In [21]:
# Instantiate the custom module with 784 input features and 10 outputs,
# then display its layer tree.
model = MyModel(784, 10)
model

MyModel(
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (layer_2): Linear(in_features=128, out_features=10, bias=True)
  (act_func): ReLU()
)

In [25]:
# Batched input: 16 samples with 784 features each.
x = torch.rand([16, 784], dtype=torch.float32)
out = model(x)
out.shape

torch.Size([16, 10])

In [27]:
# Unbatched input: a single 1-D vector of 784 features also works; the result
# is then a 1-D tensor with 10 values, with no batch dimension.
x = torch.rand([784], dtype=torch.float32)
out = model(x)
out.shape, out

(torch.Size([10]),
 tensor([-0.0443,  0.1750, -0.0504,  0.0315, -0.1435, -0.1335,  0.1018,  0.0016,
         -0.0040,  0.1839], grad_fn=<ViewBackward0>))

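Модель с двумя входами (x, y) и двумя выходами (out, after_relu): y прибавляется к выходу первого слоя перед ReLU, поэтому y должен иметь размер скрытого слоя (128).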

<img src="data/my_images/nn_schema.png" width=200>

In [36]:
# модель с 2-мя входами и 2-мя выходами
class MyModel(nn.Module):
    """Perceptron with two inputs and two outputs.

    The second input is added to the output of the first linear layer before
    the activation; the activated hidden tensor is returned next to the final
    output.

    Args:
        input: number of input features of the first linear layer.
        output: number of output features of the second linear layer.
    """

    def __init__(self, input, output):
        super().__init__()
        hidden_units = 128
        # Registration order is kept as layer_1, layer_2, act_func.
        self.layer_1 = nn.Linear(input, hidden_units)
        self.layer_2 = nn.Linear(hidden_units, output)
        self.act_func = nn.ReLU()

    def forward(self, x, y):
        """Return ``(out, after_relu)`` for the inputs ``x`` and ``y``.

        ``y`` is summed with ``layer_1(x)``, so it has to be broadcastable to
        that tensor's shape.
        """
        after_relu = self.act_func(self.layer_1(x) + y)
        return self.layer_2(after_relu), after_relu

In [37]:
# Re-create the model from the two-input class defined above and display it;
# the layer tree is the same, only forward() differs.
model = MyModel(784, 10)
model

MyModel(
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (layer_2): Linear(in_features=128, out_features=10, bias=True)
  (act_func): ReLU()
)

In [38]:
# x feeds the first linear layer; y is added to that layer's 128-wide output,
# so it is created with shape (16, 128).
x = torch.rand([16, 784], dtype=torch.float32)
y = torch.rand([16, 128], dtype=torch.float32)

# forward() returns a tuple: out[0] is the final output, out[1] the tensor after ReLU.
out = model(x, y)
out[0].shape, out[1].shape

(torch.Size([16, 10]), torch.Size([16, 128]))

## 4. nn.**ModuleDict** & nn.**ModuleList**

In [47]:
class MyModel(nn.Module):
    """Funnel-shaped MLP assembled with ``nn.ModuleDict`` and ``nn.ModuleList``.

    The network is ``num_layers`` blocks of ``Linear -> activation`` followed by
    one output ``Linear``. The first hidden layer has ``hidden_size`` units and
    every following one has half as many, but never fewer than one.

    Args:
        input: number of input features.
        output: number of output features of the last layer.
        hidden_size: width of the first hidden layer.
        activation_func: key of the activation to use: ``'relu'`` or ``'lrelu'``.
        num_layers: number of hidden ``Linear -> activation`` blocks.

    Raises:
        KeyError: if ``activation_func`` is not a known activation key.
    """

    def __init__(self, input, output, hidden_size=2048, activation_func='relu', num_layers=10):
        super().__init__()
        # Dictionary of available activation functions, so the activation can
        # be switched with a single string argument.
        self.activations = nn.ModuleDict({
            'lrelu': nn.LeakyReLU(),
            'relu': nn.ReLU()
        })
        if activation_func not in self.activations:
            raise KeyError(
                f'unknown activation_func {activation_func!r}; '
                f'expected one of {list(self.activations.keys())}'
            )
        # One activation instance is registered under every act_i name; these
        # activations have no parameters, so sharing the instance is harmless.
        activation = self.activations[activation_func]

        # Layers are kept in a ModuleList but registered through add_module so
        # that they get readable names (layer_0, act_0, ..., layer_out).
        # NOTE(review): with named keys the list is meant to be iterated;
        # integer indexing into self.layers presumably does not work - confirm.
        self.layers = nn.ModuleList()
        in_features = input
        width = max(1, hidden_size)
        for i in range(num_layers):
            self.layers.add_module(f'layer_{i}', nn.Linear(in_features, width))
            self.layers.add_module(f'act_{i}', activation)
            in_features = width
            # Halve the width for the next block but never go below one unit:
            # a zero-width layer would cut the output off from the input.
            width = max(1, width // 2)
        self.layers.add_module('layer_out', nn.Linear(in_features, output))

    def forward(self, x):
        """Run ``x`` through all layers and return a list of tensors.

        The list holds the outputs of the entries at positions 2, 6, 10, ...
        (the linear layers layer_1, layer_3, ... before their activation),
        followed by the output of the last layer.
        """
        outputs = []
        last_index = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # Every second Linear layer, starting from layer_1. The last layer
            # is excluded here because it is always appended below.
            if i % 4 == 2 and i != last_index:
                outputs.append(x)
        outputs.append(x)  # output of the last layer
        return outputs

In [48]:
# Only the input and output sizes are given; hidden_size and activation_func
# keep their defaults (2048 and 'relu').
model = MyModel(784, 2)
model

MyModel(
  (activations): ModuleDict(
    (lrelu): LeakyReLU(negative_slope=0.01)
    (relu): ReLU()
  )
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=2048, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2048, out_features=1024, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1024, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=256, bias=True)
    (7): ReLU()
    (8): Linear(in_features=256, out_features=128, bias=True)
    (9): ReLU()
    (10): Linear(in_features=128, out_features=64, bias=True)
    (11): ReLU()
    (12): Linear(in_features=64, out_features=32, bias=True)
    (13): ReLU()
    (14): Linear(in_features=32, out_features=16, bias=True)
    (15): ReLU()
    (16): Linear(in_features=16, out_features=8, bias=True)
    (17): ReLU()
    (18): Linear(in_features=8, out_features=4, bias=True)
    (19): ReLU()
    (20): Linear(in_features=4, out_features=2, bias=True)
  )
)

In [None]:
# Run a random batch through the deep model; forward() returns a list of tensors.
x = torch.rand([16, 784])
out = model(x)
# Show how many tensors were returned together with the shape of the second
# one, matching the tuple recorded as this cell's output.
len(out), out[1].shape

(6, torch.Size([16, 256]))

In [51]:
# Print the shape of every tensor returned by forward(); iterating over `out`
# avoids a hard-coded count that would have to match len(out).
for i, tensor in enumerate(out):
    print(f'out[{i}].shape={tensor.shape}')

out[0].shape=torch.Size([16, 1024])
out[1].shape=torch.Size([16, 256])
out[2].shape=torch.Size([16, 64])
out[3].shape=torch.Size([16, 16])
out[4].shape=torch.Size([16, 4])
out[5].shape=torch.Size([16, 2])
