In [17]:
import torch
from torch import nn
from torch import optim

In [18]:
# Feed a batch of 100 samples with 3 features each through a 3 -> 5 linear layer,
# then inspect the output shape and the layer's learnable tensors.
x = torch.randn(100, 3)
layer = nn.Linear(in_features=3, out_features=5)
out = layer(x)
print(out.shape)
for learnable in (layer.weight, layer.bias):
    print(learnable)

torch.Size([100, 5])
Parameter containing:
tensor([[-0.3502,  0.2985,  0.0194],
        [-0.4961, -0.1091,  0.2607],
        [ 0.0743, -0.5648, -0.5665],
        [-0.2612,  0.3939, -0.2900],
        [ 0.0668, -0.5712,  0.0923]], requires_grad=True)
Parameter containing:
tensor([-0.0614, -0.2793,  0.4595, -0.5550,  0.5238], requires_grad=True)


In [19]:
# ReLU zeroes every negative entry; non-negative entries pass through unchanged.
x = torch.randn(2, 5)
layer = nn.ReLU()
activated = layer(x)
print(activated)

tensor([[0.4707, 0.0000, 0.0000, 0.4578, 0.0000],
        [2.0292, 0.0779, 0.0000, 0.4548, 0.5102]])


In [20]:
# In nn.Dropout, p is the probability of zeroing ("killing") each element.
# A freshly built module is in training mode, where the survivors are rescaled
# by 1 / (1 - p), i.e. multiplied by 10 for p=0.9.
x = torch.randn(3, 7)
drop = nn.Dropout(p=0.9)
dropped = drop(x)
print(dropped)

tensor([[0.0000, -0.0000, 0.0000, -0.0000, -0.0000, -0.0000, 0.0000],
        [0.8667, -0.0000, 0.0000, -0.0000, 0.0000, -0.0000, -0.0000],
        [-0.0000, -0.0000, -0.0000, 0.0000, -0.0000, -0.0000, 0.0000]])


In [21]:
class sample_model(nn.Module):
    """Toy network: Linear(5 -> 7) followed by Dropout(p=0.3)."""

    def __init__(self) -> None:
        super().__init__()

        self.drop_layer = nn.Sequential(nn.Linear(5, 7),
                                        nn.Dropout(p=0.3))

    def forward(self, x):
        """Apply the linear layer and then dropout to ``x`` (last dim must be 5)."""
        return self.drop_layer(x)


model = sample_model()
model.train()
x = torch.randn(3, 5)
# Train mode: each element is zeroed with probability p=0.3 and the survivors
# are scaled UP by 1 / (1 - p) = 1 / 0.7 (PyTorch uses "inverted" dropout).
print(model(x))

# Eval mode: Dropout is the identity, so the raw Linear output is returned.
# Nothing is multiplied by the keep probability at test time; the eval values
# are 0.7x the surviving train values only because those were scaled by 1/0.7.
model.eval()
print(model(x))

tensor([[-0.0000, -0.4967, -0.6252, -0.0000,  0.0000, -0.0000,  0.1559],
        [-0.0000, -0.0000, -0.0000, -0.0000,  0.0000, -0.0000,  1.3528],
        [ 0.9132, -0.7828, -0.0988,  0.2376,  0.1345,  0.1172, -0.2609]],
       grad_fn=<MulBackward0>)
tensor([[-0.0954, -0.3477, -0.4376, -0.2687,  0.3355, -0.9282,  0.1092],
        [-0.2191, -1.0686, -0.0944, -0.4416,  0.2446, -0.8632,  0.9469],
        [ 0.6392, -0.5479, -0.0692,  0.1663,  0.0942,  0.0820, -0.1826]],
       grad_fn=<AddmmBackward0>)


In [22]:
# Conv2d with 6 input channels, 7 output channels, a 4x4 kernel, stride 1, padding 1.
layer = nn.Conv2d(in_channels=6, out_channels=7, kernel_size=4, stride=1, padding=1)
batch = torch.randn(32, 6, 5, 5)
# Output spatial size: (5 + 2*1 - 4) / 1 + 1 = 4.
print(layer(batch).shape)
# Weight layout is (out_channels, in_channels, kernel_h, kernel_w).
print(layer.weight.shape)

torch.Size([32, 7, 4, 4])
torch.Size([7, 6, 4, 4])


In [23]:
# Trace the feature-map shape through conv -> conv -> max-pool for a batch of
# 32 single-channel 28x28 inputs.
conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=6, stride=2)
x = torch.randn(32, 1, 28, 28)
feat1 = conv1(x)        # (28 - 6) // 2 + 1 = 12
print(feat1.shape)

conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
feat2 = conv2(feat1)    # 3x3 kernel with padding=1 keeps 12x12
print(feat2.shape)

Maxpool = nn.MaxPool2d(kernel_size=2, stride=(2, 2))
pooled = Maxpool(feat2)  # 12 -> 6
print(pooled.shape)

torch.Size([32, 8, 12, 12])
torch.Size([32, 16, 12, 12])
torch.Size([32, 16, 6, 6])


In [24]:
# A single int is enough: stride defaults to kernel_size, so this is a 2x2
# window moved with stride 2.
maxpool = nn.MaxPool2d(kernel_size=2)
x = torch.randn(1, 1, 6, 6)
for tensor in (x, maxpool(x)):
    print(tensor)

tensor([[[[ 2.6342,  0.8048,  0.2087,  0.8149, -0.1825, -0.6585],
          [ 1.0300, -0.4161, -0.3111, -0.9790,  0.8363,  0.6256],
          [-0.4819,  2.4960,  0.3705,  0.5296, -1.7951,  1.4587],
          [ 0.5513,  0.7049,  0.1465,  0.9725, -0.2986, -0.4650],
          [ 0.1393, -0.9076, -0.1938,  0.7208,  0.0054, -1.4444],
          [ 0.9417,  0.2168,  0.0344, -1.1267, -0.1688,  1.1078]]]])
tensor([[[[2.6342, 0.8149, 0.8363],
          [2.4960, 0.9725, 1.4587],
          [0.9417, 0.7208, 1.1078]]]])


In [25]:
# 2x2 average pooling (stride defaults to the kernel size): each output value is
# the mean of one non-overlapping 2x2 patch.
avgpool = nn.AvgPool2d(kernel_size=2)
x = torch.randn(1, 1, 6, 6)
print(x)
print(avgpool(x))
# Pooling acts per channel, so batch and channel dimensions are preserved.
batch = torch.randn(32, 3, 6, 6)
print(avgpool(batch).shape)

tensor([[[[ 1.3224e-01,  1.3153e+00,  1.1555e+00, -3.1420e-01,  1.4108e+00,
            8.5479e-01],
          [ 7.5792e-01,  1.0340e+00,  9.5334e-01,  2.8559e+00,  2.2333e-01,
            2.6221e-01],
          [-2.6416e-01, -3.8056e-01,  5.9388e-01, -2.5561e-01,  1.0151e+00,
            4.6531e-01],
          [ 9.9889e-02, -8.6791e-01, -5.6894e-01, -1.1764e+00,  1.9463e+00,
           -4.1642e-01],
          [-1.7762e+00,  1.4012e+00, -1.1229e+00,  4.1898e-01,  2.9276e-01,
            1.0660e+00],
          [ 8.8256e-01, -1.2590e-01, -1.7291e-01, -8.0863e-01,  2.7510e-03,
           -8.6627e-01]]]])
tensor([[[[ 0.8099,  1.1626,  0.6878],
          [-0.3532, -0.3518,  0.7526],
          [ 0.0954, -0.4214,  0.1238]]]])
torch.Size([32, 3, 3, 3])


In [31]:
class CNN(nn.Module):
    """Three conv-BatchNorm-ReLU stages, each followed by 2x2 max-pooling, then a linear classifier.

    The classifier expects 32*4*4 flattened features, which matches 3-channel
    32x32 inputs (32 -> 16 -> 8 -> 4 after the three poolings).
    """

    def __init__(self):
        super().__init__()

        # Modules are registered in the same order in which forward() uses them.
        self.conv1 = self._conv_bn_relu(3, 8)
        self.Maxpool1 = nn.MaxPool2d(2)

        self.conv2 = self._conv_bn_relu(8, 16)
        self.Maxpool2 = nn.MaxPool2d(2)

        self.conv3 = self._conv_bn_relu(16, 32)
        self.Maxpool3 = nn.MaxPool2d(2)

        self.fc = nn.Linear(32 * 4 * 4, 10)

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch):
        """Build a 3x3, padding-1 convolution followed by BatchNorm2d and ReLU."""
        return nn.Sequential(nn.Conv2d(in_ch, out_ch, 3, padding=1),
                             nn.BatchNorm2d(out_ch),
                             nn.ReLU())

    def forward(self, x):
        """Return one 10-dimensional output row per input sample."""
        stages = ((self.conv1, self.Maxpool1),
                  (self.conv2, self.Maxpool2),
                  (self.conv3, self.Maxpool3))
        for conv, pool in stages:
            x = pool(conv(x))
        flat = x.flatten(start_dim=1)  # keep the batch dimension
        return self.fc(flat)


x = torch.randn(32, 3, 32, 32)
model = CNN()
print(model(x).shape)

torch.Size([32, 10])


### .parameters() vs .modules() vs .children() 그리고 isinstance의 활용

In [32]:
class MLP(nn.Module):
    """2 -> 3 -> 4 -> 1 perceptron with ReLU hidden activations and a sigmoid output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Sequential(nn.Linear(2, 3), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(3, 4), nn.ReLU())
        self.fc_out = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())

    def forward(self, x):
        """Run ``x`` (last dim 2) through fc1, fc2 and the output block."""
        hidden = self.fc2(self.fc1(x))
        return self.fc_out(hidden)


model = MLP()
sample = torch.randn(2, 2)
print(model(sample).shape)
print(model)

torch.Size([2, 1])
MLP(
  (fc1): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
  (fc_out): Sequential(
    (0): Linear(in_features=4, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


In [33]:
# parameters() returns a lazy generator, so displaying it shows only the generator object.
model.parameters()

<generator object Module.parameters at 0x7fef7d4fbd60>

In [34]:
# Materialise the generator into a list and take its first entry.
list(model.parameters())[0]
# List order: [layer0 weight, layer0 bias, layer1 weight, layer1 bias, ...]

Parameter containing:
tensor([[-0.4047, -0.4735],
        [-0.1519, -0.0417],
        [-0.3605,  0.4823]], requires_grad=True)

In [44]:
# Transfer-learning pattern: freeze the existing body, attach a fresh head, and
# hand the optimizer only the parameters that are still trainable.
model = MLP()

# Freeze every existing parameter. Filtering on requires_grad at this point
# (before the new head is attached) would give an empty list.
for p in model.parameters():
    p.requires_grad = False

# Replace the Sequential(Linear(4, 1), Sigmoid) head with a new 10-output layer.
# Newly constructed parameters have requires_grad=True, so after this line the
# filter below picks up exactly the new head's weight and bias.
model.fc_out = nn.Linear(4, 10)

params = [p for p in model.parameters() if p.requires_grad]
print(params)

# Only the parameters passed to the optimizer (the new head) will be updated.
optimizer = optim.Adam(params, lr=0.1)

[Parameter containing:
tensor([[-0.0650, -0.1084,  0.1067,  0.1666],
        [ 0.0571, -0.0531,  0.0947, -0.4879],
        [ 0.4764, -0.4066, -0.0661,  0.0385],
        [ 0.1560, -0.2527,  0.3926,  0.0518],
        [-0.1650, -0.3495, -0.1433,  0.3744],
        [-0.1757,  0.0883, -0.0162,  0.3376],
        [ 0.0062, -0.2141, -0.0682,  0.0047],
        [-0.3425, -0.3409, -0.4371,  0.4798],
        [-0.1241, -0.4228,  0.2612,  0.2149],
        [ 0.2722,  0.4271, -0.3821,  0.3314]], requires_grad=True), Parameter containing:
tensor([-0.0477, -0.2718, -0.4806, -0.2161, -0.4872,  0.1474,  0.3046, -0.3244,
        -0.3586,  0.2718], requires_grad=True)]


In [45]:
# Walk the parameters together with their dotted names, printing the name and
# then the tensor on separate lines.
for name, p in model.named_parameters():
    print(name, p, sep="\n")

fc1.0.weight
Parameter containing:
tensor([[-0.6263,  0.6195],
        [ 0.1901, -0.1824],
        [-0.0325,  0.3171]])
fc1.0.bias
Parameter containing:
tensor([-0.5685,  0.2003,  0.2153])
fc2.0.weight
Parameter containing:
tensor([[-0.3623,  0.5447,  0.5735],
        [ 0.0638,  0.1563,  0.0239],
        [-0.4210, -0.2674, -0.5510],
        [-0.1019, -0.0227,  0.0784]])
fc2.0.bias
Parameter containing:
tensor([-0.4456, -0.3449,  0.5631, -0.1026])
fc_out.weight
Parameter containing:
tensor([[-0.0650, -0.1084,  0.1067,  0.1666],
        [ 0.0571, -0.0531,  0.0947, -0.4879],
        [ 0.4764, -0.4066, -0.0661,  0.0385],
        [ 0.1560, -0.2527,  0.3926,  0.0518],
        [-0.1650, -0.3495, -0.1433,  0.3744],
        [-0.1757,  0.0883, -0.0162,  0.3376],
        [ 0.0062, -0.2141, -0.0682,  0.0047],
        [-0.3425, -0.3409, -0.4371,  0.4798],
        [-0.1241, -0.4228,  0.2612,  0.2149],
        [ 0.2722,  0.4271, -0.3821,  0.3314]], requires_grad=True)
fc_out.bias
Parameter containing

In [46]:
# modules() is also a lazy generator; wrap it in list(...) to see its contents.
model.modules()

<generator object Module.modules at 0x7fef7ce5a200>

In [48]:
# modules() walks the whole tree: the model itself first, then every nested
# container and each of the layers inside it.
list(model.modules())

[MLP(
   (fc1): Sequential(
     (0): Linear(in_features=2, out_features=3, bias=True)
     (1): ReLU()
   )
   (fc2): Sequential(
     (0): Linear(in_features=3, out_features=4, bias=True)
     (1): ReLU()
   )
   (fc_out): Linear(in_features=4, out_features=10, bias=True)
 ),
 Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=2, out_features=3, bias=True),
 ReLU(),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=3, out_features=4, bias=True),
 ReLU(),
 Linear(in_features=4, out_features=10, bias=True)]

In [51]:
# Collect every nn.Linear once, then show the layers, their weights and their
# gradients. Plotting the per-layer gradients is one way to check for vanishing
# gradients; .grad is None here because backward() has not been run.
linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
print(linear_layers)
print([lin.weight for lin in linear_layers])
print([lin.weight.grad for lin in linear_layers])

[Linear(in_features=2, out_features=3, bias=True), Linear(in_features=3, out_features=4, bias=True), Linear(in_features=4, out_features=10, bias=True)]
[Parameter containing:
tensor([[-0.6263,  0.6195],
        [ 0.1901, -0.1824],
        [-0.0325,  0.3171]]), Parameter containing:
tensor([[-0.3623,  0.5447,  0.5735],
        [ 0.0638,  0.1563,  0.0239],
        [-0.4210, -0.2674, -0.5510],
        [-0.1019, -0.0227,  0.0784]]), Parameter containing:
tensor([[-0.0650, -0.1084,  0.1067,  0.1666],
        [ 0.0571, -0.0531,  0.0947, -0.4879],
        [ 0.4764, -0.4066, -0.0661,  0.0385],
        [ 0.1560, -0.2527,  0.3926,  0.0518],
        [-0.1650, -0.3495, -0.1433,  0.3744],
        [-0.1757,  0.0883, -0.0162,  0.3376],
        [ 0.0062, -0.2141, -0.0682,  0.0047],
        [-0.3425, -0.3409, -0.4371,  0.4798],
        [-0.1241, -0.4228,  0.2612,  0.2149],
        [ 0.2722,  0.4271, -0.3821,  0.3314]], requires_grad=True)]
[None, None, None]


In [53]:
# modules() + isinstance is handy for weight initialisation: visit every module
# and re-initialise only the Linear layers.
for m in model.modules():
    if not isinstance(m, nn.Linear):
        continue
    # Constant 1 makes the effect easy to see in the printout; for real training
    # use e.g. nn.init.kaiming_normal_(m.weight) instead.
    nn.init.constant_(m.weight, 1)

linear_weights = [m.weight for m in model.modules() if isinstance(m, nn.Linear)]
print(linear_weights)

[Parameter containing:
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]]), Parameter containing:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]), Parameter containing:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True)]


In [54]:
# children() is a lazy generator as well; wrap it in list(...) to see its contents.
model.children()

<generator object Module.children at 0x7fef7ce5a900>

In [55]:
# Unlike modules(), children() yields only the direct submodules (one level deep).
list(model.children())

[Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ),
 Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ),
 Linear(in_features=4, out_features=10, bias=True)]

In [56]:
# Each child is itself a callable module: run a batch through the first one only.
x = torch.randn(2, 2)
first_child = next(model.children())
first_child(x)

tensor([[0.0000, 0.0000, 0.0000],
        [0.2814, 1.0502, 1.0652]])

In [57]:
# Take the first two child blocks and chain them into a new, truncated network.
first_two = list(model.children())[:2]
print(*first_two)
sub_network = nn.Sequential(*first_two)
print(sub_network)
print(sub_network(x))

Sequential(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): ReLU()
) Sequential(
  (0): Linear(in_features=3, out_features=4, bias=True)
  (1): ReLU()
)
Sequential(
  (0): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
)
tensor([[0.0000, 0.0000, 0.5631, 0.0000],
        [1.9512, 2.0519, 2.9599, 2.2943]])


### ModuleList vs Sequential

In [58]:
# Wrap the same five-entry list in both containers. Every entry is the single
# `fc` instance, so the five "layers" share one weight and one bias.
fc = nn.Linear(3, 3)
layer_list = [fc] * 5
layers1 = nn.Sequential(*layer_list)
layers2 = nn.ModuleList(layer_list)
for container in (layers1, layers2):
    print(container)

x = torch.randn(1, 3)
print(layers1(x))

# layers2(x) would raise an error: nn.ModuleList cannot be called on an input,
# so each entry has to be applied by hand.
for layer in layers2:
    x = layer(x)
print(x)

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
ModuleList(
  (0-4): 5 x Linear(in_features=3, out_features=3, bias=True)
)
tensor([[-0.1562, -0.3351, -0.3850]], grad_fn=<AddmmBackward0>)
tensor([[-0.1562, -0.3351, -0.3850]], grad_fn=<AddmmBackward0>)


In [59]:
# Why bother with nn.ModuleList instead of a plain Python list?
class testNet(nn.Module):
    """Two stacked Linear(3 -> 3) layers held in an nn.ModuleList."""

    def __init__(self):
        super().__init__()

        # A plain list -- [nn.Linear(3,3), nn.Linear(3,3)] -- would not register
        # the layers as submodules; wrapping them in nn.ModuleList does.
        linears = [nn.Linear(3, 3), nn.Linear(3, 3)]
        self.Module_List = nn.ModuleList(linears)

    def forward(self, x):
        """Apply the stored layers to ``x`` one after another."""
        out = x
        for linear in self.Module_List:
            out = linear(out)
        return out


model = testNet()
print(model(torch.randn(1, 3)))

# With a plain list the layers would not be registered and so would be missing
# from this printout.
print(model)

# Unregistered layers are also invisible to model.parameters(), so the optimizer
# would not find their parameters.
optimizer = optim.Adam(model.parameters(), lr=0.1)

tensor([[ 1.2951, -0.0468,  0.2756]], grad_fn=<AddmmBackward0>)
testNet(
  (Module_List): ModuleList(
    (0-1): 2 x Linear(in_features=3, out_features=3, bias=True)
  )
)


In [60]:
# Then why not always use nn.Sequential instead of nn.ModuleList?
class small_block(nn.Module):
    """Two independent Linear(1 -> 1) branches applied to a pair of inputs."""

    def __init__(self):
        super().__init__()
        self.block_x = nn.Linear(1, 1)
        self.block_y = nn.Linear(1, 1)

    def forward(self, x, y):
        """Return ``(block_x(x), block_y(y))``."""
        return self.block_x(x), self.block_y(y)


block = small_block()
print(block)
model = nn.Sequential(block, block)
print(model)
# model(torch.randn(1), torch.randn(1)) would raise an error: the call goes
# through nn.Sequential's own forward, which accepts a single input, so two
# arguments cannot be passed.

# nn.ModuleList only stores the blocks; the loop below decides how to call
# them, so a two-input / two-output block is fine.
model = nn.ModuleList([block, block])
x = torch.randn(1)
y = torch.randn(1)
for stage in model:
    x, y = stage(x, y)
print(x, y)

small_block(
  (block_x): Linear(in_features=1, out_features=1, bias=True)
  (block_y): Linear(in_features=1, out_features=1, bias=True)
)
Sequential(
  (0): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
)
tensor([-1.2391], grad_fn=<AddBackward0>) tensor([-0.7862], grad_fn=<AddBackward0>)


- `nn.ModuleList`에는 `forward` 함수가 정의되어 있지 않다. 따라서 직접 순회하면서 각 모듈을 호출해야 한다.
- `nn.Sequential`에는 입력 하나를 받아 출력 하나를 내는 `forward` 함수가 정의되어 있다.
- 그래서 모듈의 입출력 형태에 맞게 둘 중 알맞은 것을 골라 사용하면 된다.