In [None]:
import torch
from torch import nn

### Dropout

In [None]:
class sample_model(nn.Module):
    """Toy network: a 5 -> 7 linear layer followed by dropout with p=0.3."""

    def __init__(self):
        super().__init__()
        linear = nn.Linear(5, 7)
        dropout = nn.Dropout(0.3)
        # Wrapped in one Sequential so parameters are named "drop_layer.0.*".
        self.drop_layer = nn.Sequential(linear, dropout)

    def forward(self, x):
        """Run ``x`` through the linear layer and then through dropout."""
        return self.drop_layer(x)

In [None]:
# Training mode: dropout is active, so each output element is zeroed with
# probability 0.3 and the surviving elements are scaled by 1 / (1 - 0.3).
model = sample_model()
model.train()
x = torch.randn(3,5)
print(model(x))

# Evaluation mode: nn.Dropout acts as an identity op, so this prints the plain
# linear-layer output.
model.eval()
print(model(x)) # No scaling by the keep probability (0.7) happens here; PyTorch rescales by 1/0.7 at train time instead.

### CNN 구현

In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from multiclass_functions import *

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(DEVICE)

cuda


In [3]:
# Per-split preprocessing. Both are plain ToTensor() for now, but they are kept
# separate so that augmentation can later be added to the training split only.
transform_train = transforms.ToTensor()
transform_test = transforms.ToTensor()

BATCH_SIZE = 64
TRAIN_RATIO = 0.8  # fraction of the official training set used for training

# A Subset fetches items from the dataset it wraps, so assigning `.transform` on
# a Subset (as returned by random_split) is never read. To give the validation
# split its own transform, the training set is instantiated twice -- once per
# transform -- and both views are indexed with one shared random permutation,
# which keeps the two splits disjoint.
train_base_DS = datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
val_base_DS = datasets.CIFAR10(root='./data', train=True, transform=transform_test)
NoT = int(len(train_base_DS) * TRAIN_RATIO)
NoV = len(train_base_DS) - NoT
shuffled_idx = torch.randperm(len(train_base_DS)).tolist()
train_DS = torch.utils.data.Subset(train_base_DS, shuffled_idx[:NoT])
val_DS = torch.utils.data.Subset(val_base_DS, shuffled_idx[NoT:])
test_DS = datasets.CIFAR10(root='./data', train=False, transform=transform_test)

# Only the training loader is shuffled; sample order does not affect
# validation/test metrics, and a fixed order keeps evaluation repeatable.
train_DL = torch.utils.data.DataLoader(train_DS, batch_size=BATCH_SIZE, shuffle=True)
val_DL = torch.utils.data.DataLoader(val_DS, batch_size=BATCH_SIZE, shuffle=False)
test_DL = torch.utils.data.DataLoader(test_DS, batch_size=BATCH_SIZE, shuffle=False)

Files already downloaded and verified


In [8]:
class CNN_deep(nn.Module):
    """Convolutional classifier mapping 3-channel images to 10 logits.

    Three convolutional stages (2, 3 and 3 Conv-BatchNorm-ReLU units with 32, 64
    and 128 channels), each followed by 2x2 max pooling, feed a two-layer linear
    head. The head expects 2048 flattened features, i.e. 128 channels on a 4x4
    map, which implies 32x32 inputs (e.g. CIFAR-10).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, depth):
        """Return ``depth`` stacked 3x3 Conv -> BatchNorm -> ReLU units as one Sequential."""
        layers = []
        channels = in_channels
        for _ in range(depth):
            layers += [nn.Conv2d(channels, out_channels, 3, padding=1),
                       nn.BatchNorm2d(out_channels),
                       nn.ReLU()]
            channels = out_channels
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()

        # Attribute names and creation order are significant: they determine the
        # state_dict keys and the order in which modules are iterated.
        self.conv_block1 = self._conv_stage(3, 32, depth=2)
        self.Maxpool1 = nn.MaxPool2d(kernel_size=2)

        self.conv_block2 = self._conv_stage(32, 64, depth=3)
        self.Maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.conv_block3 = self._conv_stage(64, 128, depth=3)
        self.Maxpool3 = nn.MaxPool2d(kernel_size=2)

        # NOTE(review): there is no activation between these two Linear layers,
        # so the head is a composition of two affine maps -- confirm intended.
        hidden = nn.Linear(2048, 512)
        logits = nn.Linear(512, 10)
        self.fc = nn.Sequential(hidden, logits)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for an image batch ``x``."""
        stages = ((self.conv_block1, self.Maxpool1),
                  (self.conv_block2, self.Maxpool2),
                  (self.conv_block3, self.Maxpool3))
        for convs, pool in stages:
            x = pool(convs(x))
        # Keep the batch dimension, flatten channels and spatial dims together.
        return self.fc(x.flatten(start_dim=1))

In [12]:
# Build the model directly on DEVICE, before the optimizer is created, instead
# of calling .to(DEVICE) again on every batch.
model = CNN_deep().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
EPOCH = 10
NoT = len(train_DS)

model.train()
loss_hist = []  # average training loss per epoch
for ep in range(EPOCH):
    r_loss = 0  # running sum of per-sample losses for this epoch
    for x_batch, y_batch in train_DL:
        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        y_hat = model(x_batch)
        loss = criterion(y_hat, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average stays exact when the last batch is smaller.
        loss_b = loss.item() * x_batch.shape[0]
        r_loss += loss_b

    # print loss
    loss_e = r_loss / len(train_DL.dataset)
    loss_hist.append(loss_e)
    print(f"Epoch {ep + 1}/{EPOCH}, Loss: {round(loss_e, 3)}")
    print("-" * 20)
 

Epoch 1/10, Loss: 1.65
--------------------
Epoch 2/10, Loss: 1.061
--------------------
Epoch 3/10, Loss: 0.833
--------------------
Epoch 4/10, Loss: 0.699
--------------------
Epoch 5/10, Loss: 0.595
--------------------
Epoch 6/10, Loss: 0.507
--------------------
Epoch 7/10, Loss: 0.428
--------------------
Epoch 8/10, Loss: 0.367
--------------------
Epoch 9/10, Loss: 0.31
--------------------
Epoch 10/10, Loss: 0.26
--------------------


In [21]:
# Evaluate the trained model on the test set. The model is moved to DEVICE and
# switched to eval mode once, up front, rather than once per batch.
model = model.to(DEVICE)
model.eval()
correct_r = 0  # running count of correct predictions
with torch.no_grad():
    for x_batch, y_batch in test_DL:
        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        y_hat = model(x_batch)
        pred = torch.argmax(y_hat, dim=1)  # predicted class = largest logit
        correct_b = (pred == y_batch).sum().item()
        correct_r += correct_b
accuracy = correct_r / len(test_DL.dataset) * 100
print(f"Test accuracy: {correct_r}/{len(test_DL.dataset)} ({round(accuracy, 1)} %)")
        

Test accuracy: 8187/10000 (81.9 %)


In [15]:
# Save only the model's state_dict (its tensors), not the module object itself.
save_model_path = "./temp_model.pth"
torch.save(obj=model.state_dict(), f=save_model_path)

In [16]:
# Rebuild the architecture, then restore the saved weights into it.
# map_location lets a checkpoint saved from a CUDA model load on a CPU-only
# machine; weights_only=True restricts unpickling to tensors and plain
# containers instead of executing arbitrary pickled objects.
load_model = CNN_deep()
load_model.load_state_dict(torch.load(save_model_path, map_location=DEVICE, weights_only=True))

<All keys matched successfully>

In [22]:
# Evaluate the reloaded checkpoint on the test set.
# eval() must be called on `load_model` itself: a freshly constructed module is
# in training mode, in which BatchNorm normalises with per-batch statistics and
# keeps updating its running statistics. The trained `model` is left untouched.
load_model = load_model.to(DEVICE)
load_model.eval()
correct_r = 0  # running count of correct predictions
with torch.no_grad():
    for x_batch, y_batch in test_DL:
        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        y_hat = load_model(x_batch)
        pred = torch.argmax(y_hat, dim=1)  # predicted class = largest logit
        correct_b = (pred == y_batch).sum().item()
        correct_r += correct_b
accuracy = correct_r / len(test_DL.dataset) * 100
print(f"Test accuracy: {correct_r}/{len(test_DL.dataset)} ({round(accuracy, 1)} %)")

Test accuracy: 8187/10000 (81.9 %)


### parameters() vs modules() vs children() 그리고 isinstance 의 활용

In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from multiclass_functions import *

In [2]:
class MLP(nn.Module):
    """Small fully connected network: 2 -> 3 -> 4 -> 1 with a sigmoid output."""

    def __init__(self):
        super().__init__()
        # Each stage is a (Linear, activation) pair; the registration order
        # fc1, fc2, fc_out is what children()/modules() iterate over.
        self.fc1 = nn.Sequential(nn.Linear(2, 3), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(3, 4), nn.ReLU())
        self.fc_out = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())

    def forward(self, x):
        """Map a (batch, 2) input to a (batch, 1) output in the open interval (0, 1)."""
        hidden = self.fc2(self.fc1(x))
        return self.fc_out(hidden)

In [5]:
# for transfer learning
# First print every parameter, then only those that currently require gradients.
model = MLP()
all_params = list(model.parameters())
print(all_params)
print()
print([param for param in model.parameters() if param.requires_grad])

[Parameter containing:
tensor([[ 0.5339, -0.6109],
        [-0.3717, -0.0799],
        [-0.3760,  0.0765]], requires_grad=True), Parameter containing:
tensor([-0.5630,  0.1454,  0.3495], requires_grad=True), Parameter containing:
tensor([[ 0.1106,  0.2840, -0.2463],
        [-0.1399, -0.2008, -0.4529],
        [ 0.5536,  0.1315, -0.3006],
        [-0.0438,  0.4574,  0.0928]], requires_grad=True), Parameter containing:
tensor([-0.1177,  0.3327, -0.1370, -0.2964], requires_grad=True), Parameter containing:
tensor([[-0.4216,  0.1111,  0.2893, -0.2780]], requires_grad=True), Parameter containing:
tensor([0.4232], requires_grad=True)]

[Parameter containing:
tensor([[ 0.5339, -0.6109],
        [-0.3717, -0.0799],
        [-0.3760,  0.0765]], requires_grad=True), Parameter containing:
tensor([-0.5630,  0.1454,  0.3495], requires_grad=True), Parameter containing:
tensor([[ 0.1106,  0.2840, -0.2463],
        [-0.1399, -0.2008, -0.4529],
        [ 0.5536,  0.1315, -0.3006],
        [-0.0438,  0

In [6]:
# Freeze every parameter of the model so that none of them requires gradients.
for param in model.parameters():
    param.requires_grad_(False)

# After freezing, the list of trainable parameters is empty.
print([param for param in model.parameters() if param.requires_grad])

[]


In [7]:
# Adam over all of the model's parameters, trainable or not.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [12]:
# modules() walks the whole module tree; isinstance() picks out the Linear layers.
linear_layers = [module for module in model.modules() if isinstance(module, nn.Linear)]
print(linear_layers)
print()
print([layer.weight for layer in linear_layers])
print()
# .grad stays None until a backward pass has populated it.
print([layer.weight.grad for layer in linear_layers])

[Linear(in_features=2, out_features=3, bias=True), Linear(in_features=3, out_features=4, bias=True), Linear(in_features=4, out_features=1, bias=True)]

[Parameter containing:
tensor([[ 0.5339, -0.6109],
        [-0.3717, -0.0799],
        [-0.3760,  0.0765]]), Parameter containing:
tensor([[ 0.1106,  0.2840, -0.2463],
        [-0.1399, -0.2008, -0.4529],
        [ 0.5536,  0.1315, -0.3006],
        [-0.0438,  0.4574,  0.0928]]), Parameter containing:
tensor([[-0.4216,  0.1111,  0.2893, -0.2780]])]

[None, None, None]


In [13]:
# Use case: weight initialisation.
# Re-initialise the weight of every Linear layer in place with Kaiming-normal.
linear_layers = [module for module in model.modules() if isinstance(module, nn.Linear)]
for layer in linear_layers:
    nn.init.kaiming_normal_(layer.weight)
print([layer.weight for layer in linear_layers])

[Parameter containing:
tensor([[ 0.4282,  0.0053],
        [-0.9767, -1.1543],
        [-0.3304, -0.5403]]), Parameter containing:
tensor([[ 0.3336,  0.8146, -0.3432],
        [-0.7594, -0.2373,  1.3115],
        [ 1.0854, -0.0570,  1.7495],
        [-2.1798, -0.8359, -0.7199]]), Parameter containing:
tensor([[ 0.0904,  0.5306, -0.6739,  0.0821]])]


In [15]:
# children() yields only the direct sub-modules; take the first one (fc1) and
# run a batch through it on its own.
x = torch.randn(100, 2)
first_child = next(model.children())
y = first_child(x)
print(y.shape)

torch.Size([100, 3])


### ModuleList vs Sequential

In [21]:
# NOTE(review): all five entries refer to the SAME Linear module, so the
# "layers" built from this list share one weight and one bias -- confirm that
# this is intended rather than five independent layers.
fc = nn.Linear(3, 3)
layer_list = [fc] * 5

In [23]:
# Wrap the same list of layers in both container types and compare their reprs.
layers1 = nn.Sequential(*layer_list)
layers2 = nn.ModuleList(layer_list)
for container in (layers1, layers2):
    print(container)

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
ModuleList(
  (0-4): 5 x Linear(in_features=3, out_features=3, bias=True)
)


In [32]:
# nn.Sequential chains its layers itself, so it can be called directly.
x = torch.randn(2,3)
y1 = layers1(x)
print(y1)

# nn.ModuleList defines no forward(), so calling it directly raises an error:
# y2 = layers2(x)
# print(y2)

# Instead, iterate over the contained layers and apply them one by one. The
# running value is kept in y2 so that the input x is not overwritten and the
# result can be compared with y1.
y2 = x
for layer in layers2:
    y2 = layer(y2)
print(y2)

tensor([[ 1.0271, -0.3800, -0.5289],
        [ 0.9759, -0.3498, -0.4998]], grad_fn=<AddmmBackward0>)
tensor([[ 1.0271, -0.3800, -0.5289],
        [ 0.9759, -0.3498, -0.4998]], grad_fn=<AddmmBackward0>)
tensor([[ 1.0029, -0.3905, -0.5127],
        [ 1.0046, -0.3902, -0.5138]], grad_fn=<AddmmBackward0>)


In [38]:
# If we have to loop anyway, why use nn.ModuleList instead of a plain list?
class TestNet(nn.Module):
    """Two stacked 3 -> 3 linear layers held in an nn.ModuleList."""

    def __init__(self):
        super().__init__()

        # Plain-list alternative, for comparison:
        # self.Module_List = [nn.Linear(3, 3), nn.Linear(3, 3)]
        first = nn.Linear(3, 3)
        second = nn.Linear(3, 3)
        self.Module_List = nn.ModuleList([first, second])

    def forward(self, x):
        """Apply the contained layers to ``x`` in order and return the result."""
        out = x
        for linear in self.Module_List:
            out = linear(out)
        return out

In [39]:
model = TestNet()
x = torch.randn(2,3)
print(model(x))
print(model) # with a plain Python list the layers would not be registered as sub-modules!

# `optim` is never imported by name in this notebook; use the fully qualified
# torch.optim, as the other cells do.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # unregistered layers would not show up in model.parameters()!

In [None]:
# Then why not just use nn.Sequential -- why bother with nn.ModuleList?
class small_block(nn.Module):
    """Block with two independent 1 -> 1 linear layers, one per input."""

    def __init__(self):
        super().__init__()
        self.block_x = nn.Linear(1, 1)
        self.block_y = nn.Linear(1, 1)

    def forward(self, x, y):
        """Return ``(block_x(x), block_y(y))``; takes two inputs, returns two outputs."""
        return self.block_x(x), self.block_y(y)

block = small_block()
print(block)

# nn.Sequential: the container's own forward() is what gets called, so a
# two-input block cannot be driven through it.
model = nn.Sequential(block, block)
print(model)
# model(torch.randn(1,1), torch.randn(1,1)) # error!

# nn.ModuleList: we write the loop ourselves, so each block can take and return
# two values.
model = nn.ModuleList([block, block])
x, y = torch.randn(1), torch.randn(1)
for stage in model:
    x, y = stage(x, y)
print(x,y)