In [3]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from multiclass_functions import *

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

cuda:0


In [None]:
# Training and evaluation use the same pipeline here: a bare ToTensor conversion,
# with no augmentation or normalisation added on either side.
transform_train, transform_test = transforms.ToTensor(), transforms.ToTensor()

In [None]:
BATCH_SIZE = 64
TRAIN_RATIO = 0.8
SPLIT_SEED = 0  # fixes the train/validation partition so reruns see the same split

# Two views of the same labelled STL10 "train" split that differ only in their
# transform. random_split returns Subset objects, and a Subset fetches items from
# the dataset it wraps, so assigning `.transform` on the Subset has no effect.
# The validation subset therefore has to wrap a dataset that was constructed with
# the evaluation transform.
full_train_DS = datasets.STL10(root='./data', split="train", transform=transform_train, download=True)
full_val_DS = datasets.STL10(root='./data', split="train", transform=transform_test, download=True)

NoT = int(len(full_train_DS)*TRAIN_RATIO)
NoV = len(full_train_DS) - NoT
train_DS, val_split = torch.utils.data.random_split(
    full_train_DS, [NoT, NoV], generator=torch.Generator().manual_seed(SPLIT_SEED)
)
# Re-use the held-out indices on the evaluation-transform view of the data.
val_DS = torch.utils.data.Subset(full_val_DS, val_split.indices)
test_DS = datasets.STL10(root='./data', split="test", transform=transform_test, download=True)

train_DL = torch.utils.data.DataLoader(dataset=train_DS, batch_size=BATCH_SIZE, shuffle=True)
val_DL = torch.utils.data.DataLoader(dataset=val_DS, batch_size=BATCH_SIZE, shuffle=True)
test_DL = torch.utils.data.DataLoader(dataset=test_DS, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
class CNN_deep(nn.Module):
    """VGG-style CNN: three convolutional stages followed by a two-layer classifier.

    Each stage stacks 3x3 Conv -> BatchNorm -> ReLU units (2, 3 and 3 units with
    32, 64 and 128 channels respectively) and is followed by a 2x2 max-pool.
    The classifier input size is fixed at 128 * 12 * 12 = 18432, i.e. the model
    expects 96x96 inputs (three poolings reduce 96 to 12).

    Args:
        in_channels: number of channels of the input image (default 3).
        num_classes: number of output logits (default 10).

    Note:
        A ReLU sits between the two fully connected layers; without it the two
        Linear layers collapse into a single linear map. As a consequence the
        last layer's state_dict key is ``fc.2.*`` (it was ``fc.1.*`` when the
        classifier had no activation), so older checkpoints must be re-saved.
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 10):
        super().__init__()

        self.conv_block1 = self._make_stage(in_channels, 32, n_units=2)
        self.Maxpool1 = nn.MaxPool2d(kernel_size=2)

        self.conv_block2 = self._make_stage(32, 64, n_units=3)
        self.Maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.conv_block3 = self._make_stage(64, 128, n_units=3)
        self.Maxpool3 = nn.MaxPool2d(kernel_size=2)

        # 128 channels x 12 x 12 spatial positions after the third pooling.
        self.fc = nn.Sequential(nn.Linear(128 * 12 * 12, 512),
                                nn.ReLU(),
                                nn.Linear(512, num_classes))

    @staticmethod
    def _make_stage(in_ch: int, out_ch: int, n_units: int) -> nn.Sequential:
        """Build ``n_units`` consecutive Conv3x3 -> BatchNorm -> ReLU units."""
        layers = []
        for unit in range(n_units):
            # Only the first unit changes the channel count.
            src = in_ch if unit == 0 else out_ch
            layers += [nn.Conv2d(src, out_ch, 3, padding=1),
                       nn.BatchNorm2d(out_ch),
                       nn.ReLU()]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier output for a batch of images (no softmax is applied)."""
        x = self.conv_block1(x)
        x = self.Maxpool1(x)
        x = self.conv_block2(x)
        x = self.Maxpool2(x)
        x = self.conv_block3(x)
        x = self.Maxpool3(x)
        # Keep the batch dimension, flatten channels and spatial positions.
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)

        return x

In [None]:
# Place the model on DEVICE up front, the same way the reloaded copy is placed
# before evaluation, and before the optimizer captures its parameters.
model = CNN_deep().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
EPOCH = 100

# `Train` comes from the multiclass_functions star import; its internals are not
# visible in this notebook. NOTE(review): presumably it returns the per-epoch
# loss history — confirm against the helper.
loss_hist = Train(model=model, train_DL=train_DL, criterion=criterion, optimizer=optimizer, EPOCH=EPOCH)

In [None]:
# Persist only the weights (state_dict), not the module object itself.
save_model_path = "CNN_V2.pth"
torch.save(obj=model.state_dict(), f=save_model_path)

In [None]:
load_model_path = "CNN_V2.pth"
load_model = CNN_deep().to(DEVICE)
# map_location remaps the stored tensors onto the current DEVICE, so a checkpoint
# written on a GPU machine still loads on a CPU-only one. weights_only=True
# restricts unpickling to tensors/containers, which is all a state_dict holds.
checkpoint = torch.load(load_model_path, map_location=DEVICE, weights_only=True)
load_model.load_state_dict(checkpoint)

In [None]:
# Evaluate the reloaded model on the test loader. `Test` comes from the
# multiclass_functions star import; what it prints or returns is not visible here.
Test(load_model, test_DL)

### parameters() vs. modules() vs. children(), 그리고 isinstance의 활용

In [None]:
class MLP(nn.Module):
    """Toy three-stage MLP (2 -> 3 -> 4 -> 1) ending in a sigmoid."""

    def __init__(self):
        super().__init__()
        # Hidden stages: Linear followed by ReLU.
        self.fc1 = nn.Sequential(nn.Linear(2, 3), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(3, 4), nn.ReLU())
        # Output stage: Linear followed by Sigmoid.
        self.fc_out = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())

    def forward(self, x):
        """Pass x through fc1, fc2 and fc_out in that order."""
        hidden = self.fc2(self.fc1(x))
        return self.fc_out(hidden)

model = MLP()
# Push a random batch of two 2-feature samples through the model, then show the
# output shape followed by the module tree.
sample_out = model(torch.randn(2,2))
print(sample_out.shape, model, sep="\n")

In [None]:
# Materialise the parameter generator so the cell displays every weight and bias tensor.
[*model.parameters()]

In [None]:
# for transfer learning
model = MLP()
# Collect the parameters that currently have requires_grad set, then show them.
trainable = list(filter(lambda p: p.requires_grad, model.parameters()))
print(trainable)

In [None]:
# Freeze the whole network in one call: clears requires_grad on every parameter.
model.requires_grad_(False)

# Same filter as before; after freezing no parameter passes it.
print(list(filter(lambda p: p.requires_grad, model.parameters())))

In [None]:
model = MLP()

# Replace the output stage with a fresh 4 -> 10 Linear head. Nothing is frozen in
# this cell, so the filter below keeps every parameter; freezing the model before
# the swap would leave only the new head's parameters in the list.
model.fc_out = nn.Linear(4,10)

params = list(filter(lambda p: p.requires_grad, model.parameters()))
print(params)

In [None]:
from torch import optim
# Adam over all parameters of the current model, learning rate 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Same tensors as parameters(), but each one paired with its dotted name.
[*model.named_parameters()]

In [None]:
# modules() walks recursively: the model itself, its containers and their inner layers.
[*model.modules()]

In [None]:
# Pick out every nn.Linear reachable through modules() (nested ones included),
# then show the layers, their weights, and their weight gradients.
linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
print(linear_layers)
print([layer.weight for layer in linear_layers])
# .grad is None for a weight that has not been through a backward pass.
print([layer.weight.grad for layer in linear_layers])

In [None]:
# Typical use: weight initialisation

# Visit the Linear layers in modules() order and re-initialise each weight in place.
linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
for layer in linear_layers:
    nn.init.kaiming_normal_(layer.weight)

print([layer.weight for layer in linear_layers])

In [None]:
# children() yields only the direct submodules, without descending into them.
[*model.children()]

In [None]:
# Only direct children are inspected, so a Linear wrapped inside a Sequential is not matched.
print(list(filter(lambda m: isinstance(m, nn.Linear), model.children())))

In [None]:
# The recursive walk also reaches Linear layers nested inside containers.
print(list(filter(lambda m: isinstance(m, nn.Linear), model.modules())))

In [None]:
x = torch.randn(2,2)

# children() is a generator, so next() returns the first direct submodule;
# call just that submodule on the input.
first_child = next(model.children())
y = first_child(x)

print(y, y.shape, sep="\n")

In [None]:
# Unpacking hands the first two child modules to print as separate arguments.
first_two = [*model.children()][:2]
print(*first_two)

In [None]:
# Without unpacking, print shows the list repr of the same two child modules.
print([*model.children()][:2])

### ModuleList vs. Sequential

In [4]:
fc = nn.Linear(3,3)
# Five references to the SAME Linear instance: every entry shares one weight and bias.
layer_list = [fc] * 5

In [5]:
# Display the plain Python list; each of its five entries is the same `fc` object.
layer_list

[Linear(in_features=3, out_features=3, bias=True),
 Linear(in_features=3, out_features=3, bias=True),
 Linear(in_features=3, out_features=3, bias=True),
 Linear(in_features=3, out_features=3, bias=True),
 Linear(in_features=3, out_features=3, bias=True)]

In [12]:
# Sequential takes the layers as positional arguments; ModuleList takes the list itself.
layers1 = nn.Sequential(*layer_list)
layers2 = nn.ModuleList(layer_list)
print(layers1, layers2, sep="\n")

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
ModuleList(
  (0-4): 5 x Linear(in_features=3, out_features=3, bias=True)
)


In [17]:
x = torch.randn(2,3)

# 1) nn.Sequential chains the layers itself.
y = layers1(x)
print(y)

# 2) nn.ModuleList is only a container, so the chaining is written by hand.
#    Each path starts from the untouched input `x`; overwriting `x` here would
#    feed the next comparison an already-transformed tensor.
out_module_list = x
for layer in layers2:
    out_module_list = layer(out_module_list)
print(out_module_list)

# 3) A plain Python list of the same layers is iterated the same way.
out_plain_list = x
for layer in layer_list:
    out_plain_list = layer(out_plain_list)
print(out_plain_list)

tensor([[ 0.1083, -0.2002,  0.0855],
        [ 0.0893, -0.2087,  0.1074]], grad_fn=<AddmmBackward0>)
tensor([[ 0.1083, -0.2002,  0.0855],
        [ 0.0893, -0.2087,  0.1074]], grad_fn=<AddmmBackward0>)


In [31]:
# Why use nn.ModuleList when a plain list looks like it would do?
class TestNet(nn.Module):
    """Two 3 -> 3 Linear layers kept in a plain Python list (deliberately not an nn.ModuleList)."""

    def __init__(self):
        super().__init__()

        # Plain list on purpose: this is the case the demo is about.
        self.Module_List = [nn.Linear(3, 3) for _ in range(2)]
        # self.Module_List = nn.ModuleList([nn.Linear(3, 3), nn.Linear(3, 3)])

    def forward(self, x):
        """Apply the stored layers to x one after another and return the result."""
        out = x
        for fc_layer in self.Module_List:
            out = fc_layer(out)
        return out

In [32]:
model = TestNet()
x = torch.randn(2,3)
print(model(x))
print(model) # with a plain list the layers are not registered!

# Unregistered layers mean model.parameters() yields nothing, and the optimizer
# rejects an empty parameter list. That error is the point of this demo, so it is
# caught and shown rather than left to abort a "Run All".
try:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
except ValueError as err:
    print(f"ValueError: {err}")

tensor([[ 1.0270, -0.7560, -0.3374],
        [ 1.2017, -0.7936, -0.5331]], grad_fn=<AddmmBackward0>)
TestNet()


ValueError: optimizer got an empty parameter list

In [41]:
# Then why bother with nn.ModuleList instead of simply using nn.Sequential?
class small_block(nn.Module):
    """Block with two independent 1 -> 1 Linear layers, one per input stream."""

    def __init__(self):
        super().__init__()
        self.block_x = nn.Linear(1,1)
        self.block_y = nn.Linear(1,1)

    def forward(self, x, y):
        """Transform x with block_x and y with block_y; return both as a tuple."""
        return self.block_x(x), self.block_y(y)

block = small_block()
model = nn.Sequential(block, block)
print(block, model, sep="\n")
# model(torch.randn(1,1), torch.randn(1,1)) # error!
# nn.Sequential runs its own forward, so it cannot be called with two inputs!!

# With nn.ModuleList the loop is written by hand, so each block can take and
# return two values. Both entries are the same `block` instance.
model = nn.ModuleList([block] * 2)
x, y = torch.randn(1), torch.randn(1)
for block in model:
    x, y = block(x, y)
print(x,y)

small_block(
  (block_x): Linear(in_features=1, out_features=1, bias=True)
  (block_y): Linear(in_features=1, out_features=1, bias=True)
)
Sequential(
  (0): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
)
tensor([-0.0106], grad_fn=<ViewBackward0>) tensor([-0.4860], grad_fn=<ViewBackward0>)
