In [1]:
import torch
import torch.nn as nn

In [2]:
# 배치 크기 x 채널 x 높이(height) x 너비(width)의 크기의 텐서를 선언
inputs = torch.Tensor(1, 1, 28, 28)
print("텐서의 크기 : {}".format(inputs.shape))

텐서의 크기 : torch.Size([1, 1, 28, 28])


In [13]:
# 첫 번째 합성곱 층. 1채널 짜리 입력받아서 32채널로 뽑아낸다.
conv1 = nn.Conv2d(1, 32, 3, padding=1)
print(conv1)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [4]:
# 두 번째 합성곱 층, 32채널 짜리를 뽑아내서 64채널로 뽑아낸다.
conv2 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1)
print(conv2)

Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [6]:
# 맥스풀링. 정수 하나를 인자로 넣으면 커널 사이즈와 스트라이드가 둘 다 해당값으로 지정됨.
pool = nn.MaxPool2d(2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


# 연결하여 모델 만들기

In [14]:
out = conv1(inputs)
print(out.shape)

torch.Size([1, 32, 28, 28])


In [15]:
out = pool(out)
print(out.shape)

torch.Size([1, 32, 14, 14])


In [16]:
out = conv2(out)
print(out.shape)

torch.Size([1, 64, 14, 14])


In [17]:
out = pool(out)
print(out.shape)

torch.Size([1, 64, 7, 7])


In [28]:
out.size(0), out.size(1), out.size(2), out.size(3)

(1, 64, 7, 7)

In [29]:
# .view()를 사용하여 텐서를 펼치기
out = out.view(out.size(0), -1)
print(out.shape)

torch.Size([1, 3136])


In [30]:
# Fully_Connected layer 통과시키기.
fc = nn.Linear(3136, 10) # input_dim = 3,136, output_dim = 10
out = fc(out)
print(out.shape)

torch.Size([1, 10])


# CNN으로 MNIST 분류하기

In [49]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn as nn

In [34]:
# GPU 사용 가능한지 확인
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
# 랜덤 시드 고정
torch.manual_seed(777)

# GPU 사용 가능일 경우 랜덤 시드 고정
if device == 'cuda':
    torch.nn.manual_seed_all(777)

cpu


In [38]:
# 학습에 사용할 파라미터 설정.
lr = 0.001
t_epochs = 15
b_size = 100

In [39]:
# 데이터로더를 사용하여 데이터를 다루기 위해 데이터셋 정의.
mnist_train = dsets.MNIST(root='MNIST_data/',
                         train = True, # 훈련 데이터 다운로드
                         transform = transforms.ToTensor(),
                         download = True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train = False, # 테스트 데이터 다운로드
                         transform = transforms.ToTensor(),
                         download = True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST\raw\train-images-idx3-ubyte.gz to MNIST_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST\raw\train-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST_data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST\raw



In [41]:
# 데이터로더를 사용하여 배치 크기 정해준다.
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = b_size,
                                          shuffle = True,
                                          drop_last = True)

In [112]:
next(iter(data_loader))

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ..

In [63]:
# 클래스로 모델 설계.
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 첫 번째 층
        # Image input shape = (?, 28, 28, 1)
        #    Conv    -> (?, 28, 28, 32)
        #    Pool    -> (?, 14, 14, 32)
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        # 두 번째 층
        # Image input shape = (?, 14, 14, 32)
        #    Conv    -> (?, 14, 14, 64)
        #    Pool    -> (?, 7, 7, 64)
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        # 전결합층 7*7*64 inputs -> 10 outputs
        self.fc = nn.Linear(7*7*64, 10, bias=True)
        
        # 전결합층 한정으로 가중치 초기화
        nn.init.xavier_uniform_(self.fc.weight)
        
    def forward(self, x):
        out = self.layer2(self.layer1(x))
        out= out.view(out.size(0), -1)
        return self.fc(out)

In [70]:
model = CNN().to(device)

In [71]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [72]:
total_batch = len(data_loader)
print("총 배치의 수 : {}".format(total_batch))

총 배치의 수 : 600


In [73]:
from tqdm import tqdm

In [74]:
for i in tqdm(range(t_epochs)):
    avg_cost = 0
    
    for X, Y in data_loader: # 미니 배치 단위로 꺼내온다. X는 미니배치, Y는 레이블
        # image is already size of (28*28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('[Epoch: {:>4}] cost = {:>.9}'.format(i + 1, avg_cost))

  7%|█████▌                                                                             | 1/15 [00:43<10:11, 43.71s/it]

[Epoch:    1] cost = 0.252588153


 13%|███████████                                                                        | 2/15 [01:29<09:40, 44.68s/it]

[Epoch:    2] cost = 0.0648543537


 20%|████████████████▌                                                                  | 3/15 [02:16<09:09, 45.79s/it]

[Epoch:    3] cost = 0.0467697121


 27%|██████████████████████▏                                                            | 4/15 [03:00<08:19, 45.39s/it]

[Epoch:    4] cost = 0.0376489572


 33%|███████████████████████████▋                                                       | 5/15 [03:45<07:30, 45.08s/it]

[Epoch:    5] cost = 0.0331983343


 40%|█████████████████████████████████▏                                                 | 6/15 [04:29<06:43, 44.84s/it]

[Epoch:    6] cost = 0.0261197574


 47%|██████████████████████████████████████▋                                            | 7/15 [05:14<05:57, 44.74s/it]

[Epoch:    7] cost = 0.0233416744


 53%|████████████████████████████████████████████▎                                      | 8/15 [05:59<05:13, 44.84s/it]

[Epoch:    8] cost = 0.0192288663


 60%|█████████████████████████████████████████████████▊                                 | 9/15 [06:44<04:29, 44.95s/it]

[Epoch:    9] cost = 0.016535975


 67%|██████████████████████████████████████████████████████▋                           | 10/15 [07:29<03:44, 44.80s/it]

[Epoch:   10] cost = 0.0133624785


 73%|████████████████████████████████████████████████████████████▏                     | 11/15 [08:14<02:59, 44.87s/it]

[Epoch:   11] cost = 0.0125022372


 80%|█████████████████████████████████████████████████████████████████▌                | 12/15 [08:58<02:14, 44.82s/it]

[Epoch:   12] cost = 0.00955538452


 87%|███████████████████████████████████████████████████████████████████████           | 13/15 [09:44<01:30, 45.18s/it]

[Epoch:   13] cost = 0.00948165264


 93%|████████████████████████████████████████████████████████████████████████████▌     | 14/15 [10:30<00:45, 45.19s/it]

[Epoch:   14] cost = 0.00773167564


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [11:15<00:00, 45.04s/it]

[Epoch:   15] cost = 0.00647154311





In [75]:
# 테스트 해보기
# 학습을 진행하지 않을 것이므로 torch.no_grad()
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())



Accuracy: 0.9889000058174133


In [98]:
mnist_test.test_data.view(len(mnist_test),1, 28, 28).shape

torch.Size([10000, 1, 28, 28])

In [93]:
torch.argmax(prediction[:5], 1) == Y_test[:5]

tensor([True, True, True, True, True])

In [92]:
Y_test

tensor([7, 2, 1,  ..., 4, 5, 6])

# 깊은 CNN 쌓기

In [113]:
class CNN2(nn.Module):
    def __init__(self):
        super(CNN2, self).__init__()
        self.keep_prob = 0.5
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1))
        
        self.fc1 = nn.Linear(4*4*128, 625, bias=True)
        nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = nn.Sequential(
            self.fc1,
            nn.ReLU(),
            nn.Dropout(p=1- self.keep_prob))
        
        
        self.fc2 = nn.Linear(625, 10, bias=True)
        nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer3(self.layer2(self.layer1(x)))
        out = out.view(out.size(0), -1)
        out = self.fc2(self.layer4(out))
        return out

In [114]:
model2 = CNN2().to(device)

In [115]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr = lr)

In [116]:
for i in tqdm(range(t_epochs)):
    avg_cost = 0
    
    for X, Y in data_loader: # 미니 배치 단위로 꺼내온다. X는 미니배치, Y는 레이블
        # image is already size of (28*28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        hypothesis = model2(X)
        cost = criterion(hypothesis, Y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('[Epoch: {:>4}] cost = {:>.9}'.format(i + 1, avg_cost))

  7%|█████▌                                                                             | 1/15 [01:08<15:54, 68.17s/it]

[Epoch:    1] cost = 0.188868806


 13%|███████████                                                                        | 2/15 [02:22<15:33, 71.78s/it]

[Epoch:    2] cost = 0.0504906327


 20%|████████████████▌                                                                  | 3/15 [03:39<14:50, 74.22s/it]

[Epoch:    3] cost = 0.0366849564


 27%|██████████████████████▏                                                            | 4/15 [04:54<13:40, 74.59s/it]

[Epoch:    4] cost = 0.0282737333


 33%|███████████████████████████▋                                                       | 5/15 [06:10<12:31, 75.13s/it]

[Epoch:    5] cost = 0.0217411164


 40%|█████████████████████████████████▏                                                 | 6/15 [07:23<11:07, 74.21s/it]

[Epoch:    6] cost = 0.0174565427


 47%|██████████████████████████████████████▋                                            | 7/15 [08:36<09:49, 73.74s/it]

[Epoch:    7] cost = 0.0164622497


 53%|████████████████████████████████████████████▎                                      | 8/15 [09:48<08:32, 73.27s/it]

[Epoch:    8] cost = 0.013136874


 60%|█████████████████████████████████████████████████▊                                 | 9/15 [11:02<07:21, 73.59s/it]

[Epoch:    9] cost = 0.0136729199


 67%|██████████████████████████████████████████████████████▋                           | 10/15 [12:16<06:07, 73.56s/it]

[Epoch:   10] cost = 0.0121253403


 73%|████████████████████████████████████████████████████████████▏                     | 11/15 [13:28<04:53, 73.32s/it]

[Epoch:   11] cost = 0.0101460293


 80%|█████████████████████████████████████████████████████████████████▌                | 12/15 [14:42<03:39, 73.26s/it]

[Epoch:   12] cost = 0.00757756224


 87%|███████████████████████████████████████████████████████████████████████           | 13/15 [15:55<02:26, 73.36s/it]

[Epoch:   13] cost = 0.0088455053


 93%|████████████████████████████████████████████████████████████████████████████▌     | 14/15 [17:08<01:13, 73.37s/it]

[Epoch:   14] cost = 0.00655586924


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [18:22<00:00, 73.49s/it]

[Epoch:   15] cost = 0.00815993827





In [117]:
# 학습을 진행하지 않을 것이므로 torch.no_grad()
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model2(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())



Accuracy: 0.9034000039100647
