### 6) mnist 데이터셋(CNN)

In [10]:
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [11]:
X_train.shape

(60000, 28, 28)

In [12]:
# 샘플수, 채널(흑백1/컬러3), 가로, 세로

X_train= X_train.reshape(-1,1,28,28)
X_test= X_test.reshape(-1,1,28,28)

X_train=X_train/255.
X_test=X_test/255.

In [13]:
import torch

# 넘파이배열을 텐서로 이동

X_train=torch.from_numpy(X_train).float()
y_train=torch.from_numpy(y_train.astype('int32')).long()

X_test=torch.from_numpy(X_test).float()
y_test=torch.from_numpy(y_test.astype('int32')).long()

print(X_train.shape)
print(X_test.shape) 

torch.Size([60000, 1, 28, 28])
torch.Size([10000, 1, 28, 28])


In [14]:
#텐서를 gpu로 옮기고

X_train=X_train.cuda()
y_train=y_train.cuda()

X_test=X_test.cuda()
y_test=y_test.cuda()

In [15]:
from torch.utils.data import DataLoader, TensorDataset

# 독립변수와 종속변수 텐서를 합침
train = TensorDataset(X_train, y_train)
print(train[0])

# 미니배치 분할
train_loader = DataLoader(train, batch_size=100, shuffle=True)

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000

In [16]:
import torch.nn as nn
import torch.nn.functional as F

# 신경망 구성

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5 ) # 입력 채널 수(흑백1,컬러3), 출력 채널 수, 필터 크기
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully Connected Layer
        self.fc1 = nn.Linear(256, 64)
        self.fc2 = nn.Linear(64, 10)
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2) # 풀링 영역 크기
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 256)
#                   1차원으로 변환
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=0)    

# 인스턴스 생성
model = Net().cuda()

In [17]:
import torch.optim as optim
from torch.autograd import Variable

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(300):
    total_loss = 0
    for X_train, y_train in train_loader:
        X_train, y_train = Variable(X_train), Variable(y_train)     # 계산 그래프 구성

        #텐서를 gpu로 이동시킴
        X_train=X_train.cuda()
        y_train=y_train.cuda()

        # 경사 초기화
        optimizer.zero_grad()

        # 순전파 계산
        output = model(X_train)

        # 오차계산
        loss = criterion(output, y_train)
        
        # 역전파 계산
        loss.backward()

        # 가중치 업데이트
        optimizer.step()

        # 누적 오차 계산
        total_loss += loss.data

    # 50회 반복마다 누적 오차 출력
    if (epoch+1) % 50 == 0:
        print(epoch+1, total_loss)



50 tensor(21.0574, device='cuda:0')
100 tensor(10.5922, device='cuda:0')
150 tensor(6.0248, device='cuda:0')
200 tensor(4.5976, device='cuda:0')
250 tensor(2.4087, device='cuda:0')
300 tensor(1.9180, device='cuda:0')


In [18]:
X_test, y_test = Variable(X_test), Variable(y_test)

# [0] values, [1] indices

# 모형이 분류한 값들(10개) 중 가장 큰 값과 인덱스
# 출력이 0 또는 1이 되게 함
result = torch.max(model(X_test).data, 1)[1]
#print(result)

# 모형의 정확도 측정
# gpu에 저장된 텐서를 cpu로 이동시킴
y_test=y_test.cpu()
result=result.cpu()

accuracy = sum(y_test.data.numpy() == result.numpy()) / len(y_test.data.numpy())
accuracy

0.9876

In [19]:
print(model)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)


In [20]:
from torchinfo import summary

batch_size=100

# batch_size, channels, height, width

summary(model, input_size=(batch_size, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [100, 10]                 --
├─Conv2d: 1-1                            [100, 6, 24, 24]          156
├─Conv2d: 1-2                            [100, 16, 8, 8]           2,416
├─Linear: 1-3                            [100, 64]                 16,448
├─Linear: 1-4                            [100, 10]                 650
Total params: 19,670
Trainable params: 19,670
Non-trainable params: 0
Total mult-adds (M): 26.16
Input size (MB): 0.31
Forward/backward pass size (MB): 3.64
Params size (MB): 0.08
Estimated Total Size (MB): 4.04