먼저, 기본적인 딥러닝 구조들을 살펴보자.
딥러닝 실행을 위해서는 보통

import torch
import torch.nn as nn
import torch.optim as optim

를 import 해와야 한다.

In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import SubsetRandomSampler
import PIL
import os
from sklearn.preprocessing import OneHotEncoder
import pickle

모델을 돌리기 위해서 data를 load해오자.

In [2]:
#The output of torchvision datasets are PILImage images of range [0, 1]. We transform them to Tensors of normalized range [-1, 1].
transform = transforms.Compose(#전처리, 일반적으로 사용하는 것이 정해져있음
    [transforms.ToTensor(),
     transforms.Normalize((0.5, ), (0.5, ))])#normalize해줌: (0.5,0.5,0.5)로 하면 RGB인거고 MNIST는 gray라서 (0.5,)

# MNIST Dataset
train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=transform)

# Data Loader 
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./mnist_data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [4]:
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./mnist_data/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [5]:
vars(train_loader)

{'dataset': Dataset MNIST
     Number of datapoints: 60000
     Root location: ./mnist_data/
     Split: Train
     StandardTransform
 Transform: Compose(
                ToTensor()
                Normalize(mean=(0.5,), std=(0.5,))
            ),
 'num_workers': 0,
 'prefetch_factor': 2,
 'pin_memory': False,
 'timeout': 0,
 'worker_init_fn': None,
 '_DataLoader__multiprocessing_context': None,
 '_dataset_kind': 0,
 'batch_size': 32,
 'drop_last': False,
 'sampler': <torch.utils.data.sampler.RandomSampler at 0x7f4dba0ef810>,
 'batch_sampler': <torch.utils.data.sampler.BatchSampler at 0x7f4dba0efd10>,
 'generator': None,
 'collate_fn': <function torch.utils.data._utils.collate.default_collate(batch)>,
 'persistent_workers': False,
 '_DataLoader__initialized': True,
 '_IterableDataset_len_called': None,
 '_iterator': None}

NN model 정의하자.

In [6]:
import torch.nn as nn#weight공유가 가능함
import torch.nn.functional as F#단순 연산만 해줌

class Network(nn.Module):#torch.nn에 있는 Module을 쓸거다: 
    def __init__(self):
        super(Network, self).__init__()#super을 통해서 nn.Module을 상속받았어: super는 기반 class의 method를 호출해주는 애
        self.dense1 = nn.Linear(784, 512)
        self.dense2 = nn.Linear(512, 10)

    def forward(self, inputs):#내가 쓸 layer을 생성한다: forward를 잘 정의한다
        output = inputs.view(-1,784)
        output = self.dense1(output)
        output = self.dense2(output)
        return output

In [8]:
import torch.optim as optim

# use gpu -> .cuda()  -> model and Variable 
model = Network().cuda()#cuda를 이용하면 GPU로 할당이 가능하다: variable().cuda()로 사용을 한다
# This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class. so not use softmax()

criterion = nn.CrossEntropyLoss()#손실함수->기본적으로 logsoftmax()가 내장되어있음.
optimizer = optim.SGD(model.parameters(), lr=0.01)#model의 parameter를 할당해줌; RMSprop는 이동평균, adagrad나 adam같은애들도 쓸수 있겠지!

In [9]:
model#model은 내가 정의한 network: input feature은 MNIST: 28*28 = 784, output; 10개 class

Network(
  (dense1): Linear(in_features=784, out_features=512, bias=True)
  (dense2): Linear(in_features=512, out_features=10, bias=True)
)

train 단계

In [10]:
from torch.autograd import Variable # autograd는 자동미분해주는애, 

def train(epoch):
    model.train()
    train_loss = 0
    train_acc = 0
    
    for data, label in train_loader:#train_loader: train data를 받아오는 함수로 data, label값을 return해줌
        data, label = Variable(data).cuda(), Variable(label).cuda()#cuda에 올려준다: variable class가 tensor를 감싸고 있는
        
#     for i, datas in enumerate(train_loader, 0):
#         data, labels = datas
#         data, labels = data.cuda(), labels.cuda()
        
        output = model(data)#forward를 해서 data를 모델에 전달해서 예상하는 label값을 계산함.
        loss = criterion(output, label)#내 모델의 output과 label 사이의 loss를 계산: 앞에서 정의한 criterion을 이용해서,
        
        optimizer.zero_grad()#갱신할 variable에 대한 변화 다 0으로 만듦: gradients값들을 추후에 backward를 해줄때 계속 더해주기 때문, 즉 iteration 한 번 돌때마다 grad 초기화
        
        loss.backward() #역전파단계: 모델의 variable에 대한 loss의 변화 계산: 자동으로 모든 grad계산해줌
        optimizer.step()# 가중치 갱신: backprop를 통해서 계산된 변화를 이용해 parameter를 갱신한다
        
        train_loss += loss.item()
        pred = output.data.max(1, keepdim=True)[1]#최대값의 index return
        train_acc += pred.eq(label.data.view_as(pred)).sum()#pred와 data값을 비교해서 같으면 1
    train_loss /= len(train_loader.dataset)
    print('Train Epoch: {} Average loss: {:.4f} Accuracy : {:.4f}%)'.format(epoch, train_loss, 100. * train_acc / len(train_loader.dataset)))

In [11]:
def test():
    model.eval()
    test_loss = 0
    test_acc = 0
    for data, target in test_loader:
        # volatile=True no use backprob
        data, target = Variable(data, volatile=True).cuda(), Variable(target).cuda()
        output = model(data)
        test_loss += criterion(output, target).item()
        pred = output.data.max(1, keepdim=True)[1]
        test_acc += pred.eq(target.data.view_as(pred)).sum()

    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {:.0f}%)'.format(test_loss, 100. * test_acc / len(test_loader.dataset)))


In [12]:
for epoch in range(1, 6):
    train(epoch)
    test()

Train Epoch: 1 Average loss: 0.0157 Accuracy : 86.5133%)


  import sys


Test set: Average loss: 0.0102, Accuracy: 91%)
Train Epoch: 2 Average loss: 0.0103 Accuracy : 90.4917%)
Test set: Average loss: 0.0096, Accuracy: 91%)
Train Epoch: 3 Average loss: 0.0097 Accuracy : 91.0900%)
Test set: Average loss: 0.0091, Accuracy: 92%)
Train Epoch: 4 Average loss: 0.0093 Accuracy : 91.4450%)
Test set: Average loss: 0.0090, Accuracy: 92%)
Train Epoch: 5 Average loss: 0.0092 Accuracy : 91.6183%)
Test set: Average loss: 0.0089, Accuracy: 92%)


CNN은 보통 이미지 처리에서 많이 사용하며, 
앞에서 쓴 dense layer에 비해 시냅스 강도가 약해짐; 필요한애만 쓸 수 있고, 저장해야 할 parameter 수가 줄어듦!

In [14]:
# nn.Sequantial은 forward에서 구현할 layer을 더 가독성있게 작성해줄 수 있는 애임, 예를들어서
class MyNeuralNetwork(nn.Module): 
    def __init__(self): 
        super(MyNeuralNetwork, self).__init__() 
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5) 
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=30, kernel_size=5) 
        self.fc1 = nn.Linear(in_features=30*5*5, out_features=128, bias=True) 
        self.fc2 = nn.Linear(in_features=128, out_features=10, bias=True) 
        
        
    def forward(self, x): 
        x = F.relu(self.conv1(x), inplace=True) 
        x = F.max_pool2d(x, (2, 2)) 
        x = F.relu(self.conv2(x), inplace=True) 
        x = F.max_pool2d(x, (2, 2)) 
        x = x.view(x.shape[0], -1) 
        x = F.relu(self.fc1(x), inplace=True) 
        x = F.relu(self.fc2(x), inplace=True) 
        
        return x
    
#이 class를 layer 단위로 보고 싶으면
class MyNeuralNetwork(nn.Module): 
    
    def __init__(self): 
        super(MyNeuralNetwork, self).__init__() 
        self.layer1 = nn.Sequential( 
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5), 
            nn.ReLU(inplace=True), nn.MaxPool2d(2) ) 
        self.layer2 = nn.Sequential( 
            nn.Conv2d(in_channels=64, out_channels=30, kernel_size=5), 
            nn.ReLU(inplace=True), nn.MaxPool2d(2) ) 
        self.layer3 = nn.Sequential( 
            nn.Linear(in_features=30*5*5, out_features=128, bias=True), 
            nn.ReLU(inplace=True) ) 
        self.layer4 = nn.Sequential( 
            nn.Linear(in_features=128, out_features=10, bias=True), 
            nn.ReLU(inplace=True) ) 
        #이렇게 각 layer을 sequential로 정의를 한 다음에
        
        def forward(self, x): 
            x = self.layer1(x) 
            x = self.layer2(x) 
            x = x.view(x.shape[0], -1) 
            x = self.layer3(x) 
            x = self.layer4(x) 
            return x
#forward에서 layer로 작성을 해줌

#출처: https://dororongju.tistory.com/147 [웹 개발 메모장]