# CNN(Convolutional Neural Network)
> 참고: 책 <<밑바닥부터 시작하는 딥러닝1>>, pp.242-253

In [1]:
import sys
sys.path.append('../')

import numpy as np
from common.util import im2col, col2im

합성곱 계층 구현

In [10]:
class Convolution:
    """Convolution layer evaluated as one matrix product.

    ``forward`` expands the input with ``im2col``, multiplies it by the
    flattened filters and caches the intermediates; ``backward`` uses those
    cached values to fill ``dW`` / ``db`` and to return the input gradient.
    """

    def __init__(self, W, b, stride=1, pad=0):
        """Store the layer parameters and reset the cached state.

        Args:
            W: Filter weights; ``forward`` unpacks ``W.shape`` as
                (output channels, input channels, height, width).
            b: Bias added to the matrix product in ``forward``.
            stride: Stride handed to ``im2col`` / ``col2im``.
            pad: Padding handed to ``im2col`` / ``col2im``.
        """
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

        # Intermediate data written by forward() and read by backward().
        self.x = self.col = self.col_W = None

        # Gradients of the weights and the bias, written by backward().
        self.dW = self.db = None

    def forward(self, x):
        """Apply the filters to ``x`` and return the feature map.

        Args:
            x: Input whose shape is unpacked as
                (batch size, channels, height, width).

        Returns:
            Array with axes ordered (batch, output channel, out_h, out_w).
        """
        num_filters, _, filter_h, filter_w = self.W.shape
        batch, _, in_h, in_w = x.shape
        stride, pad = self.stride, self.pad

        # Height and width of the output feature map.
        out_h = int(1 + (in_h + 2 * pad - filter_h) / stride)
        out_w = int(1 + (in_w + 2 * pad - filter_w) / stride)

        patches = im2col(x, filter_h, filter_w, stride, pad)

        # One column per filter so a single dot product applies all filters.
        flat_filters = self.W.reshape(num_filters, -1).T

        scores = np.dot(patches, flat_filters) + self.b

        # Reorder the axes: (N, out_h, out_w, FN) -> (N, FN, out_h, out_w).
        out = scores.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x, self.col, self.col_W = x, patches, flat_filters
        return out

    def backward(self, dout):
        """Back-propagate ``dout``; store ``dW`` / ``db`` and return ``dx``.

        Args:
            dout: Upstream gradient laid out like the output of ``forward``.

        Returns:
            The result of ``col2im`` for the shape of the cached input.
        """
        num_filters, channels, filter_h, filter_w = self.W.shape

        # Undo the axis reordering of forward(): one row per output position.
        grad_rows = dout.transpose(0, 2, 3, 1).reshape(-1, num_filters)

        self.db = grad_rows.sum(axis=0)
        flat_dW = np.dot(self.col.T, grad_rows)
        self.dW = flat_dW.T.reshape(num_filters, channels, filter_h, filter_w)

        grad_patches = np.dot(grad_rows, self.col_W.T)
        return col2im(grad_patches, self.x.shape, filter_h, filter_w,
                      self.stride, self.pad)


풀링 계층 구현

In [11]:
class Pooling:
    """Max-pooling layer built on ``im2col`` / ``col2im``.

    ``forward`` takes the maximum of every pooling window and remembers
    which element won; ``backward`` routes the upstream gradient back to
    exactly those elements.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        """Store the window size, stride and padding.

        Args:
            pool_h: Height of the pooling window.
            pool_w: Width of the pooling window.
            stride: Stride handed to ``im2col`` / ``col2im``.
            pad: Padding handed to ``im2col`` / ``col2im``.
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Written by forward() and read by backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Return the per-window maximum of ``x``.

        Args:
            x: Input whose shape is unpacked as
                (batch size, channels, height, width).

        Returns:
            Array with axes ordered (batch, channel, out_h, out_w).
        """
        N, C, H, W = x.shape

        # The padding is forwarded to im2col below, so it must also be part
        # of the output size; otherwise the reshape fails whenever pad > 0.
        # Same formula as Convolution.forward (assumes im2col pads both
        # sides of each spatial axis by `pad` -- TODO confirm).
        # NOTE(review): if im2col pads with zeros, a padded cell can win the
        # max of an all-negative window -- confirm that is acceptable.
        out_h = int(1 + (H + 2 * self.pad - self.pool_h) / self.stride)
        out_w = int(1 + (W + 2 * self.pad - self.pool_w) / self.stride)

        # Expand the input, then put one pooling window on each row.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # Index of the winning element per window, needed by backward().
        arg_max = np.argmax(col, axis=1)

        # Maximum of each window.
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Send ``dout`` back to the positions that produced each maximum.

        Args:
            dout: Upstream gradient laid out like the output of ``forward``.

        Returns:
            The result of ``col2im`` for the shape of the cached input.
        """
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w

        # One row per window: zero everywhere except at the arg-max slot.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Collapse (N, out_h, out_w) into rows again before calling col2im.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w,
                    self.stride, self.pad)

        return dx

CNN 구현

In [14]:
from collections import OrderedDict
from common.layers import Relu, Affine, SoftmaxWithLoss
# from common.gradient import numerical_gradient
# import pickle

class SimpleConvNet:
    """Small CNN: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine -> softmax loss.

    The learnable arrays live in ``self.params`` (``W1``/``b1`` for the
    convolution, ``W2``/``b2`` and ``W3``/``b3`` for the two affine layers)
    and are shared with the layer objects stored in ``self.layers``.
    """

    # Geometry of the single pooling layer (Pool1).
    _POOL_SIZE = 2
    _POOL_STRIDE = 2

    def __init__(self, input_dim=(1, 28, 28), conv_param=None,
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Create the parameters and assemble the layer stack.

        Args:
            input_dim: Input shape as (channels, height, width).
            conv_param: Dict with the keys ``'filter_num'``,
                ``'filter_size'``, ``'pad'`` and ``'stride'``. ``None``
                selects 30 filters of size 5 with pad 0 and stride 1.
            hidden_size: Number of units in the hidden affine layer.
            output_size: Number of output units.
            weight_init_std: Scale applied to the random initial weights.
        """
        if conv_param is None:
            # Built per call so that no dict is shared between instances.
            conv_param = {
                'filter_num': 30,
                'filter_size': 5,
                'pad': 0,
                'stride': 1
            }
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']

        # Number of values leaving the pooling layer per sample.
        pool_output_size = self._pool_output_size(
            input_dim, filter_size, filter_pad, filter_stride, filter_num)

        # Weight parameters.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Layers, in forward order.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           filter_stride, filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=self._POOL_SIZE, pool_w=self._POOL_SIZE,
                                       stride=self._POOL_STRIDE)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    @staticmethod
    def _pool_output_size(input_dim, filter_size, filter_pad, filter_stride, filter_num):
        """Return how many values the pooling layer emits per sample.

        Integer arithmetic mirrors the truncation done by the layers, so an
        odd convolution output (e.g. 25 -> 12 after pooling) is not
        over-counted, and height and width are handled separately so the
        input does not have to be square.
        """
        _, in_h, in_w = input_dim
        pool, pool_stride = SimpleConvNet._POOL_SIZE, SimpleConvNet._POOL_STRIDE

        # Output size of the convolution layer.
        conv_h = (in_h - filter_size + 2 * filter_pad) // filter_stride + 1
        conv_w = (in_w - filter_size + 2 * filter_pad) // filter_stride + 1

        # Output size of the pooling layer (no padding).
        pool_h = (conv_h - pool) // pool_stride + 1
        pool_w = (conv_w - pool) // pool_stride + 1

        return filter_num * pool_h * pool_w

    # Inference
    def predict(self, x):
        """Run ``x`` through every layer in order and return the last output.

        Each layer's ``forward`` result is passed to the next layer; the
        loss layer is not applied.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # Loss value
    def loss(self, x, t):
        """Return ``last_layer.forward`` applied to the prediction and ``t``.

        Args:
            x: Input data.
            t: Target labels.
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    # Backpropagation
    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Args:
            x: Input data.
            t: Target labels.

        Returns:
            Dict with the keys ``W1``, ``b1``, ``W2``, ``b2``, ``W3``, ``b3``
            taken from the ``dW`` / ``db`` attributes of the layers.
        """
        # The forward pass populates the values each layer caches.
        self.loss(x, t)

        # Backward pass, from the loss layer back to the first layer.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the parameterised layers.
        conv1 = self.layers['Conv1']
        affine1 = self.layers['Affine1']
        affine2 = self.layers['Affine2']
        return {
            'W1': conv1.dW,
            'b1': conv1.db,
            'W2': affine1.dW,
            'b2': affine1.db,
            'W3': affine2.dW,
            'b3': affine2.db,
        }
    

    # def accuracy(self, x, t, batch_size=100):
    #     if t.ndim != 1 : t = np.argmax(t, axis=1)
        
    #     acc = 0.0
        
    #     for i in range(int(x.shape[0] / batch_size)):
    #         tx = x[i*batch_size:(i+1)*batch_size]
    #         tt = t[i*batch_size:(i+1)*batch_size]
    #         y = self.predict(tx)
    #         y = np.argmax(y, axis=1)
    #         acc += np.sum(y == tt) 
        
    #     return acc / x.shape[0]


    # def numerical_gradient(self, x, t):
    #     loss_w = lambda w: self.loss(x, t)

    #     grads = {}
    #     for idx in (1, 2, 3):
    #         grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
    #         grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

    #     return grads

    # def save_params(self, file_name="params.pkl"):
    #     params = {}
    #     for key, val in self.params.items():
    #         params[key] = val
    #     with open(file_name, 'wb') as f:
    #         pickle.dump(params, f)

    # def load_params(self, file_name="params.pkl"):
    #     with open(file_name, 'rb') as f:
    #         params = pickle.load(f)
    #     for key, val in params.items():
    #         self.params[key] = val

    #     for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
    #         self.layers[key].W = self.params['W' + str(i+1)]
    #         self.layers[key].b = self.params['b' + str(i+1)]

MNIST 학습

In [15]:
# import matplotlib.pyplot as plt
# from datasets.mnist import load_mnist
# from common.trainer import Trainer

# # 데이터 읽기
# (x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# # 시간이 오래 걸릴 경우 데이터를 줄인다.
# # x_train, t_train = x_train[:5000], t_train[:5000]
# # x_test, t_test = x_test[:1000], t_test[:1000]

# max_epochs = 20

# network = SimpleConvNet(input_dim=(1,28,28), 
#                         conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
#                         hidden_size=100, output_size=10, weight_init_std=0.01)
                        
# trainer = Trainer(network, x_train, t_train, x_test, t_test,
#                   epochs=max_epochs, mini_batch_size=100,
#                   optimizer='Adam', optimizer_param={'lr': 0.001},
#                   evaluate_sample_num_per_epoch=1000)
# trainer.train()

# # 매개변수 보존
# network.save_params("params.pkl")
# print("Saved Network Parameters!")

# # 그래프 그리기
# markers = {'train': 'o', 'test': 's'}
# x = np.arange(max_epochs)
# plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
# plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
# plt.xlabel("epochs")
# plt.ylabel("accuracy")
# plt.ylim(0, 1.0)
# plt.legend(loc='lower right')
# plt.show()

train loss:2.2997761366472145
=== epoch:1, train acc:0.148, test acc:0.133 ===
train loss:2.2975034272558785
train loss:2.2937057915710817
train loss:2.290231301500161
train loss:2.2815961106491542
train loss:2.273117443613491
train loss:2.263357824373143
train loss:2.2411198416729623
train loss:2.196539184898299
train loss:2.217913109094189
train loss:2.156688716724658
train loss:2.1278800196706875
train loss:2.1264475511616414
train loss:2.07589388202547
train loss:2.0492835513325427
train loss:1.9982507714442659
train loss:1.8841290708867904
train loss:1.8364878752091673
train loss:1.7988773981255881
train loss:1.6480267796233738
train loss:1.6729247191420327
train loss:1.586668043927189
train loss:1.4857741646497653
train loss:1.38189865364824
train loss:1.3110422674335487
train loss:1.2072953992670046
train loss:1.2173083037894916
train loss:0.9998792683509599
train loss:1.06269297249489
train loss:1.034266608838847
train loss:0.8884760679651187
train loss:0.9890882668636257
train

KeyboardInterrupt: 