In [None]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.ocrdata import load_dataset
# from dataset.mnist import load_mnist
from deep_convnet import DeepConvNet
from common.trainer import Trainer

# Load the dataset; load_dataset() yields a (train, test) pair, each of which
# unpacks into (inputs, labels).
(x_train, t_train), (x_test, t_test) = load_dataset()

# Build the network with 26 output scores
# (presumably one per letter class -- confirm against dataset.ocrdata).
network = DeepConvNet(output_size=26)

# Configure the training run and execute it.
trainer = Trainer(
    network,
    x_train,
    t_train,
    x_test,
    t_test,
    epochs=20,
    mini_batch_size=100,
    optimizer='Adam',
    optimizer_param={'lr': 0.001},
    evaluate_sample_num_per_epoch=1000,
)
trainer.train()

# Persist the network parameters to disk.
network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")

[   9  144  144  288  288  576 1024  100]
train loss:25.9937660436
=== epoch:1, train acc:0.062, test acc:0.058 ===
train loss:22.2758194221
train loss:16.3249623203
train loss:9.76230580613
train loss:6.84424027173
train loss:4.55254549708
train loss:4.2381728948
train loss:4.23562901552
train loss:4.27144725846
train loss:4.24607490363
train loss:4.26353246192
train loss:4.23815732155
train loss:4.23876682807
train loss:4.23868682841
train loss:4.23879285192
train loss:4.23832925211
train loss:4.23841218608
train loss:4.23832088662
train loss:4.23870595559
train loss:4.23834676418
train loss:4.23816020226
train loss:4.23820589107


In [None]:
# coding: utf-8
import numpy as np
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.layers import *

class DeepConvNet:
    """Convolutional network with six convolution and two affine layers.

    Layer stack built by ``__init__`` (in order)::

        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax-with-loss

    Learnable parameters live in ``self.params`` under the keys
    ``'W1'..'W8'`` and ``'b1'..'b8'``.
    """

    # Positions inside ``self.layers`` of the layers that own W1..W8 / b1..b8
    # (six convolutions followed by two affine layers).
    _PARAM_LAYER_IDXS = (0, 2, 5, 7, 10, 12, 15, 18)

    def __init__(self, input_dim=(1,28,28), 
                conv_params1={'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                conv_params2={'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                conv_params3={'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                conv_params4={'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
                conv_params5={'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                conv_params6={'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                hidden_size=100, output_size=26, weight_init_std=0.01):
        """Create the parameters and the layer stack.

        Args:
            input_dim: Input shape; only ``input_dim[0]`` (the channel count)
                is read here.
            conv_params1..conv_params6: One dict per convolution layer with
                the keys ``'filter_num'``, ``'filter_size'``, ``'pad'`` and
                ``'stride'``. The default dicts are only read, never mutated.
            hidden_size: Number of units in the first affine layer.
            output_size: Number of output scores of the last affine layer.
            weight_init_std: Scale applied to the standard-normal initial
                weights. A number is used for every layer (default ``0.01``).
                The strings ``'he'`` or ``'relu'`` (case-insensitive) select
                ``sqrt(2 / fan_in)`` per layer instead.

        Raises:
            ValueError: If ``weight_init_std`` is an unrecognised string.
        """
        conv_params_list = [conv_params1, conv_params2, conv_params3,
                            conv_params4, conv_params5, conv_params6]

        # Fan-in (number of inputs feeding one unit) of each weight layer.
        # Plain Python ints are used so that large layers cannot overflow a
        # fixed-width integer dtype.
        fan_in = []
        pre_channel_num = input_dim[0]
        for conv_params in conv_params_list:
            fan_in.append(pre_channel_num * conv_params['filter_size'] ** 2)
            pre_channel_num = conv_params['filter_num']
        # NOTE(review): the 4 * 4 assumes the feature map reaching the first
        # affine layer is 4x4 (e.g. a 28x28 input with the default paddings
        # and three 2x2 poolings); other configurations need this adjusted.
        fan_in.append(conv_params6['filter_num'] * 4 * 4)
        fan_in.append(hidden_size)
        pre_node_num = np.array(fan_in, dtype=np.int64)
        print(pre_node_num)

        # Per-layer scale of the initial weights.
        if isinstance(weight_init_std, str):
            if weight_init_std.lower() not in ('he', 'relu'):
                raise ValueError(
                    "weight_init_std must be a number, 'he' or 'relu', got %r"
                    % (weight_init_std,))
            init_scales = np.sqrt(2.0 / pre_node_num)
        else:
            init_scales = np.full(len(fan_in), float(weight_init_std))

        # Parameters are drawn in the order W1..W8 so that a seeded RNG
        # reproduces the same initial weights.
        self.params = {}
        channel_num = input_dim[0]
        for idx, conv_params in enumerate(conv_params_list):
            self.params['W' + str(idx + 1)] = init_scales[idx] * np.random.randn(
                conv_params['filter_num'], channel_num,
                conv_params['filter_size'], conv_params['filter_size'])
            self.params['b' + str(idx + 1)] = np.zeros(conv_params['filter_num'])
            channel_num = conv_params['filter_num']
        self.params['W7'] = init_scales[6] * np.random.randn(fan_in[6], hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = init_scales[7] * np.random.randn(fan_in[7], output_size)
        self.params['b8'] = np.zeros(output_size)

        def conv(number, conv_params):
            # Convolution layer bound to the parameters W<number> / b<number>.
            return Convolution(self.params['W' + str(number)],
                               self.params['b' + str(number)],
                               conv_params['stride'], conv_params['pad'])

        # The order below must stay in sync with _PARAM_LAYER_IDXS.
        self.layers = [
            conv(1, conv_params1), Relu(),
            conv(2, conv_params2), Relu(),
            Pooling(pool_h=2, pool_w=2, stride=2),
            conv(3, conv_params3), Relu(),
            conv(4, conv_params4), Relu(),
            Pooling(pool_h=2, pool_w=2, stride=2),
            conv(5, conv_params5), Relu(),
            conv(6, conv_params6), Relu(),
            Pooling(pool_h=2, pool_w=2, stride=2),
            Affine(self.params['W7'], self.params['b7']), Relu(),
            DropOut(0.5),
            Affine(self.params['W8'], self.params['b8']),
            DropOut(0.5),
        ]

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer in ``self.layers`` and return the result.

        ``train_flg`` is forwarded only to the ``DropOut`` layers; all other
        layers are called with ``x`` alone.
        """
        for layer in self.layers:
            if isinstance(layer, DropOut):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss of ``x`` against targets ``t``.

        The forward pass is run with ``train_flg=True``, and its output is
        passed to ``self.last_layer`` together with ``t``.
        """
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples whose arg-max score equals the label.

        Args:
            x: Inputs; the first axis indexes samples.
            t: Labels, either 1-D class indices or 2-D rows whose arg-max is
                the class index.
            batch_size: Number of samples pushed through the network per
                forward pass, which bounds peak memory use.

        Returns:
            The accuracy over all samples, or ``0.0`` when ``x`` is empty.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        sample_num = x.shape[0]
        if sample_num == 0:
            return 0.0

        correct = 0
        # The final slice may be shorter than batch_size; it is still counted.
        for start in range(0, sample_num, batch_size):
            stop = start + batch_size
            y = np.argmax(self.predict(x[start:stop]), axis=1)
            correct += np.sum(y == t[start:stop])
        return correct / sample_num

    def gradient(self, x, t):
        """Return the gradients of the loss with respect to every parameter.

        Runs a forward pass through ``loss`` and then calls ``backward`` on
        the last layer and on every layer in reverse order.

        Returns:
            A dict with the keys ``'W1'..'W8'`` and ``'b1'..'b8'`` holding
            the ``dW`` / ``db`` attributes of the corresponding layers.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_IDXS):
            grads['W' + str(i + 1)] = self.layers[layer_idx].dW
            grads['b' + str(i + 1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        # Imported here so the method works even when the enclosing module or
        # notebook cell never imported pickle.
        import pickle

        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load pickled parameters and bind them to the weight-owning layers.

        Every key found in the file is written into ``self.params``; then the
        ``W`` / ``b`` attributes of the layers listed in
        ``_PARAM_LAYER_IDXS`` are pointed at ``W1..W8`` / ``b1..b8``.
        """
        import pickle

        # SECURITY: unpickling can execute arbitrary code; only load parameter
        # files that come from a trusted source.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate(self._PARAM_LAYER_IDXS):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]
            
            