## 5.7　誤差逆伝播法の実装

In [1]:
import sys
import numpy as np

In [2]:
sys.path.append('../input/deeplearningfromscratchmaster/deep-learning-from-scratch-master')

In [3]:
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

### 5.7.2　誤差逆伝播に対応したニューラルネットワークの実装

重みを持つ層が 2 層であるから、`TwoLayerNet` となっている。

In [4]:
class TwoLayerNet:
    """Two-layer fully connected network trained with backpropagation.

    The forward path is Affine1 -> Relu1 -> Affine2, followed by a
    SoftmaxWithLoss layer that is only used when computing the loss.
    `Affine`, `Relu` and `SoftmaxWithLoss` are expected to be in scope
    (this notebook star-imports `common.layers`).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and wire them into the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            weight_init_std: Scale applied to the standard-normal samples
                used to initialise the weight matrices.
        """
        # Weights are scaled Gaussian noise; biases start at zero.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Insertion order defines the forward order; gradient() walks the
        # same layers in reverse. The layers receive the very same array
        # objects that are stored in self.params.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Pass `x` through every layer in `self.layers` and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return `self.lastLayer.forward(predict(x), t)`.

        Args:
            x: Input data.
            t: Target labels.
        """
        y = self.predict(x)

        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of `x` whose predicted class matches `t`.

        Args:
            x: Input data; one sample per row.
            t: Targets, either a 1-D array of class indices or a
                higher-dimensional array that is reduced with argmax over
                axis 1 (e.g. one-hot rows).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)

        # Anything that is not already a 1-D label vector is converted to
        # class indices.
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])

        return accuracy

    def numerical_gradient(self, x, t):
        """Compute gradients of the loss with the imported `numerical_gradient`.

        Inside this method the bare name `numerical_gradient` resolves to the
        module-level function (imported from `common.gradient` in this
        notebook), not to this method.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2'.
        """
        # The argument is ignored: the loss is re-evaluated from the current
        # parameter arrays. NOTE(review): this relies on the helper
        # perturbing the passed array in place - confirm against
        # common.gradient.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

    def gradient(self, x, t):
        """Compute gradients of the loss by backpropagation.

        Runs a forward pass, then calls `backward` on the last layer and on
        every layer of `self.layers` in reverse order, and finally reads the
        `dW` / `db` attributes of the two Affine layers.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2'.
        """
        # Forward pass; the layers are expected to cache what backward needs.
        self.loss(x, t)

        # Backward pass, seeded with 1.
        dout = 1
        dout = self.lastLayer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads

### 5.7.4　誤差逆伝播法を使った学習

In [5]:
from ch05.two_layer_net import TwoLayerNet

In [6]:
# Directory holding the pre-exported MNIST arrays (.npy files).
mnist_dir = '../input/load-mnist/load_mnist/normalize=True, one_hot_label=True/'

# Load the four arrays in the same order as they are named.
x_train, t_train, x_test, t_test = (
    np.load(mnist_dir + name + '.npy')
    for name in ('x_train', 't_train', 'x_test', 't_test')
)

# Show the shape of each array as a quick sanity check.
for split in (x_train, t_train, x_test, t_test):
    print(split.shape)

(60000, 784)
(60000, 10)
(10000, 784)
(10000, 10)


In [7]:
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Training hyperparameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Histories filled in by the training loop.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch. Floor division keeps this an integer so
# that `i % iter_per_epoch == 0` fires once per epoch even when train_size
# is not an exact multiple of batch_size (with true division the float
# result would only match at i == 0 in that case).
iter_per_epoch = max(train_size // batch_size, 1)

In [8]:
for i in range(iters_num):
    # Draw a random mini-batch of row indices (np.random.choice samples
    # with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients via backpropagation, followed by one gradient-descent step.
    grad = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Accuracy is only evaluated on iterations that start a new epoch.
    if i % iter_per_epoch != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(train_acc, test_acc)

0.17546666666666666 0.1762
0.9039833333333334 0.9094
0.9253333333333333 0.9273
0.9373 0.9375
0.9448333333333333 0.9432
0.9524 0.9501
0.9556666666666667 0.9531
0.9614 0.9571
0.9636666666666667 0.9595
0.9675166666666667 0.9627
0.9694833333333334 0.9649
0.9712666666666666 0.9652
0.9726833333333333 0.9647
0.9753 0.9687
0.97665 0.9678
0.9777666666666667 0.9695
0.9786166666666667 0.9697


辞書を別の変数に代入すると、キー（`'W1'`、`'W2'`）もそのまま引き継がれる（コピーではなく、同じオブジェクトを参照する）。これは、よくある `df` の代入で列名が引き継がれるのと同じである。

In [9]:
# Small hand-written "gradient" dictionary used to demonstrate assignment.
grads = {
    'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
    'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
}

In [10]:
# Plain assignment: `grad` now refers to the same dictionary as `grads`.
grad = grads

# Print both arrays separated by one blank line.
print(grad['W1'], '', grad['W2'], sep='\n')

[[0.1 0.3 0.5]
 [0.2 0.4 0.6]]

[[0.1 0.4]
 [0.2 0.5]
 [0.3 0.6]]
