# NICO2AI 第5回 勾配法と誤差逆伝播法 (7/15) 実践演習

課題

- 3層NNモデルのクロスエントロピー誤差最小化をミニバッチ勾配降下法で実装する
- MNISTデータセットを用いて学習を行う

`### CODE HERE ###` と記載されている部分にコードを埋めていく

In [None]:
# Download the helper module from Google Drive and save it as utils.py
# (the import cell below pulls its helper functions from `utils`).
!wget "https://drive.google.com/uc?export=download&id=1FfK_OGcOU5Jy_jhkXlPYhoq0LmJBIiDB" -O utils.py

In [None]:
%matplotlib inline
import numpy as np
np.random.seed(111)
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
from sklearn.metrics import confusion_matrix
import seaborn as sns

from sklearn.datasets import fetch_mldata
from utils import to_categorical, calculate_accuracy, plot_confusion_matrix, get_image_tile

sns.set_style('ticks')

## MNISTデータのロード

In [None]:
# NOTE(review): `fetch_mldata` was deprecated in scikit-learn 0.20 and removed in 0.22.
# On newer versions this call (and its import) must be replaced, e.g. with
# `fetch_openml('mnist_784', ...)`, whose targets are strings and need casting.
mnist = fetch_mldata('MNIST original', data_home='/tmp')

# Fixed seed so that the train / validation / test split is reproducible.
np.random.seed(111)
data_idx = np.arange(70000)
np.random.shuffle(data_idx)

# Apply the permutation once; the original indexed the full arrays three times each,
# which made (and kept alive) three shuffled copies of the whole dataset.
X_all = mnist['data'][data_idx]
y_all = mnist['target'][data_idx]

# 50000 training / 10000 validation / remaining samples for testing.
X_train = X_all[:50000]
X_valid = X_all[50000:60000]
X_test = X_all[60000:]
y_train = y_all[:50000]
y_valid = y_all[50000:60000]
y_test = y_all[60000:]

# Labels converted with the `to_categorical` helper from utils
# (presumably one-hot rows of shape (n_samples, 10) — TODO confirm).
Y_train = to_categorical(y_train)
Y_valid = to_categorical(y_valid)
Y_test = to_categorical(y_test)

In [None]:
# Build a preview image of the training samples with the `get_image_tile` helper from utils
# (presumably a 10x10 grid of digits, judging by the width/height arguments — TODO confirm).
im = get_image_tile(X_train, width=10, height=10)
# A bare expression on the last line of a notebook cell displays its value.
im

## 各種関数の実装

In [None]:
def softmax(U, reduce_axis=0):
  """Return the softmax of U, normalized along `reduce_axis`.

  Args:
    U: array of pre-activations.
    reduce_axis: axis over which the outputs sum to 1 (the class axis).

  Returns:
    Array with the same shape as U whose entries are positive and sum to 1
    along `reduce_axis`.
  """
  # Shape of the per-slice statistics with the reduced axis kept as size 1,
  # so that they broadcast against U.
  shp = list(U.shape)
  shp[reduce_axis] = 1
  # Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing.
  U_max = U.max(axis=reduce_axis).reshape(shp)
  E = np.exp(U - U_max)
  return E / E.sum(axis=reduce_axis).reshape(shp)

def softmax_cross_entropy(D, Y):
  """Return the cross-entropy error between targets D and predictions Y.

  Args:
    D: target distribution (e.g. one-hot labels), same shape as Y.
    Y: predicted probabilities (softmax outputs).

  Returns:
    Scalar: the error summed over all elements (not averaged; callers divide
    by the number of samples themselves).
  """
  # Clip the probabilities away from 0 and 1 so that log() never yields -inf.
  epsilon = 1e-8
  Y = np.clip(Y, epsilon, 1-epsilon)
  return -(D * np.log(Y)).sum()

def sigmoid(U):
  """Return the element-wise logistic sigmoid 1 / (1 + exp(-U)).

  Written in the equivalent tanh form, which does not overflow for inputs of
  large magnitude.
  """
  return 0.5 * (1.0 + np.tanh(0.5 * U))

def dsigmoid_du(U):
  """Return the element-wise derivative of the logistic sigmoid at U.

  With s = sigmoid(U) the derivative is s * (1 - s); substituting
  s = (1 + tanh(U / 2)) / 2 gives (1 - tanh(U / 2) ** 2) / 4, which is used
  here because it cannot overflow.
  """
  t = np.tanh(0.5 * U)
  return 0.25 * (1.0 - t * t)

In [None]:
## Error checks for the helper functions

DIM, NB_CLASS, N = 20, 10, 100

# Random pre-activations, one column per sample.
U = np.random.normal(size=(DIM, N))

# Targets: every sample belongs to class 4.
D = np.zeros([NB_CLASS, N])
D[4] = 1.0

# Predictions: uniform distribution over the classes.
Y = np.full([NB_CLASS, N], 1.0 / NB_CLASS)

assert softmax(U).shape == (DIM, N), (
  'softmax(U).shape must be {}. result: {}'.format((DIM, N), softmax(U).shape))
assert softmax_cross_entropy(D, Y).shape == (), (
  'softmax_cross_entropy(D, Y).shape must be {}. result: {}'.format((), softmax_cross_entropy(D, Y).shape))
# With uniform predictions the error is N * log(NB_CLASS), roughly 230.
assert 220 <= softmax_cross_entropy(D, Y) <= 240, (
  'softmax_cross_entropy(D, Y) must approximately equal to 230, when the values of all elements of Y are equal.')
assert sigmoid(U).shape == (DIM, N), (
  'sigmoid(U).shape must be {}. result: {}'.format((DIM, N), sigmoid(U).shape))
assert dsigmoid_du(U).shape == (DIM, N), (
  'dsigmoid_du(U).shape must be {}. result: {}'.format((DIM, N), dsigmoid_du(U).shape))

## パラメーター初期化関数の実装

In [None]:
# Layer sizes: input pixels, hidden units, output classes.
x_dim, h_dim, nb_classes = 784, 256, 10

def init_params():
  """Create fresh parameters for both weight layers.

  Weights are drawn from a standard normal distribution and biases start at
  zero.

  Returns:
    Tuple (W_2, b_2, W_3, b_3) with shapes (h_dim, x_dim), (h_dim, 1),
    (nb_classes, h_dim) and (nb_classes, 1).
  """
  # W_2 is drawn before W_3, so the random stream is consumed in a fixed order.
  W_2 = np.random.normal(loc=0.0, scale=1.0, size=(h_dim, x_dim))
  b_2 = np.zeros((h_dim, 1))
  W_3 = np.random.normal(loc=0.0, scale=1.0, size=(nb_classes, h_dim))
  b_3 = np.zeros((nb_classes, 1))
  return W_2, b_2, W_3, b_3

## 推論用関数の実装

In [None]:
def inference(X):
  """Run the forward pass with the current global parameters.

  Args:
    X: input samples, one sample per row with x_dim features each.

  Returns:
    Class probabilities, one row per sample and one column per class.
  """
  # Internally one sample per column, matching the (units, inputs) weight layout.
  X = X.T
  assert X.shape[0] == x_dim

  # Hidden layer: affine map followed by the sigmoid.
  U_2 = np.dot(W_2, X) + b_2
  Z_2 = sigmoid(U_2)

  # Output layer: affine map followed by the softmax over the class axis (axis 0).
  U_3 = np.dot(W_3, Z_2) + b_3
  Z_3 = softmax(U_3)

  return Z_3.T

In [None]:
## Error checks for inference()

W_2, b_2, W_3, b_3 = init_params()
# The output width is set by W_3, i.e. by nb_classes.
assert inference(X_train[:100]).shape == (100, nb_classes), \
  'inference(X).shape must be {}. result: {}'.format((100, nb_classes), inference(X_train[:100]).shape)
# Check the values, not just the shape: every row must be a probability distribution.
assert np.allclose(inference(X_train[:100]).sum(axis=1), np.ones(100)), \
  'The sum around the class of output of inference(X) must be 1'

## 学習

In [None]:
# Hyperparameters.
nb_epoch = 10
batch_size = 100
nb_batch = len(X_train) // batch_size  # a trailing partial batch would be dropped
eta = 0.01  # learning rate
data_idx = np.arange(len(X_train))

# Figure reused for the live loss plot, plus the histories that are plotted.
fig = plt.figure(figsize=(4,4))
ax = fig.add_subplot(111)
train_epochs = []
valid_epochs = []
train_losses = []
valid_losses = []
train_accuracies = []
valid_accuracies = []
plot_freq = 50  # redraw the plot every plot_freq mini-batches

W_2, b_2, W_3, b_3 = init_params()

for epoch in range(nb_epoch):
  # Visit the training samples in a new random order every epoch.
  np.random.shuffle(data_idx)
  for batch in range(nb_batch):
    batch_idx = data_idx[batch*batch_size:(batch+1)*batch_size]
    X_batch = X_train[batch_idx]
    Y_batch = Y_train[batch_idx]
    # One sample per column from here on.
    X = X_batch.T
    D = Y_batch.T

    ## Forward pass
    U_2 = np.dot(W_2, X) + b_2
    Z_2 = sigmoid(U_2)

    U_3 = np.dot(W_3, Z_2) + b_3
    Z_3 = softmax(U_3)
    Y = Z_3

    ## Deltas (error gradient with respect to each layer's pre-activation)
    # For softmax combined with cross entropy the output delta is simply Y - D.
    Delta_3 = Y - D
    Delta_2 = dsigmoid_du(U_2) * np.dot(W_3.T, Delta_3)

    ## Gradients for the layer-3 parameters (averaged over the mini-batch)
    dLdW_3 = np.dot(Delta_3, Z_2.T) / batch_size
    dLdb_3 = Delta_3.mean(axis=1, keepdims=True)

    ## Gradients for the layer-2 parameters (averaged over the mini-batch)
    dLdW_2 = np.dot(Delta_2, X.T) / batch_size
    dLdb_2 = Delta_2.mean(axis=1, keepdims=True)

    ## Gradient-descent update of the layer-3 parameters
    W_3 = W_3 - eta * dLdW_3
    b_3 = b_3 - eta * dLdb_3

    ## Gradient-descent update of the layer-2 parameters
    W_2 = W_2 - eta * dLdW_2
    b_2 = b_2 - eta * dLdb_2

    ## Live plot of the error
    if batch % plot_freq == 0:
      # Fractional epoch so that the points within one epoch are spread along the x axis.
      train_epochs.append( epoch+batch/nb_batch )
      train_losses.append( softmax_cross_entropy(D, Y) / batch_size )
      train_accuracies.append( calculate_accuracy(D.argmax(axis=0), Y.argmax(axis=0)) )
      clear_output(wait = True)
      ax.plot( train_epochs, train_losses, label='Train' )
      ax.plot( valid_epochs, valid_losses, label='Validation' )
      plt.xlabel('Epoch')
      plt.ylabel('Loss')
      plt.title('epoch: {:02d}, batch: {:04d}'.format(epoch, batch))
      plt.legend()
      display(fig)
      # Clear the axes so that the next redraw starts from an empty plot.
      ax.cla()
  # End of epoch: evaluate on the whole validation set.
  Y = inference(X_valid).T
  valid_epochs.append( epoch+1 )
  valid_losses.append( softmax_cross_entropy(Y_valid.T, Y) / len(Y_valid) )
  valid_accuracies.append( calculate_accuracy(y_valid, Y.argmax(axis=0)) )
fig.clf()

In [None]:
# Training and validation accuracy over the epochs, on one set of axes.
fig, ax = plt.subplots(figsize=(4, 4))

for xs, ys, name in (
    (train_epochs, train_accuracies, 'Train'),
    (valid_epochs, valid_accuracies, 'Validation'),
):
  ax.plot(xs, ys, label=name)

ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
# Accuracy is a fraction, so fix the y range to [0, 1].
ax.set_ylim(0, 1)

## 学習結果の確認

In [None]:
# Class probabilities for the validation set, one row per sample.
Y_hat = inference(X_valid)
# Predicted label = index of the most probable class in each row.
y_hat = np.argmax(Y_hat, axis=1)
valacc = calculate_accuracy(y_valid, y_hat)

In [None]:
# Confusion matrix of true validation labels versus predicted labels.
C = confusion_matrix(y_valid, y_hat)
# Drawn with the `plot_confusion_matrix` helper from utils; range(10) is presumably
# the list of class labels for the axes — TODO confirm against utils.py.
plot_confusion_matrix(C, range(10))
print('Validation accuracy: {:.4f}'.format(valacc))

## テストデータでの確認

In [None]:
# Class probabilities for the held-out test set, one row per sample.
Y_hat = inference(X_test)
# Predicted label = index of the most probable class in each row.
y_hat = Y_hat.argmax(axis=1)
testacc = calculate_accuracy(y_test, y_hat)

In [None]:
# Confusion matrix of true test labels versus predicted labels.
C = confusion_matrix(y_test, y_hat)
# Drawn with the `plot_confusion_matrix` helper from utils; range(10) is presumably
# the list of class labels for the axes — TODO confirm against utils.py.
plot_confusion_matrix(C, range(10))
print('Test accuracy: {:.4f}'.format(testacc))