# 7章 畳み込みニューラルネットワーク

In [27]:
import sys, os
sys.path.append(os.pardir)
sys.path.append(f"{os.pardir}/deep-learning-from-scratch")

import numpy as np
from beartype import beartype
from collections import OrderedDict
from nptyping import NDArray, Shape, Float, Int
from common.util import im2col,col2im

In [18]:
# Dummy input filled with uniform random values, used only to inspect the shape.
x = np.random.rand(10,1,28,28)
x.shape
# (10, 1, 28, 28): mini-batch size, channels, height, width

(10, 1, 28, 28)

In [19]:
# One sample with 3 channels of 7x7 random values.
x1 = np.random.rand(1,3,7,7)
# Expand the input with a 5x5 filter window, stride 1 and no padding.
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)   # (9, 75): rows = number of filter positions (1 * ((7-5)/1 + 1)**2 = 9); columns = elements per position (presumably 3*5*5 = 75 — confirm against im2col)

(9, 75)


In [20]:
# Show channel 0 of the single sample next to the first im2col row so the two can be compared by eye.
print(x1[0][0])
print(col1[0])

[[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.03914491
  0.20773407]
 [0.51288944 0.50293884 0.85671902 0.72090202 0.27010591 0.5556635
  0.59713498]
 [0.02457308 0.90281899 0.87469388 0.02009157 0.13361658 0.19458219
  0.02499915]
 [0.0209891  0.9720402  0.55244596 0.95761181 0.27203654 0.97884945
  0.37895102]
 [0.19315752 0.15809505 0.68105605 0.29481389 0.35691675 0.23714536
  0.03193322]
 [0.61227175 0.0194606  0.16553847 0.74204764 0.42913091 0.26977271
  0.19511624]
 [0.60038899 0.99727459 0.99125549 0.69182925 0.95594471 0.56926757
  0.72815871]]
[0.77557663 0.33806582 0.37237046 0.83112068 0.49650122 0.51288944
 0.50293884 0.85671902 0.72090202 0.27010591 0.02457308 0.90281899
 0.87469388 0.02009157 0.13361658 0.0209891  0.9720402  0.55244596
 0.95761181 0.27203654 0.19315752 0.15809505 0.68105605 0.29481389
 0.35691675 0.56177801 0.51579877 0.55239822 0.66654575 0.49598721
 0.07402139 0.5829163  0.38529097 0.04022566 0.6660402  0.04880685
 0.29328377 0.72894727 0

In [21]:
# Same experiment with a mini-batch of 10 samples.
x2 = np.random.rand(10,3,7,7)
# Same 5x5 window, stride 1, no padding.
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)   # (90, 75): rows = number of filter positions (10 * ((7-5)/1 + 1)**2 = 90); columns = elements per position (presumably 3*5*5 = 75 — confirm against im2col)

(90, 75)


In [22]:
def output_size(x_len, pad, filter_len, stride):
    """Return how many filter positions fit along one axis.

    Computes ``(x_len + 2 * pad - filter_len) // stride + 1``.

    Args:
        x_len: Input length along the axis (height or width).
        pad: Padding added to each side of the input.
        filter_len: Filter (window) length along the same axis.
        stride: Step between consecutive filter positions; must be positive.

    Returns:
        The number of filter positions. For integer arguments this is an
        ``int``, so it can be used directly as an array dimension.

    Raises:
        ValueError: If ``stride`` is not positive, if the filter is longer
            than the padded input, or if ``stride`` does not evenly divide
            the remaining length.
    """
    if stride <= 0:
        raise ValueError(f"Stride {stride} must be positive!")

    rest = (x_len + 2 * pad) - filter_len
    if rest < 0:
        raise ValueError(f"Filter length {filter_len} is longer than input size {x_len} + {pad}!")
    if rest % stride != 0:
        raise ValueError(f"Rest length {rest} and stride {stride} are conflicted!")

    # Floor division (exact here, since the remainder is zero) keeps the
    # result an integer; true division produced a float such as 3.0, which
    # numpy rejects as a reshape dimension.
    return rest // stride + 1

class Convolution:
    """Convolution layer computed as a matrix product via im2col / col2im.

    The filter may have any number of channels ``C``; ``forward`` verifies
    that the input has the same number of channels.
    """

    @beartype
    def __init__(self, W: NDArray[Shape['FN,C,FH,FW'], Float], b: NDArray[Shape['FN'], Float], stride=1, pad=0):
        """Store the parameters and reset the cached values.

        Args:
            W: Filters of shape (FN, C, FH, FW), where FN is the filter number.
            b: Bias of shape (FN,).
            stride: Stride handed to im2col / col2im.
            pad: Padding handed to im2col / col2im.
        """
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Intermediate data (used by backward)
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    @beartype
    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):
        """Apply the filters to ``x`` and cache what backward needs.

        Args:
            x: Input of shape (N, C, H, W); C must equal the filter channels.

        Returns:
            Array of shape (N, FN, out_h, out_w).

        Raises:
            ValueError: If the channel count of ``x`` differs from the filter's.
        """
        FN, C, FH, FW = self.W.shape
        N, x_channels, H, W = x.shape
        if x_channels != C:
            raise ValueError(f"Input has {x_channels} channels but the filter expects {C}!")

        # int() keeps the sizes usable as reshape dimensions in case
        # output_size hands back a float.
        out_h = int(output_size(H, self.pad, FH, self.stride))
        out_w = int(output_size(W, self.pad, FW, self.stride))

        # Expected shape (N*out_h*out_w, C*FH*FW) — confirm against common.util.im2col
        col = im2col(x, FH, FW, self.stride, self.pad)
        # One column per filter: (C*FH*FW, FN)
        col_W = self.W.reshape(FN, -1).T
        # (N*out_h*out_w, FN); the bias broadcasts over rows
        out = np.dot(col, col_W) + self.b
        # Back to (N, FN, out_h, out_w)
        reshaped_out = out.reshape(N, out_h, out_w, FN).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return reshaped_out

    @beartype
    def backward(self, dout: NDArray[Shape['N,FN,OH,OW'], Float]):
        """Compute the parameter gradients and return the input gradient.

        Must be called after ``forward``, which caches ``x``, ``col`` and
        ``col_W``. Stores the results in ``self.dW`` and ``self.db``.

        Args:
            dout: Upstream gradient of shape (N, FN, out_h, out_w).

        Returns:
            The result of col2im for the cached input shape (expected to
            have the same shape as the forward input).
        """
        FN, C, FH, FW = self.W.shape
        # (N, FN, OH, OW) -> (N*OH*OW, FN), the layout of forward's matrix product
        dout_matrix = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        # Bias gradient, shape (FN,)
        self.db = np.sum(dout_matrix, axis=0)
        # (C*FH*FW, FN)
        dW_matrix = np.dot(self.col.T, dout_matrix)
        # Use the local matrix (there is no self.dW_matrix attribute) and
        # restore the filter layout (FN, C, FH, FW).
        self.dW = dW_matrix.transpose(1, 0).reshape(FN, C, FH, FW)

        # (N*OH*OW, C*FH*FW)
        dcol = np.dot(dout_matrix, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx


In [23]:
class Pooling:
    """Max pooling layer built on im2col / col2im."""

    def __init__(self, pool_h, pool_w, stride=2, pad=0):
        """Store the pooling configuration.

        Args:
            pool_h: Height of the pooling window.
            pool_w: Width of the pooling window. For example, to take the
                max over a 3x3 = 9 element region use pool_h=3, pool_w=3.
            stride: Step between consecutive windows.
            pad: Padding handed to im2col / col2im.
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Intermediate data (used by backward)
        self.x = None
        self.arg_max = None

    @beartype
    def forward(self, x: NDArray[Shape['N,C,H,W'], Float]):
        """Take the max of every pooling window.

        Args:
            x: Input of shape (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        # Author's note: calling the depth of an output feature map a
        # "channel" when it is not a colour still feels odd, but one gets
        # used to it.
        N, C, H, W = x.shape
        # Include the padding so the sizes agree with what is passed to im2col.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # One row per (sample, position, channel) window; -1 lets numpy
        # derive the row count instead of hard-coding N*C.
        windows = col.reshape(-1, self.pool_h * self.pool_w)

        # Remember which element won in each window for backward.
        self.arg_max = np.argmax(windows, axis=1)
        out = np.max(windows, axis=1)
        reshaped_out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x

        return reshaped_out

    @beartype
    def backward(self, dout: NDArray[Shape['N,C,OH,OW'], Float]):
        """Route each upstream gradient to the element that was the max.

        Must be called after ``forward``.

        Args:
            dout: Upstream gradient of shape (N, C, out_h, out_w).

        Returns:
            The result of col2im for the cached input shape (expected to
            have the same shape as the forward input).

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.x is None or self.arg_max is None:
            raise RuntimeError("Pooling.backward called before forward!")

        # Same (N, out_h, out_w, C) ordering that forward used for its rows.
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size), dtype=dout.dtype)
        # Only the winning element of each window receives gradient.
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Merge the channel axis back into the columns: (N*out_h*out_w, C*pool_size)
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx


In [24]:
# Rectifyは電流の交流を整流にすることから名付けられた。電流の交流は正負の電流が交互に流れるが、整流にすると正の電流のみが流れる。
class Relu:
    """ReLU activation: passes positive values and zeroes the rest."""

    def __init__(self):
        # Boolean mask of the positions that were <= 0 in the last forward call.
        self.mask = None

    @beartype
    def forward(self, x: NDArray[Shape['*, ...'], Float]):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        Accepts an array of any number of dimensions, so the layer can sit
        after both 4-D and 2-D outputs.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    @beartype
    def backward(self, dout: NDArray[Shape['*, ...'], Float]):
        """Return ``dout`` with the gradient blocked where the input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx


In [25]:
class Affine:
    """Fully connected layer: ``out = x . W + b``."""

    def __init__(self, W: NDArray[Shape['S,WS'], Float], b: NDArray[Shape['D'], Float]):
        """Store the parameters.

        Args:
            W: Weights of shape (S, WS).
            b: Bias added to every output row.
        """
        self.W = W
        self.b = b
        self.x = None
        # Shape of the last forward input, so backward can restore it.
        self.original_x_shape = None
        self.dW = None
        self.db = None

    @beartype
    def forward(self, x: NDArray[Shape['*, ...'], Float]):
        """Compute the affine transform of ``x``.

        Args:
            x: Input whose first axis is the batch. Any trailing axes are
                flattened into one, so a 4-D (N, C, H, W) array can be
                passed as well as a 2-D (N, S) array.

        Returns:
            Array of shape (N, WS).

        Raises:
            ValueError: If ``x`` has fewer than 2 dimensions.
        """
        if x.ndim < 2:
            raise ValueError(f"Affine expects at least 2 dimensions, got {x.ndim}!")

        self.original_x_shape = x.shape
        # (N, ...) -> (N, S); a no-op for input that is already 2-D.
        flat_x = x.reshape(x.shape[0], -1)
        self.x = flat_x
        out = np.dot(flat_x, self.W) + self.b

        return out

    @beartype
    def backward(self, dout: NDArray[Shape['N,WS'], Float]):
        """Compute the parameter gradients and return the input gradient.

        Must be called after ``forward``. Stores the results in ``self.dW``
        (shape (S, WS)) and ``self.db`` (shape (WS,)).

        Args:
            dout: Upstream gradient of shape (N, WS).

        Returns:
            Gradient with the same shape as the forward input.
        """
        dx = np.dot(dout, self.W.T)           # (N, S)
        self.dW = np.dot(self.x.T, dout)      # (S, WS)
        self.db = np.sum(dout, axis=0)        # (WS,)

        # Undo the flattening done in forward.
        dx = dx.reshape(*self.original_x_shape)

        return dx

In [26]:
# Assumed architecture: Conv → ReLU → Pooling → Affine (ReLU) → Affine (Softmax)

class SimpleConvNet:
    """Small CNN: Conv -> ReLU -> Pooling -> Affine -> ReLU -> Affine, with a softmax loss."""

    def __init__(self, input_dim=(1,28,28),
        # filter_size 5 means a 5x5 filter; square filters are the common case.
        conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
        hidden_size=100, output_size=10, weight_init_std=0.01):
        """Create the parameters and the layers.

        Args:
            input_dim: (channels, height, width) of one input sample. Only
                the height (``input_dim[1]``) is used for the size
                calculation, so a square input is assumed.
            conv_param: Dict with the keys 'filter_num', 'filter_size',
                'pad' and 'stride'. It is only read, never modified.
            hidden_size: Number of units in the hidden affine layer.
            output_size: Number of output units. Note that inside this
                method the name shadows the module-level ``output_size``
                function.
            weight_init_std: Standard deviation of the initial weights.
        """
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size + 2*filter_pad - filter_size) // filter_stride + 1
        # The 2x2 / stride-2 pooling below halves each side, rounding down.
        # Halving with float division overcounted for odd conv output sizes.
        pooled_side = conv_output_size // 2
        pool_output_size = filter_num * pooled_side * pooled_side

        self.params = {}
        # W1: (filter_num, channels, filter_size, filter_size)
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        # np.zeros is a function: it must be called, not subscripted.
        self.params['b1'] = np.zeros(filter_num)
        # W2: (pool_output_size, hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        # W3: (hidden_size, output_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # NOTE(review): this wiring needs Convolution to accept filters with
        # input_dim[0] channels and Affine1 to accept the 4-D pooling
        # output — confirm both layers allow that.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        # NOTE(review): SoftmaxWithLoss is neither defined nor imported in
        # the visible part of this file; presumably it comes from
        # common.layers — confirm and import it.
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x: NDArray):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of the prediction for ``x`` against the targets ``t``."""
        y = self.predict(x)
        return self.last_layer.forward(y,t)
