## 1.3　ニューラルネットワークの学習

In [1]:
import numpy as np

### 1.3.4　計算グラフ

#### 1.3.4.3　Repeat ノード

In [2]:
# Repeat node demo: the forward pass copies a single row N times,
# the backward pass adds the N upstream gradients back together.
D = 8  # width of the row
N = 7  # number of copies made by the forward pass

# Forward: (1, D) -> (N, D), every row of y is a copy of x.
x = np.random.randn(1, D)
y = x.repeat(N, axis=0)

# Backward: (N, D) -> (1, D), summing over the repeated axis.
dy = np.random.randn(N, D)
dx = dy.sum(axis=0, keepdims=True)

In [3]:
# Print each array beneath its own name; sep='\n' puts the label and
# the array on separate lines.
print('x', x, sep='\n')
print('y', y, sep='\n')
print('dx', dx, sep='\n')
print('dy', dy, sep='\n')

x
[[-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]]
y
[[-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]
 [-1.44304996  1.17725675 -0.12420138  2.07285777 -0.12241686  0.31478663
  -0.09785491 -0.2503511 ]]
dx
[[-0.30708101  2.20544459 -2.67983521  2.45048637  1.30873204  4.3094086
   3.39689304 -1.17790683]]
dy
[[ 1.41389297  0.20705071  0.97115376 -0.40047094 -0.09205361  0.15370419
  -0.

#### 1.3.4.4　Sum ノード

In [4]:
# Sum node demo: the forward pass adds N rows into one,
# the backward pass copies the upstream gradient to every row.
D = 8  # width of each row
N = 7  # number of rows summed by the forward pass

# Forward: (N, D) -> (1, D), summing over axis 0.
x = np.random.randn(N, D)
y = x.sum(axis=0, keepdims=True)

# Backward: (1, D) -> (N, D), every row of dx is a copy of dy.
dy = np.random.randn(1, D)
dx = dy.repeat(N, axis=0)

In [5]:
# Print each array beneath its own name; sep='\n' puts the label and
# the array on separate lines.
print('x', x, sep='\n')
print('y', y, sep='\n')
print('dx', dx, sep='\n')
print('dy', dy, sep='\n')

x
[[-1.27344621  0.57444913 -0.36606353 -2.4049274   1.27963987 -1.05187072
  -2.48543214  1.00224097]
 [-0.48693667  0.91252404  2.25142269  0.02843188 -0.29318408 -0.39238549
   0.299127   -0.2857816 ]
 [ 0.79896507 -0.21413036  1.75822567  1.61725405  0.30554797 -1.30179997
  -0.42161082 -0.4847266 ]
 [ 0.02244715  1.35306814  0.73275622 -0.42048722 -0.33600304 -0.93486638
   0.37757002 -1.56368602]
 [-0.59370857  1.24565859  0.77562795 -0.39330915  1.31617643 -0.11063292
   0.5442922  -2.30429861]
 [-0.63445299  0.51780221  0.63853411 -0.50922149 -0.08895127  1.23449091
  -1.78234971 -0.00260806]
 [-0.14987842 -0.66006778  1.54535063  0.35321164  0.29537977 -1.91519179
   0.38456794 -0.34328344]]
y
[[-2.31701064  3.72930397  7.33585374 -1.72904769  2.47860564 -4.47225636
  -3.08383551 -3.98214336]]
dx
[[ 0.0284583  -0.02937256 -0.56595512 -0.42661958 -0.59715495 -0.72522904
  -1.03434835  0.37130914]
 [ 0.0284583  -0.02937256 -0.56595512 -0.42661958 -0.59715495 -0.72522904
  -1.034

#### 1.3.4.5　MatMul ノード

In [6]:
class MatMul:
    """Matrix-multiplication node: ``out = x . W``.

    Attributes:
        params: one-element list holding the weight array ``W``.
        grads: one-element list holding a zero-initialised array shaped
            like ``W``; ``backward`` overwrites it in place.
        x: the input given to the most recent ``forward`` call
            (``None`` until ``forward`` has run).
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        """Return ``np.dot(x, W)`` and remember ``x`` for the backward pass."""
        W, = self.params
        out = np.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        """Store the gradient w.r.t. ``W`` and return the gradient w.r.t. ``x``.

        Args:
            dout: upstream gradient, shaped like the output of ``forward``.

        Returns:
            ``np.dot(dout, W.T)``, the gradient with respect to the input.
        """
        W, = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        # Write into the existing buffer (rather than rebinding grads[0]) so
        # anything holding a reference to that array sees the new values.
        self.grads[0][...] = dW
        return dx

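`self.grads[0][...]` への代入のように 3 点リーダー（`...`、Ellipsis）を使うと、`grads[0]` が指している既存の配列の要素が上書きされる（深いコピー）。`self.grads[0] = ...` のような再代入（浅いコピー）とは異なり、配列オブジェクト自体は置き換わらず、メモリ位置は固定されたままになる。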

### 1.3.5　勾配の導出と逆伝播の実装

#### 1.3.5.1　Sigmoid レイヤ

In [7]:
class Sigmoid:
    """Sigmoid activation layer: ``out = 1 / (1 + exp(-x))``.

    Attributes:
        params: empty list (the layer has no trainable parameters).
        grads: empty list (no parameter gradients).
        out: output of the most recent ``forward`` call
            (``None`` until ``forward`` has run).
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise and cache the result for ``backward``."""
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return ``dout * y * (1 - y)`` where ``y`` is the cached forward output."""
        dx = dout * (1.0 - self.out) * self.out
        return dx


# Backward-compatible alias: the class was originally named ``sigmoid``.
sigmoid = Sigmoid

#### 1.3.5.2　Affine レイヤ

In [8]:
class Affine:
    """Affine (fully connected) layer: ``out = x . W + b``.

    ``params`` holds ``[W, b]`` and ``grads`` holds two zero-initialised
    arrays of matching shapes, which ``backward`` overwrites in place.
    ``x`` keeps the input of the latest ``forward`` call.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(param) for param in self.params]
        self.x = None

    def forward(self, x):
        """Compute ``np.dot(x, W) + b`` and remember ``x`` for ``backward``."""
        weight, bias = self.params
        projected = np.dot(x, weight)
        result = projected + bias
        self.x = x
        return result

    def backward(self, dout):
        """Fill ``grads`` with the W and b gradients; return the input gradient."""
        weight, _bias = self.params
        grad_input = np.dot(dout, weight.T)
        grad_weight = np.dot(self.x.T, dout)
        grad_bias = np.sum(dout, axis=0)

        # Overwrite the existing gradient buffers element-wise instead of
        # rebinding the list entries.
        for buffer, value in zip(self.grads, (grad_weight, grad_bias)):
            buffer[...] = value
        return grad_input

### 1.3.6　重みの更新

In [9]:
class SGD:
    """Stochastic gradient descent: ``param -= lr * grad`` for every parameter."""

    def __init__(self, lr=0.01):
        # Learning rate: the factor applied to each gradient before subtracting.
        self.lr = lr

    def update(self, params, grads):
        """Subtract ``lr * grads[i]`` from ``params[i]`` for each index of ``params``."""
        for idx, _ in enumerate(params):
            params[idx] -= self.lr * grads[idx]