In [1]:
# インポート
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics

In [2]:
# Small fixed arrays used as inputs for the RNN forward pass run later in the notebook.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array(
    [
        [1, 3, 5, 7],
        [2, 4, 6, 8],
        [3, 5, 7, 8],
        [4, 6, 8, 10],
    ]
) / 100  # (n_nodes, n_nodes)

# Sizes are read off the arrays: 1 sample, 3 time steps, 2 features, 4 nodes.
batch_size, n_sequences, n_features = x.shape
n_nodes = w_x.shape[1]

h = np.zeros((batch_size, n_nodes))  # all-zero array, (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # (n_nodes,)

In [39]:
# Sanity check of the input layout: (batch_size, n_sequences, n_features).
np.shape(x)

(1, 3, 2)

In [47]:
# First time step of every sample; the result has shape (batch_size, n_features).
x[:, 0]

array([[0.01, 0.02]])

In [34]:
# The slice only restricts the first (batch) axis to index 0, so with a single
# sample the whole array is printed.
print(x[0:1])

[[[0.01 0.02]
  [0.02 0.03]
  [0.03 0.04]]]


In [3]:
class SimpleInitializer:
    """
    Simple initialization from a zero-mean Gaussian distribution.

    Parameters
    ----------
    sigma : float
      Standard deviation of the Gaussian distribution.
    """
    def __init__(self, sigma=0.005):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2):
        """
        Draw a weight matrix.

        Parameters
        ----------
        n_nodes1 : int
          Number of rows (nodes in the previous layer).
        n_nodes2 : int
          Number of columns (nodes in the next layer).

        Returns
        -------
        ndarray, shape (n_nodes1, n_nodes2)
          Samples from N(0, sigma^2).
        """
        return self.sigma * np.random.randn(n_nodes1, n_nodes2)

    def B(self, n_nodes2):
        """
        Draw a bias vector.

        Parameters
        ----------
        n_nodes2 : int
          Number of nodes in the layer.

        Returns
        -------
        ndarray, shape (n_nodes2,)
          Samples from N(0, sigma^2).
        """
        # randn (standard normal) rather than rand (uniform on [0, 1)), so the
        # bias follows the same Gaussian distribution as W and the class docs.
        return self.sigma * np.random.randn(n_nodes2)

In [4]:
class SGD:
    """
    Stochastic gradient descent.

    Parameters
    ----------
    lr : float
      Learning rate.
    """
    def __init__(self, lr=0.001):
        self.lr = lr

    def update(self, layer):
        """
        Apply one gradient step to a layer's weights and bias.

        Parameters
        ----------
        layer : object
          Must expose `W`, `dW`, `B` and `dB`. `dB` may be per-sample
          gradients of shape (batch_size, n_nodes) or an already
          batch-reduced vector of shape (n_nodes,).

        Returns
        -------
        layer : the same object, with `W` and `B` replaced by updated arrays.
        """
        layer.W = layer.W - self.lr * layer.dW

        bias_grad = np.asarray(layer.dB)
        # Only average over the batch axis when it is still present; averaging
        # an already-reduced 1-D gradient would collapse it to a single scalar
        # and apply the same step to every bias.
        if bias_grad.ndim > 1:
            bias_grad = bias_grad.mean(axis=0)
        layer.B = layer.B - self.lr * bias_grad
        return layer

In [5]:
# Hyperbolic tangent activation
class tanh:
    """Element-wise tanh activation that remembers its last input for the backward pass."""

    def __init__(self):
        pass

    def forward(self, A):
        """Store the input `A` and the output `tanh(A)`, and return the output."""
        self.A = A
        self.Z = np.tanh(A)
        return self.Z

    def backward(self, dZ):
        """Scale the upstream gradient `dZ` by the tanh derivative at the stored input."""
        local_grad = 1 - np.tanh(self.A) ** 2
        return dZ * local_grad

In [6]:
# Softmax
class softmax:
    """Row-wise softmax with the combined softmax / cross-entropy gradient."""

    def __init__(self):
        pass

    def forward(self, A):
        """
        Compute softmax over axis 1.

        Parameters
        ----------
        A : ndarray, shape (batch_size, n_classes)
          Scores (logits).

        Returns
        -------
        ndarray, shape (batch_size, n_classes)
          Each row sums to 1.
        """
        # Subtracting the row maximum leaves the result mathematically
        # unchanged but keeps exp() from overflowing to inf (and the division
        # from producing NaN) when the scores are large.
        shifted = A - np.max(A, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)

    def backward(self, Z, Y):
        """
        Return `Z - Y`, the difference between the softmax output `Z` and the
        targets `Y` (same shape as `Z`).
        """
        return Z - Y

In [13]:
class FC:
    """
    Simple recurrent layer that advances one time step per `forward` call.

    Each call computes A = Z @ Wx + h @ Wh + B and stores h = tanh(A) as the
    hidden state used by the next call.

    Parameters
    ----------
    x : ndarray, shape (batch_size, n_sequences, n_features)
      Input sequences; only the shape is read here.
    w_x : ndarray, shape (n_features, n_nodes)
      Input-to-hidden weights.
    w_h : ndarray, shape (n_nodes, n_nodes)
      Hidden-to-hidden weights.
    batch_size : int
      Number of rows of the initial all-zero hidden state.
    b : ndarray, shape (n_nodes,)
      Bias.
    initializer : initializer class or instance
      Kept on the layer; the weights are supplied explicitly, so it is not
      used to draw any values.
    optimizer : optimizer class or instance
      Must expose a learning rate `lr`, which `backward` uses for the update.
    """

    def __init__(self, x, w_x, w_h, batch_size, b, initializer=SimpleInitializer, optimizer=SGD):
        # Both collaborators may be passed as a class (the defaults) or as a
        # ready-made instance; normalise to instances.
        self.initializer = initializer() if isinstance(initializer, type) else initializer
        self.optimizer = optimizer() if isinstance(optimizer, type) else optimizer

        self.n_sequences = x.shape[1]
        self.n_features = x.shape[2]
        self.n_nodes = w_x.shape[1]

        # Parameters are given by the caller rather than drawn at random.
        self.Wx = w_x
        self.Wh = w_h
        self.B = b
        # Hidden state before the first time step.
        self.h = np.zeros((batch_size, self.n_nodes))

        self.activation1 = tanh()

    def forward(self, Z):
        """
        Process one time step.

        Parameters
        ----------
        Z : ndarray, shape (batch_size, n_features)
          Input at the current time step.

        Returns
        -------
        A : ndarray, shape (batch_size, n_nodes)
          Pre-activation value. The activated hidden state is stored in
          `self.h`, not returned.
        """
        self.Z = Z
        # Keep the incoming hidden state; backward needs it for the Wh gradient.
        self.h_prev = self.h
        A = Z @ self.Wx + self.h_prev @ self.Wh + self.B
        self.A = A
        self.h = self.activation1.forward(A)
        return A

    def backward(self, dA):
        """
        Back-propagate through the most recent `forward` call and update the
        parameters.

        Only the latest time step is handled; the gradient is not propagated
        through the recurrent connection to earlier time steps.

        Parameters
        ----------
        dA : ndarray, shape (batch_size, n_nodes)
          Gradient with respect to the pre-activation `A` returned by `forward`.

        Returns
        -------
        dZ : ndarray, shape (batch_size, n_features)
          Gradient with respect to the input `Z`.
        """
        self.dA = dA
        self.dB = dA.mean(axis=0)
        self.dWx = self.Z.T @ dA
        self.dWh = self.h_prev.T @ dA
        dZ = dA @ self.Wx.T

        # Update. SGD.update works on a single W/dW pair, which this layer does
        # not have (it owns Wx and Wh), so the step is applied here with the
        # optimizer's learning rate.
        lr = self.optimizer.lr
        self.Wx = self.Wx - lr * self.dWx
        self.Wh = self.Wh - lr * self.dWh
        self.B = self.B - lr * self.dB
        return dZ

In [48]:
class ScratchSimpleRNNClassifier():
    """
    From-scratch simple RNN that runs the forward pass over a sequence.

    Training is not implemented: `fit` only builds the recurrent layer from
    the given weights and runs forward propagation. `num_iter`, `n_output`,
    `alpha`, `rec_loss` and `rec_val_loss` are stored but not used.
    TODO: add the loss and backpropagation.

    Parameters
    ----------
    num_iter : int
      Number of iterations (stored only).
    lr : float
      Learning rate handed to the optimizer.
    verbose : bool
      When True, `fit` prints the final hidden state.
    """

    def __init__(self, num_iter=15, lr=0.01, verbose=True):
        self.verbose = verbose
        self.num_iter = num_iter
        self.lr = lr
        self.n_output = 10
        self.alpha = 0.001
        self.rec_loss = []
        self.rec_val_loss = []

    def fit(self, x, w_x, w_h, batch_size, b):
        """
        Build the recurrent layer and run forward propagation over every time step.

        Parameters
        ----------
        x : ndarray, shape (batch_size, n_sequences, n_features)
          Input sequences.
        w_x : ndarray, shape (n_features, n_nodes)
          Input-to-hidden weights.
        w_h : ndarray, shape (n_nodes, n_nodes)
          Hidden-to-hidden weights.
        batch_size : int
          Number of samples in `x`.
        b : ndarray, shape (n_nodes,)
          Bias.

        Raises
        ------
        ValueError
          If `x` has no time steps.
        """
        # Instantiate the optimizer and initializer handed to the layer.
        optimizer = SGD(self.lr)
        initializer = SimpleInitializer()
        self.FC1 = FC(x, w_x, w_h, batch_size, b, initializer, optimizer)
        self.activation3 = softmax()

        self.X = x
        if self.FC1.n_sequences == 0:
            raise ValueError("x must contain at least one time step")

        # Forward propagation: feed one time step at a time; the layer carries
        # the hidden state between calls.
        for i in range(self.FC1.n_sequences):
            A1 = self.FC1.forward(self.X[:, i, :])

        # Softmax of the last time step's pre-activation.
        self.Z3 = self.activation3.forward(A1)

        if self.verbose:
            # Show the hidden state after the last time step.
            print(self.FC1.h)

    def predict(self, X_test):
        """
        Run the forward pass on new sequences with the weights of the fitted layer.

        Must be called after `fit`. The hidden state starts from zeros and the
        fitted layer's own state is left untouched.

        Parameters
        ----------
        X_test : ndarray, shape (n_samples, n_sequences, n_features)
          Input sequences.

        Returns
        -------
        z_all : ndarray, shape (n_samples, n_nodes)
          Softmax of the last time step's pre-activation (same quantity as
          `self.Z3` computed by `fit`).
        z_all_index : ndarray, shape (n_samples,)
          Index of the largest entry in each row of `z_all`.

        Raises
        ------
        ValueError
          If `X_test` has no time steps.
        """
        if X_test.shape[1] == 0:
            raise ValueError("X_test must contain at least one time step")

        layer = self.FC1
        hidden = np.zeros((X_test.shape[0], layer.n_nodes))
        for t in range(X_test.shape[1]):
            pre_activation = X_test[:, t, :] @ layer.Wx + hidden @ layer.Wh + layer.B
            hidden = np.tanh(pre_activation)

        z_all = self.activation3.forward(pre_activation)
        z_all_index = np.argmax(z_all, axis=1)
        return z_all, z_all_index

In [49]:
# Build the classifier and run the forward pass on the arrays defined earlier.
clf = ScratchSimpleRNNClassifier()
clf.fit(x=x, w_x=w_x, w_h=w_h, batch_size=batch_size, b=b)

[[0.79494228 0.81839002 0.83939649 0.85584174]]

