In [32]:
# Scratch cell: creates an empty OrderedDict.
# NOTE(review): `dic1` is not read by any cell visible here, and OrderedDict is
# only imported in a later cell, so this fails on a fresh kernel — confirm
# whether this cell is still needed.
dic1 = OrderedDict()

In [46]:
class TwoLayerNet:
    """Two-layer fully connected classifier: Affine -> Relu -> Affine.

    The trainable arrays live in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``. The same array objects are handed to the
    Affine layers, so callers that update ``params[key]`` in place (``-=``)
    also update the layers.

    ``Affine``, ``Relu``, ``SoftmaxWithLoss`` and the module-level
    ``numerical_gradient`` function are not defined here; they are expected to
    be imported elsewhere in the notebook.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and wire up the layers.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
            output_size: Number of output scores (columns of ``W2``).
            weight_init_std: Scale applied to the standard-normal initial weights.
        """
        self.params = {}
        # Weights start as small Gaussian noise; biases start at zero.
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Insertion order is the forward order; gradient() walks it in reverse.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        # Kept out of self.layers so predict() returns raw scores.
        self.lastLayers = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` and return the result.

        The loss layer is not applied here: softmax does not change which score
        is largest, so argmax-based prediction does not need it. It is only
        needed when computing the training loss.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``self.lastLayers.forward(predict(x), t)`` for inputs ``x`` and targets ``t``."""
        y = self.predict(x)
        return self.lastLayers.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Args:
            x: Batch of inputs; the first axis indexes samples.
            t: Targets, either one row per sample (e.g. one-hot) or a 1-D
                array of class indices.

        Returns:
            Number of correct predictions divided by ``x.shape[0]``.
        """
        y = np.argmax(self.predict(x), axis=1)
        t = np.asarray(t)
        # Only collapse targets that are not already class indices; calling
        # argmax(axis=1) on a 1-D label vector would raise.
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / x.shape[0]
        return accuracy

    def numerical_gradient(self, x, t):
        """Return a dict of gradients for each parameter via the module-level ``numerical_gradient``.

        Intended as a slow reference for checking ``gradient``.
        """
        # The lambda ignores its argument: the helper is handed the very arrays
        # held in self.params, and self.loss reads those same arrays.
        # NOTE(review): assumes the helper perturbs the array it is given in place — confirm.
        loss_W = lambda _: self.loss(x, t)

        grads = {}
        # Inside a method body the bare name resolves to the module-level
        # function, not to this method.
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """Return a dict of gradients for each parameter computed by backpropagation.

        Runs a forward pass through ``loss``, then calls ``backward`` on the
        loss layer and on every layer in reverse order, and finally collects
        the ``dW`` / ``db`` attributes of the two Affine layers.
        """
        # Forward pass first; presumably the layers cache what backward() needs.
        self.loss(x, t)

        dout = 1
        dout = self.lastLayers.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

In [47]:
import numpy as np
from common.functions import *
from common.gradient import numerical_gradient
from util.layers import *
from collections import OrderedDict
from tqdm import tqdm

In [48]:
from util.mnist import load_mnist

In [49]:
# Gradient check setup: compute the gradients of a freshly initialised network
# twice on the same tiny batch, once numerically and once by backpropagation.
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

network = TwoLayerNet(
    input_size=784,
    hidden_size=50,
    output_size=10,
)

# Only the first three samples are used for the comparison.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

In [50]:
# For every parameter, print both gradient shapes and then the mean absolute
# difference between the backprop and numerical gradients.
for key in grad_numerical:
    analytic = grad_backprop[key]
    numeric = grad_numerical[key]
    print(key, analytic.shape, numeric.shape)
    diff = np.average(np.abs(analytic - numeric))
    print(key + ":" + str(diff))

W1 (784, 50) (784, 50)
W1:2.525640416035734e-13
b1 (50,) (50,)
b1:9.019682787117511e-13
W2 (50, 10) (50, 10)
W2:9.416976019552091e-13
b2 (10,) (10,)
b2:1.2012612987666316e-10


In [52]:
# Mini-batch SGD training of a fresh TwoLayerNet on MNIST, recording the loss
# every iteration and the train/test accuracy once per epoch.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Histories: loss per iteration, accuracies per epoch.
train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
lerning_rate = learning_rate  # original (misspelled) name, kept in case other cells read it

# Whole number of iterations per epoch. Integer division keeps the modulo test
# below exact when train_size is not a multiple of batch_size, and max(..., 1)
# avoids a zero divisor when the batch is larger than the training set.
iter_per_epoch = max(train_size // batch_size, 1)


for i in tqdm(range(iters_num)):
    # Draw batch_size random sample indices (np.random.choice samples with
    # replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)

    # In-place update so the arrays shared with the network's layers change too.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch (including iteration 0), evaluate on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc |" + str(train_acc) + ", "+ str(test_acc))

  1%|█                                                                            | 133/10000 [00:00<00:16, 582.59it/s]

train acc, test acc |0.10861666666666667, 0.112


  8%|█████▉                                                                       | 770/10000 [00:00<00:11, 816.92it/s]

train acc, test acc |0.9028833333333334, 0.9086


 14%|██████████▎                                                                 | 1360/10000 [00:01<00:10, 821.44it/s]

train acc, test acc |0.9212166666666667, 0.9212


 20%|██████████████▉                                                             | 1971/10000 [00:02<00:09, 808.11it/s]

train acc, test acc |0.9346, 0.9321


 25%|███████████████████▎                                                        | 2538/10000 [00:03<00:09, 802.43it/s]

train acc, test acc |0.9434666666666667, 0.9409


 32%|████████████████████████▍                                                   | 3218/10000 [00:03<00:08, 786.52it/s]

train acc, test acc |0.9516666666666667, 0.9499


 37%|████████████████████████████▏                                               | 3710/10000 [00:04<00:08, 720.23it/s]

train acc, test acc |0.9551666666666667, 0.953


 44%|█████████████████████████████████▌                                          | 4414/10000 [00:05<00:06, 802.56it/s]

train acc, test acc |0.9614, 0.9596


 50%|██████████████████████████████████████                                      | 5004/10000 [00:05<00:06, 772.22it/s]

train acc, test acc |0.9633166666666667, 0.9601


 56%|██████████████████████████████████████████▌                                 | 5593/10000 [00:06<00:05, 810.25it/s]

train acc, test acc |0.96605, 0.9616


 61%|██████████████████████████████████████████████▋                             | 6142/10000 [00:07<00:05, 768.64it/s]

train acc, test acc |0.9697, 0.9657


 67%|███████████████████████████████████████████████████                         | 6713/10000 [00:07<00:04, 769.38it/s]

train acc, test acc |0.9715166666666667, 0.9669


 73%|███████████████████████████████████████████████████████▌                    | 7310/10000 [00:08<00:03, 809.32it/s]

train acc, test acc |0.9727, 0.9656


 79%|████████████████████████████████████████████████████████████▎               | 7942/10000 [00:09<00:02, 819.48it/s]

train acc, test acc |0.9725333333333334, 0.9654


 86%|█████████████████████████████████████████████████████████████████▎          | 8599/10000 [00:10<00:01, 862.65it/s]

train acc, test acc |0.9765666666666667, 0.9681


 92%|██████████████████████████████████████████████████████████████████████      | 9226/10000 [00:10<00:00, 838.70it/s]

train acc, test acc |0.9785166666666667, 0.9691


 98%|██████████████████████████████████████████████████████████████████████████▍ | 9791/10000 [00:11<00:00, 796.33it/s]

train acc, test acc |0.9797, 0.9705


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [00:11<00:00, 859.36it/s]


In [58]:
# Display the dimensions of the test input array.
x_test.shape

(10000, 784)