In [1]:
import sys, os
path = '../..'
sys.path.append(path)

In [2]:
import numpy as np
import pickle
from dataset.mnist import load_mnist

# 1 평균 제곱 오차

In [3]:
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)

In [4]:
y1 = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
y2 = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
t  = [  0,    0,   1,   0,    0,   0,   0,   0,   0,   0]

In [5]:
mean_squared_error(np.array(y1), np.array(t))

0.09750000000000003

In [6]:
mean_squared_error(np.array(y2), np.array(t))

0.5975

# 2 미니배치 평균 제곱 오차

In [7]:
def mean_squared_error_with_batch(y, t):
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)
    batch_size = y.shape[0]
    return 0.5 * np.sum((y-t)**2) / batch_size

In [8]:
mean_squared_error_with_batch(np.array(y1), np.array(t))

0.09750000000000003

In [9]:
mean_squared_error_with_batch(np.array(y2), np.array(t))

0.5975

# 3 미니배치 신경망 추론에 대한 평균 제곱 오차

In [10]:
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

In [11]:
print(x_train.shape, x_train.ndim)
print(t_train.shape, t_train.ndim)

(60000, 784) 2
(60000,) 1


In [12]:
train_size = x_train.shape[0]
train_size

60000

In [13]:
batch_size = 10

In [14]:
batch_mask = np.random.choice(train_size, batch_size)
batch_mask

array([35065,  8756, 48352, 40350, 32818, 54605,  2180, 11335,  2184,
       17734])

In [15]:
x_batch = x_train[batch_mask]
x_batch

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [16]:
t_batch = t_train[batch_mask]
t_batch

array([9, 3, 3, 5, 2, 1, 3, 0, 2, 3], dtype=uint8)

In [17]:
print(x_batch.shape, x_batch.ndim)
print(t_batch.shape, t_batch.ndim)

(10, 784) 2
(10,) 1


In [18]:
t_batch_with_onehot = np.eye(10)[t_batch]
t_batch_with_onehot

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]])

In [19]:
print(t_batch_with_onehot.shape, t_batch_with_onehot.ndim)

(10, 10) 2


In [20]:
with open('../../dataset/sample_weight.pkl', 'rb') as f:
    network = pickle.load(f)
print(type(network))

<class 'dict'>


In [21]:
W1, W2, W3 = network['W1'], network['W2'], network['W3']
b1, b2, b3 = network['b1'], network['b2'], network['b3']
print(W1.shape, W2.shape, W3.shape)
print(b1.shape, b2.shape, b3.shape)

(784, 50) (50, 100) (100, 10)
(50,) (100,) (10,)


In [22]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [23]:
def softmax(x):
    x = (x - np.min(x)) / (np.max(x) - np.min(x))
#     x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))

In [24]:
def get_next_layer(activate_function, in_layout, weight, bias):
    next_layer = np.dot(in_layout, weight) + bias
    return activate_function(next_layer)

In [25]:
def predict(x):
    z1 = get_next_layer(sigmoid, x, W1, b1)
    z2 = get_next_layer(sigmoid, z1, W2, b2)
    z3 = get_next_layer(softmax, z2, W3, b3)
    return z3

In [26]:
y_batch = predict(x_batch)
y_batch

  


array([[0.00751473, 0.00856624, 0.00847413, 0.00902363, 0.01347499,
        0.00902964, 0.00866029, 0.01111743, 0.01028774, 0.01422571],
       [0.00915654, 0.00971266, 0.01031822, 0.01431501, 0.00836841,
        0.01085845, 0.00912965, 0.00835815, 0.01050635, 0.00876089],
       [0.00876158, 0.00923481, 0.0081854 , 0.01257091, 0.00928691,
        0.01343095, 0.00799923, 0.00988321, 0.01050298, 0.00987788],
       [0.01010822, 0.00992644, 0.00970848, 0.01321572, 0.00760743,
        0.01300842, 0.00946391, 0.00797639, 0.01076271, 0.00805982],
       [0.0080649 , 0.01099056, 0.0126191 , 0.01177226, 0.00952118,
        0.00935932, 0.01096983, 0.00753878, 0.01035364, 0.00804202],
       [0.00571559, 0.01512553, 0.01012725, 0.01031477, 0.00942276,
        0.00945375, 0.00927326, 0.00999755, 0.01165836, 0.00987102],
       [0.00834639, 0.01057018, 0.01319904, 0.01404658, 0.00763101,
        0.00953475, 0.00803101, 0.00931381, 0.01096654, 0.00838253],
       [0.01553659, 0.00684356, 0.0108027

In [27]:
print(y_batch.shape, y_batch.ndim)

(10, 10) 2


In [28]:
mean_squared_error_with_batch(y_batch, t_batch)

75.18992309570312

In [29]:
y_batch[0]

array([0.00751473, 0.00856624, 0.00847413, 0.00902363, 0.01347499,
       0.00902964, 0.00866029, 0.01111743, 0.01028774, 0.01422571],
      dtype=float32)