In [1]:
import sys, os
path = '../..'
sys.path.append(path)

In [2]:
import numpy as np
import pickle
from dataset.mnist import load_mnist

# MSE 

In [3]:
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)

In [4]:
y1 = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
y2 = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
t  = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

In [5]:
mean_squared_error(np.array(y1), np.array(t))

0.09750000000000003

In [6]:
mean_squared_error(np.array(y2), np.array(t))

0.5975

# MSE with mini batch

In [7]:
def mean_squared_error_with_batch(y, t):
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)
    batch_size = y.shape[0]
    return 0.5 * np.sum((y-t)**2) / batch_size

In [8]:
mean_squared_error_with_batch(np.array(y1), np.array(t))

0.09750000000000003

In [9]:
mean_squared_error_with_batch(np.array(y2), np.array(t))

0.5975

# Apply MSE with mini batch on Prediction

In [10]:
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

In [11]:
print(x_train.shape, x_train.ndim, t_train.shape, t_train.ndim)

(60000, 784) 2 (60000,) 1


In [12]:
train_size = x_train.shape[0]
train_size

60000

In [13]:
batch_size = 10

In [14]:
batch_mask = np.random.choice(train_size, batch_size)
batch_mask

array([54116, 33984, 38195, 21720, 28339, 21925, 36818, 20494, 51420,
       38533])

In [15]:
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
print(x_batch.shape, t_batch.shape)
print(x_batch.ndim, t_batch.ndim)

(10, 784) (10,)
2 1


In [16]:
t_batch

array([4, 6, 2, 3, 6, 8, 7, 6, 7, 5], dtype=uint8)

In [17]:
t_batch_with_onehot = np.eye(10)[t_batch]
t_batch_with_onehot

array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])

In [18]:
with open('../../dataset/sample_weight.pkl', 'rb') as f:
    network = pickle.load(f)
print(type(network))
W1, W2, W3 = network['W1'], network['W2'], network['W3']
b1, b2, b3 = network['b1'], network['b2'], network['b3']
print(W1.shape, W2.shape, W3.shape)
print(b1.shape, b2.shape, b3.shape)

<class 'dict'>
(784, 50) (50, 100) (100, 10)
(50,) (100,) (10,)


In [19]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [20]:
def softmax(x):
    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))

In [21]:
def get_next_layer(activate_function, in_layout, weight, bias):
    next_layer = np.dot(in_layout, weight) + bias
    return activate_function(next_layer)

In [22]:
def predict(x):
    z1 = get_next_layer(sigmoid, x, W1, b1)
    z2 = get_next_layer(sigmoid, z1, W2, b2)
    z3 = get_next_layer(softmax, z2, W3, b3)
    return z3

In [23]:
y_batch = predict(x_batch)
print(y_batch.shape)

(10, 10)


  


In [24]:
mean_squared_error_with_batch(y_batch, t_batch)

161.37484130859374