# 4.2 손실 함수

## 4.2.1 평균 제곱 오차

In [1]:
# Example prediction vector with 10 entries; the largest value (0.6) is at index 2.
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# One-hot target vector: the single 1 is at index 2.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

In [2]:
def mean_squared_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Computes ``0.5 * sum((y - t) ** 2)`` over all elements.

    Args:
        y: Predicted values; a NumPy array or any array-like such as a list.
        t: Target values, broadcastable against ``y``.

    Returns:
        The scalar error value.
    """
    # Coerce up front so plain Python lists work as well as arrays
    # (list - list would otherwise raise TypeError).
    y = np.asarray(y)
    t = np.asarray(t)
    return 0.5 * np.sum((y - t) ** 2)

In [3]:
# Evaluate the error for the current y and t; the lists are converted to
# arrays first because the function subtracts them element-wise.
mean_squared_error(np.array(y), np.array(t))

0.097500000000000031

In [6]:
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1 ,0.0, 0.6, 0.0, 0.0] # estimates that '7' is the most probable class
# Re-evaluate the error with the new prediction against the same target t.
mean_squared_error(np.array(y), np.array(t))

0.59750000000000003

## 4.2.2 교차 엔트로피 오차

In [7]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error ``-sum(t * log(y))``.

    A tiny constant is added to ``y`` before taking the logarithm so that
    an entry of exactly 0 produces a large finite value instead of ``-inf``.
    """
    eps = 1e-7
    log_y = np.log(y + eps)
    weighted = t * log_y
    return -np.sum(weighted)

In [19]:
# One-hot target as a NumPy array: the single 1 is at index 2.
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [20]:
# Prediction whose largest entry (0.6) is at index 2.
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])

In [21]:
# Cross-entropy error for the current y and t.
cross_entropy_error(y, t)

0.51082545709933802

In [22]:
# Prediction whose largest entry (0.6) is at index 7.
y = np.array([0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0])

In [23]:
# Cross-entropy error for the updated y against the same t.
cross_entropy_error(y, t)

2.3025840929945458

## 4.2.3 미니배치 학습

In [25]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

In [26]:
# Load the dataset and unpack it into (inputs, labels) pairs for train and test.
# one_hot_label=True requests one-hot label rows; normalize=True presumably
# rescales the pixel values — confirm in dataset.mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label = True)

In [28]:
# Show the shapes of the training inputs and the training labels.
print(x_train.shape)
print(t_train.shape)

(60000, 784)
(60000, 10)


In [31]:
train_size = x_train.shape[0]  # length of the first axis of the training inputs
batch_size = 10
# Draw batch_size random indices from range(train_size). np.random.choice
# samples with replacement by default, so an index may appear more than once.
batch_mask = np.random.choice(train_size, batch_size)
# Index inputs and labels with the same indices so the pairs stay aligned.
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

## 4.2.4 (배치용) 교차 엔트로피 오차 구현하기

In [36]:
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error over a batch of one-hot targets.

    Args:
        y: Predictions as a NumPy array, either a single 1-D sample or a
            2-D array with one sample per row.
        t: One-hot targets with the same shape as ``y``.

    Returns:
        ``-sum(t * log(y))`` divided by the number of rows in ``y``.
    """
    # Promote a single sample to a batch of one so the code below is uniform.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # Without this offset a prediction of exactly 0 gives log(0) = -inf, and
    # 0 * -inf is nan, which would poison the whole sum.
    delta = 1e-7
    return -np.sum(t * np.log(y + delta)) / batch_size

In [1]:
# Variant for targets given as integer class labels rather than one-hot vectors
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error for integer class labels.

    Args:
        y: Predictions as a NumPy array, either a single 1-D sample or a
            2-D array with one sample per row.
        t: NumPy array of integer class indices, one per row of ``y``
            (a single label for a 1-D ``y``).

    Returns:
        The negative log of the predicted value at each labelled class,
        summed and divided by the number of rows in ``y``.
    """
    # Promote a single sample to a batch of one so the code below is uniform.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # Offset keeps log() finite when the selected prediction is exactly 0.
    delta = 1e-7
    # Pair row i with column t[i] to pick each sample's labelled-class value.
    picked = y[np.arange(batch_size), t]
    return -np.sum(np.log(picked + delta)) / batch_size

# 4.3 수치 미분

## 4.3.1 미분

In [2]:
# Example of a poor implementation
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` with a forward difference.

    Kept as the deliberately poor example: the step is so small that, for
    ordinary magnitudes of ``x``, ``x + step`` rounds back to ``x`` in
    floating point and the numerator collapses to 0.
    """
    step = 10e-50
    rise = f(x + step) - f(x)
    return rise / step