In [1]:
# coding: utf-8
import sys, os
import numpy as np
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정

In [2]:
def mean_squared_error(y, t):
    """Return half the sum of squared differences between ``y`` and ``t``.

    Note that, despite the name, the result is not divided by the number of
    elements: it is ``0.5 * sum((y - t) ** 2)``.

    Args:
        y: Predicted value(s).
        t: Target value(s), combined with ``y`` via ``y - t``.

    Returns:
        The scalar loss value.
    """
    residual = y - t
    squared_total = np.sum(residual ** 2)
    return 0.5 * squared_total

def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` to ``x``.

    Args:
        x: A scalar or NumPy array; the function is applied element-wise.

    Returns:
        The logistic function evaluated at ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is perturbed in place by ``+h`` and then ``-h``
    (``h = 1e-4``), ``f(x)`` is evaluated at both points, and the element is
    put back to its original value before the next element is visited.

    Args:
        f: Callable invoked as ``f(x)``; its result is used as a scalar.
        x: NumPy array. It is modified temporarily while the estimate is
            computed and holds its original values again on return.

    Returns:
        An array created with ``np.zeros_like(x)`` that holds the estimated
        slope ``(f(x+h) - f(x-h)) / (2*h)`` for each element of ``x``.
    """
    step = 1e-4  # 0.0001
    slopes = np.zeros_like(x)

    # The iterator is only used to walk every multi-dimensional index of x.
    cursor = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in cursor:
        pos = cursor.multi_index
        saved = x[pos]

        x[pos] = float(saved) + step
        f_plus = f(x)  # f(x+h)

        x[pos] = saved - step
        f_minus = f(x)  # f(x-h)

        x[pos] = saved  # restore the original value
        slopes[pos] = (f_plus - f_minus) / (2 * step)

    return slopes

class XorLayerNet:
    """Small fully connected network: sigmoid hidden layer, linear output.

    All trainable arrays live in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameter arrays.

        Weights are drawn from a standard normal distribution and divided by
        the square root of the layer's input width; biases start at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
        """
        # Initialize weights (W1 is drawn before W2, so seeded runs are
        # reproducible).
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run a forward pass and return the (un-squashed) output layer.

        Args:
            x: Input data whose last dimension matches ``W1``'s first.

        Returns:
            ``sigmoid(x @ W1 + b1) @ W2 + b2``.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2

        # The output layer is linear: no activation is applied to a2.
        return a2

    def loss(self, x, t):
        """Return ``mean_squared_error(self.predict(x), t)``.

        Args:
            x: Input data.
            t: Ground-truth labels.
        """
        y = self.predict(x)

        return mean_squared_error(y, t)

    def numerical_gradient(self, x, t):
        """Estimate the loss gradient for every parameter array.

        Args:
            x: Input data.
            t: Ground-truth labels.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``; each value
            is the result of the module-level ``numerical_gradient`` for the
            corresponding entry of ``self.params``.
        """
        def loss_of_params(_):
            # The argument is deliberately ignored: the module-level
            # numerical_gradient perturbs the arrays stored in self.params
            # in place, and self.loss reads them from there.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_of_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }
        
    
network = XorLayerNet(input_size=2, hidden_size=10, output_size=1)
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
t_train = np.array([0, 1, 1, 0])

# Hyperparameters
epoch = 5000  # number of passes over the training samples; tune as needed
data_size = 4
learning_rate = 0.01

# One gradient-descent update per training sample, repeated for every epoch.
for i in range(epoch):
    for j in range(data_size):
        sample = x_train[j]
        target = t_train[j]

        # Compute the gradient
        grad = network.numerical_gradient(np.array(sample), np.array(target))

        # Update the parameters in place
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        # Report training progress
        loss = network.loss(sample, target)
        print("x :", sample, "  predict :", network.predict(sample),
              "  target : ", target, "  loss : ", loss)

    

NameError: name 'train_size' is not defined

In [11]:
print("test!!! start")
x_test = np.array([[1, 1], [0, 1]])
t_test = np.array([0, 1])

# Show the network's output next to the expected label for each test input.
for sample, label in zip(x_test, t_test):
    print("predict : ", network.predict(sample), "correct : ", label)


test!!! start
predict :  [ 0.2852159] correct :  0
predict :  [ 0.68895895] correct :  1
