<a href="https://colab.research.google.com/github/yoouza/nlp-models/blob/master/XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from random import random

In [2]:
# XOR truth table. Each column of X is one sample: row 0 holds the first
# input bit, row 1 the second.
X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
# Expected XOR output for the matching column of X.
Y = np.array([0, 1, 1, 0])

In [3]:
# 1) Parameter initialization

N_hidden = 2

# Input -> hidden weights: one row per input feature plus one extra row for
# the constant-1 bias column that is appended to X before the forward pass.
# Values are drawn row by row from random(), i.e. uniformly from [0, 1).
w_input = np.array(
    [[random() for _ in range(N_hidden)] for _ in range(len(X) + 1)]
)

# Hidden -> output weights, one per hidden unit, stored as a column vector.
w_hidden = np.array([[random()] for _ in range(N_hidden)])

# Scalar weight applied to the output-layer bias input.
w_b = random()

In [4]:
# 2) feed forward + sigmoid

# Column of ones with one entry per sample (X stores samples as columns).
b = np.ones((X.shape[1], 1))
# Transpose so samples become rows, then append the ones column; the last row
# of w_input therefore acts as the hidden-layer bias.
X_b = np.hstack((X.T, b))
# Hidden-layer pre-activations.
h = X_b @ w_input

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise.

    Accepts a scalar or a NumPy array; the result has the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Hidden-layer activations.
h = sigmoid(h)
b2 = b  # bias input paired with w_hidden

# Output layer: weighted hidden activations plus the bias term, passed
# through the sigmoid.
Y_hat = sigmoid(h @ w_hidden + b2 * w_b)

In [5]:
# 3) Loss

N = len(Y_hat)

# Mean binary cross-entropy: add up the per-sample log-likelihood terms in
# sample order, then negate and divide by the number of samples.
loss = (-1/N) * sum(
    target*np.log(predicted) + (1-target)*np.log(1-predicted)
    for target, predicted in zip(Y, Y_hat)
)

In [6]:
# 4) Gradient

alpha = 0.01  # learning rate

# w2 (hidden -> output): for a sigmoid output trained with cross-entropy the
# error at the output pre-activation is simply (Y_hat - Y).
Y_ = Y.reshape(Y.shape[0], 1)
gradient_hidden = np.dot(h.T, (Y_hat-Y_))

# w1 (input -> hidden): push the output error back through w_hidden and scale
# it by the sigmoid derivative h * (1 - h). The factors are combined with `*`
# because np.multiply treats a third positional argument as the `out` buffer,
# not as another operand, which would silently drop the (1 - h) factor.
gradient_h = np.dot(Y_hat-Y_, w_hidden.T)
gradient_input = np.dot(X_b.T, gradient_h * h * (1 - h))

# update by back-propagation
w_hidden = w_hidden - alpha * gradient_hidden
w_input = w_input - alpha * gradient_input

# weight update 반복

In [7]:
# The loop variable is `step`, not `iter`, so the builtin iter() is not
# rebound at notebook scope.
for step in range(100000):
    # Forward pass with the current weights.
    h = sigmoid(np.dot(X_b, w_input))
    # NOTE(review): b2 holds one value per training sample and is adjusted by
    # that sample's own error, while w_b is never updated. It therefore acts
    # as a per-sample learned offset rather than a shared bias weight. Kept
    # as-is to preserve the existing behaviour; confirm whether a shared bias
    # was intended.
    b2 = b2 - alpha * (Y_hat-Y_)
    Y_hat = sigmoid(np.dot(h, w_hidden) + b2 * w_b)

    # Mean binary cross-entropy, computed for monitoring only; the updates
    # below do not read it.
    loss = (-1/N) * np.sum(Y_*np.log(Y_hat) + (1-Y_)*np.log(1-Y_hat), axis=0)

    # Output-layer gradient.
    gradient_hidden = np.dot(h.T, (Y_hat-Y_))

    # Hidden-layer gradient: back-propagated error times the sigmoid
    # derivative h * (1 - h). Written with `*` because the third positional
    # argument of np.multiply is `out`, which would drop the (1 - h) factor.
    gradient_h = np.dot(Y_hat-Y_, w_hidden.T)
    gradient_input = np.dot(X_b.T, gradient_h * h * (1 - h))

    w_hidden = w_hidden - alpha*gradient_hidden
    w_input = w_input - alpha*gradient_input

In [8]:
# Display the network outputs from the last training iteration, one row per sample.
Y_hat

array([[0.00165819],
       [0.99835447],
       [0.99835152],
       [0.00164149]])

# 클래스

In [9]:
import numpy as np
from random import random

class XOR:
    """Small two-layer sigmoid network fitted to 0/1 targets by gradient descent.

    The network has one hidden layer of ``N_hidden`` sigmoid units and a single
    sigmoid output. A column of ones is appended to the inputs so that the last
    row of the input weights serves as the hidden-layer bias.

    Args:
        X: Array of shape (n_features, n_samples); each column is one sample.
        Y: Array of shape (n_samples,) holding the targets.
        N_hidden: Number of hidden units.
        alpha: Learning rate used for every update.

    Attributes:
        loss: Mean binary cross-entropy of the most recent forward pass
            (set by ``forward`` and refreshed on every ``train`` iteration).
        iter: Number of iterations requested in the last ``train`` call.
    """

    def __init__(self, X, Y, N_hidden, alpha):
        self.X = X
        self.Y = Y
        # Column vector of targets so it lines up with the (n_samples, 1) outputs.
        self.Y_ = Y.reshape(Y.shape[0], 1)
        self.N_hidden = N_hidden
        self.alpha = alpha
        # Constant-one column: appended to the inputs here and reused as the
        # starting value of the output-layer bias input during training.
        self.b = np.ones((len(self.X[0]), 1))
        self.X_b = np.concatenate((self.X.T, self.b), axis=1)

    @staticmethod
    def _sigmoid(z):
        """Return the element-wise logistic function of ``z``."""
        return 1 / (1 + np.exp(-z))

    def _loss(self, Y_hat):
        """Return the mean binary cross-entropy of ``Y_hat`` against the targets."""
        # Clip so that a fully saturated output cannot produce log(0).
        p = np.clip(Y_hat, 1e-12, 1 - 1e-12)
        return float(-np.mean(self.Y_ * np.log(p) + (1 - self.Y_) * np.log(1 - p)))

    def _updated_weights(self, h, Y_hat, w_input, w_hidden):
        """Return ``(w_input, w_hidden)`` after one gradient-descent update."""
        error = Y_hat - self.Y_
        gradient_hidden = np.dot(h.T, error)
        # Back-propagated error times the sigmoid derivative h * (1 - h).
        # Plain `*` is used because np.multiply treats a third positional
        # argument as its `out` buffer, which would drop the (1 - h) factor.
        delta_hidden = np.dot(error, w_hidden.T) * h * (1 - h)
        gradient_input = np.dot(self.X_b.T, delta_hidden)
        return (
            w_input - self.alpha * gradient_input,
            w_hidden - self.alpha * gradient_hidden,
        )

    def forward(self):
        """Draw random weights, run one forward pass and apply one update.

        Every call draws a fresh set of weights from ``random()``.

        Returns:
            Tuple ``(w_input, w_hidden, w_b, Y_hat)``: the input and hidden
            weights after a single gradient step, the output bias weight
            (not updated), and the predictions computed with the weights
            *before* that step.
        """
        n_rows = len(self.X) + 1  # one row per feature plus the bias row
        # Same draw order as filling the matrices row by row.
        w_input = np.array(
            [random() for _ in range(n_rows * self.N_hidden)]
        ).reshape(n_rows, self.N_hidden)
        w_hidden = np.array(
            [random() for _ in range(self.N_hidden)]
        ).reshape(self.N_hidden, 1)
        w_b = random()

        h = self._sigmoid(np.dot(self.X_b, w_input))
        b2 = self.b
        Y_hat = self._sigmoid(np.dot(h, w_hidden) + b2 * w_b)
        self.loss = self._loss(Y_hat)

        w_input, w_hidden = self._updated_weights(h, Y_hat, w_input, w_hidden)
        return w_input, w_hidden, w_b, Y_hat

    def train(self, iter):
        """Train for ``iter`` iterations and return the thresholded predictions.

        Args:
            iter: Number of gradient-descent iterations to run.

        Returns:
            Integer array of shape (n_samples, 1) with 1 where the last
            computed output exceeds 0.5 and 0 elsewhere.
        """
        self.iter = iter
        b2 = self.b
        # One forward() call only: each call re-draws the weights, so unpacking
        # a single result keeps w_input, w_hidden, w_b and Y_hat consistent.
        w_input, w_hidden, w_b, Y_hat = self.forward()

        for _ in range(self.iter):
            h = self._sigmoid(np.dot(self.X_b, w_input))
            # NOTE(review): b2 holds one value per training sample and is
            # adjusted by that sample's own error, while w_b is never updated.
            # It therefore acts as a per-sample learned offset rather than a
            # shared bias weight. Kept to preserve the existing behaviour;
            # confirm whether a shared bias was intended.
            b2 = b2 - self.alpha * (Y_hat - self.Y_)
            Y_hat = self._sigmoid(np.dot(h, w_hidden) + b2 * w_b)
            self.loss = self._loss(Y_hat)

            w_input, w_hidden = self._updated_weights(h, Y_hat, w_input, w_hidden)

        return (Y_hat > 0.5) * 1

In [10]:
# XOR inputs (one sample per column) and their expected outputs.
X = np.array([
    [0, 0, 1, 1],
    [0, 1, 0, 1],
])
Y = np.array([0, 1, 1, 0])

In [11]:
# Example 1: three hidden units, 10000 training iterations.

xor = XOR(X, Y, N_hidden=3, alpha=0.01)
xor.train(iter=10000)

array([[0],
       [1],
       [1],
       [0]])

In [12]:
# Example 2: six hidden units, 1000 training iterations.

xor = XOR(X, Y, N_hidden=6, alpha=0.01)
xor.train(iter=1000)

array([[0],
       [1],
       [1],
       [0]])