<a href="https://colab.research.google.com/github/yoouza/nlp-models/blob/master/XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

X = np.array([[0,0,1,1], [0,1,0,1]])
Y = np.array([0,1,1,0])

In [2]:
# 1) 파라미터 초기화

N_hidden = 2
w_input = np.random.random_sample((len(X)+1, N_hidden))
w_hidden = np.random.random_sample(N_hidden)
w_b = np.random.random_sample()

In [3]:
# 2) feed forward

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

b = np.ones(len(X[0]))
X_b = np.concatenate([X, b.reshape(1, len(b))])

h = sigmoid(np.dot(w_input.T, X_b))
Y_hat = sigmoid(np.dot(w_hidden, h) + b * w_b)

In [4]:
# 3) Loss

N = len(Y)
loss = (-1/N) * np.sum( np.multiply(Y, np.log(Y_hat)) + np.multiply( 1-Y, np.log(1-Y_hat) ) )

In [5]:
# 4) back-propagation

# output -> hidden
gradient_hidden = np.dot(h, Y_hat-Y)
gradient_bias = Y_hat-Y

# hidden -> input
gradient_h = np.dot(w_hidden.reshape(len(w_hidden), 1), (Y_hat-Y).reshape(1, len(Y)))
gradient_input = np.dot(X_b, np.multiply(gradient_h, h, 1-h).T)

# update weights
alpha = 0.01
w_hidden = w_hidden - alpha * gradient_hidden
b = b - alpha * gradient_bias
w_input = w_input - alpha * gradient_input

# weight update 반복

In [9]:
N_iter = 100000

for iter in range(N_iter):
    h = sigmoid(np.dot(w_input.T, X_b))
    Y_hat = sigmoid(np.dot(w_hidden, h) + b * w_b)

    loss = (-1/N) * np.sum( np.multiply(Y, np.log(Y_hat)) + np.multiply(1-Y, np.log(1-Y_hat)) )

    # output -> hidden
    gradient_hidden = np.dot(h, Y_hat-Y)
    gradient_bias = Y_hat-Y

    # hidden -> input
    gradient_h = np.dot(w_hidden.reshape(len(w_hidden), 1), (Y_hat-Y).reshape(1, len(Y)))
    gradient_input = np.dot(X_b, np.multiply(gradient_h, h, 1-h).T)

    # update weights
    w_hidden = w_hidden - alpha * gradient_hidden
    b = b - alpha * gradient_bias
    w_input = w_input - alpha * gradient_input

In [10]:
Y_hat

array([0.12098237, 0.87995006, 0.88006595, 0.12087596])

# 클래스

In [11]:
import numpy as np

class XOR:
    def __init__(self, X, Y, N_hidden, alpha=.01):
        self.X = X
        self.Y = Y
        self.N_hidden = N_hidden
        self.alpha = alpha
        self.b = np.ones(len(X[0]))
        self.X_b = np.concatenate([X, self.b.reshape(1, len(self.b))])
        self.w_input = np.random.random_sample((len(X)+1, N_hidden))
        self.w_hidden = np.random.random_sample(N_hidden)
        self.w_b = np.random.random_sample()

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def train(self, N_iter):

        N = len(self.Y)
        for iter in range(N_iter):

            h = self._sigmoid(np.dot(self.w_input.T, self.X_b))
            Y_hat = self._sigmoid(np.dot(self.w_hidden, h) + self.b * self.w_b)
            loss = (-1/N) * np.sum( np.multiply(self.Y, np.log(Y_hat)) + np.multiply(1-self.Y, np.log(1-Y_hat)) )

            # output -> hidden
            gradient_hidden = np.dot(h, Y_hat-self.Y)
            gradient_bias = Y_hat-self.Y

            # hidden -> input
            gradient_h = np.dot(self.w_hidden.reshape(len(self.w_hidden), 1), (Y_hat-self.Y).reshape(1, N))
            gradient_input = np.dot(self.X_b, np.multiply(gradient_h, h, 1-h).T)

            # update weights
            self.w_hidden = self.w_hidden - alpha * gradient_hidden
            self.b = self.b - alpha * gradient_bias
            self.w_input = self.w_input - alpha * gradient_input

        return (Y_hat > 0.5) * 1, Y_hat

In [12]:
# example 1.

xor = XOR(X, Y, N_hidden = 3)
xor.train(N_iter = 10000)

(array([0, 1, 1, 0]), array([0.05416705, 0.94651544, 0.94677042, 0.05424499]))

In [13]:
# example 2.

xor = XOR(X, Y, N_hidden = 6)
xor.train(N_iter = 1000)

(array([0, 1, 1, 0]), array([0.18704217, 0.82321839, 0.81688198, 0.17737163]))