In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

In [2]:
X = np.array([[1,2]])
y = np.array([3])

In [3]:
y = np.where(y == 3, 0, 1)

In [38]:
class MLP(object):
    def __init__(self,l2=0.0,n_hidden=3,epochs=30,learning_rate=0.01,shuffle=True,minibatch=1,seed=None):
        self.random = np.random.RandomState(seed)
        self.l2 = l2
        self.n_hidden = n_hidden
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.minibatch = minibatch
        self.shuffle = shuffle
        
    def _onehot(self, y, n_classes):
        onehot = np.zeros((n_classes, y.shape[0]))
        for idx, val in enumerate(y.astype(int)):
            onehot[val, idx] = 1.
        return onehot.T
        
    def ReLU(self, z):
        return np.maximum(0,z.all())
    
    def forward(self, X):
        z_h = np.dot(X, self.w_h)
        a_h = self.ReLU(z_h)
        z_out = np.dot(a_h, self.w_out)
        a_out = self.ReLU(z_out)

        return z_h, a_h, z_out, a_out
    
    def compute_cost(self, y_enc, output):
        L2_term = (self.l2 *
                   (np.sum(self.w_h ** 2.) +
                    np.sum(self.w_out ** 2.)))
        
        term1 = -y_enc * (np.log(output))
        term2 = (1. - y_enc) * np.log((1. - output) + 1e-17)
        cost = np.sum(term1 - term2) + L2_term
        
        return cost
    
    def predict(self, X):
        z_h, a_h, z_out, a_out = self.forward(X)
        y_pred = np.argmax(z_out, axis=1)
        return y_pred
    
    def fit(self, X_train, y_train):
        n_output = np.unique(y_train).shape[0]
        n_features = X_train.shape[1]

        self.w_h = np.array([[2.0,1.0,1.0],[1.0,-2.0,2.0]])

        self.w_out = np.array([[-1.0],[3.0],[2.0]])

        epoch_strlen = len(str(self.epochs))
        self.eval_ = {'cost': [], 'train_acc': [], 'valid_acc': []}

        y_train_enc = self._onehot(y_train, n_output)

        for i in range(self.epochs):
            indices = np.arange(X_train.shape[0])

            if self.shuffle:
                self.random.shuffle(indices)

            for start_idx in range(0, indices.shape[0] - self.minibatch +
                                   1, self.minibatch):
                batch_idx = indices[start_idx:start_idx + self.minibatch]

                z_h, a_h, z_out, a_out = self.forward(X_train[batch_idx])

                sigma_out = a_out - y_train_enc[batch_idx]
                
                activation_derivative_h = 1 if a_h > 0 else 0
                    
                sigma_h = (np.dot(sigma_out, self.w_out.T) *
                           activation_derivative_h)

                grad_w_h = np.dot(X_train[batch_idx].T, sigma_h)
                grad_b_h = np.sum(sigma_h, axis=0)

                grad_w_out = np.dot(a_h.T, sigma_out)
                grad_b_out = np.sum(sigma_out, axis=0)

                delta_w_h = (grad_w_h + self.l2*self.w_h)
                delta_b_h = grad_b_h
                
                self.w_h -= self.learning_rate * delta_w_h

                delta_w_out = (grad_w_out + self.l2*self.w_out)
                delta_b_out = grad_b_out
                self.w_out -= self.learning_rate * delta_w_out
                
            z_h, a_h, z_out, a_out = self.forward(X_train)
            
            cost = self.compute_cost(y_enc=y_train_enc,
                                      output=a_out)

            y_train_pred = self.predict(X_train)

            train_acc = ((np.sum(y_train == y_train_pred)).astype(np.float) /
                         X_train.shape[0])
            
            print('epoch',i+1,':')
            print('w_h: ',self.w_h)
            print('w_out: ',self.w_out)

            self.eval_['cost'].append(cost)
            self.eval_['train_acc'].append(train_acc)

        return self

In [39]:
modelSig = MLP(epochs=20)

In [40]:
modelSig.fit(X,y)

epoch 1 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 2 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 3 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 4 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 5 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 6 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 7 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 8 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 9 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 10 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 11 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 12 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 2.]]
epoch 13 :
w_h:  [[ 2.  1.  1.]
 [ 1. -2.  2.]]
w_out:  [[-1.]
 [ 3.]
 [ 

<__main__.MLP at 0x7f6b909acb70>

In [21]:
modelSig.eval_

{'cost': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'train_acc': [3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0,
  3.0],
 'valid_acc': []}