In [62]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder

def sigmoid(z):
    a = 1/(1+np.exp(-z))
    return a

def relu(z):
    a = np.maximum(0, z)
    return a

def leaky_relu(z, aplha=0.01):
    a = np.maximum(aplha*z, z)
    return a

def tanh(z):
    a = (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    return a

def softmax(z):
    m = z.shape[-1]
    exp_z = np.exp(z)
    a = np.divide(exp_z, np.sum(exp_z)/m)
    return a

def activate(z, activation):
    if activation == "sigmoid":
        return sigmoid(z)
    elif activation == "relu":
        return relu(z)
    elif activation == "leaky_relu":
        return leaky_relu(z)
    elif activation == "tanh":
        return tanh(z)
    else:
        return softmax(z)

class layer:
    def __init__(self, shape, activation):
        self.W = np.random.randn(*shape)*0.01
        self.b = np.zeros((shape[0], 1))
        self.activation = activation
        
    def forward(self, x):
        self.z = np.dot(self.W, x) + self.b
        self.a = activate(self.z, self.activation)
        return self.a, self.W, self.z, x

def backward_propagation_sigmoid(dal, wl, zl, al_1):
    m = dal.shape[-1]
    al = sigmoid(zl)
    dzl = dal * al * (1 - al)
    dwl = 1/m*np.dot(dzl, al_1.T)
    dbl = 1/m*np.sum(dzl, axis=1, keepdims=True)
    dal_1 = np.dot(wl.T, dzl)
    return dal_1, dwl, dbl

def backward_propagation_relu(dal, wl, zl, al_1):
    m = dal.shape[-1]
    al = relu(zl)
    dal_dzl = zl >= 0
    dal_dzl = dal_dzl.astype("int")
    dzl = dal*dal_dzl
    dwl = 1/m*np.dot(dzl, al_1.T)
    dbl = 1/m*np.sum(dzl, axis=1, keepdims=True)
    dal_1 = np.dot(wl.T, dzl)
    return dal_1, dwl, dbl

def backward_propagation_tanh(dal, wl, zl, al_1):
    m = dal.shape[-1]
    al = tanh(zl)
    dzl = dal*(1-al**2)
    dwl = 1/m*np.dot(dzl, al_1.T)
    dbl = 1/m*np.sum(dzl, axis=1, keepdims=True)
    dal_1 = np.dot(wl.T, dzl)
    return dal_1, dwl, dbl

def backward_propagation(dal, wl, zl, al_1, act):
    if act == "sigmoid":
        return backward_propagation_sigmoid(dal, wl, zl, al_1)
    elif act == "relu":
        return backward_propagation_relu(dal, wl, zl, al_1)
    else:
        return backward_propagation_tanh(dal, wl, zl, al_1)

def select(a, c):
    ac = []
    for i, j in zip(c[0, :], range(c.shape[-1])):
        ac.append(a[i, j])
    ac = np.array(ac).reshape(1, len(ac))
    return ac

def grads_correction(dal_dzl, temp, c):
    for idx, i in zip(c[0, :], range(dal_dzl.shape[-1])):
        dal_dzl[idx, i] = temp[0, i]
    return dal_dzl

def backward_propagation_softmax(dal, wl, zl, al_1, c):
    m = dal.shape[-1]
    al = softmax(zl)
    ac = select(al, c)
    dal_dzl = -al * ac
    temp = ac*(1-ac)
    dal_dzl = grads_correction(dal_dzl, temp, c)
    dzl = dal*dal_dzl
    dwl = 1/m*np.dot(dzl, al_1.T)
    dbl = 1/m*np.sum(dzl, axis=1, keepdims=True)
    dal_1 = np.dot(wl.T, dzl)
    return dal_1, dwl, dbl

def one_hot(num_classes, y):
    zero_array = np.zeros((num_classes, y.shape[-1]))
    for y_, i in zip(y[0, :], range(y.shape[-1])):
        zero_array[y_, i] = 1
    return zero_array

class model:
    def __init__(self):
        self.layers = []
    
    def forward_prop(self, x):
        cache = {}
        al = x
        for layer, l in zip(self.layers, range(1, len(self.layers)+1)):
            al, wl, zl, al_1 = layer.forward(al)
            cache["l"+str(l)] = [wl, zl, al_1]
        return al, cache
    
    def backward_prop(self, y_hat, y, cache):
        grads = {}
        y_onehot = one_hot(y_hat.shape[0], y)
        m = y.shape[-1]
        dzl = np.copy(y_hat)
        dzl[y, range(m)] -= 1
        wl, zl, al_1 = cache["l"+str(len(self.layers))]
        dwl = 1/m*np.dot(dzl, al_1.T)
        dbl = 1/m*np.sum(dzl, axis=1, keepdims=True)
        dal = np.dot(wl.T, dzl)
        grads["dW"+str(len(self.layers))] = dwl
        grads["db"+str(len(self.layers))] = dbl
        
        for i in reversed(range(1, len(self.layers))):
            wl, zl, al_1 = cache["l"+str(i)]
            dal, dwl, dbl = backward_propagation(dal, wl, zl, al_1, self.layers[i-1].activation)
            grads["dW"+str(i)] = dwl
            grads["db"+str(i)] = dbl
        return grads
    
    def loss(self, y_hat, y):
        m = y.shape[-1]
        J = -1/m*np.sum(y*np.log(y_hat))
        return J
    
    def update(self, grads, learning_rate):
        for layer, i in zip(self.layers, range(1, len(self.layers)+1)):
            layer.W = layer.W - learning_rate*grads["dW"+str(i)]
            layer.b = layer.b - learning_rate*grads["db"+str(i)]
    
    def train(self, x, y, epoches, learning_rate):
        for i in range(epoches):
            y_onehot = one_hot(self.layers[-1].b.shape[0], y)
            y_hat, cache = self.forward_prop(x)
            J = self.loss(y_hat, y_onehot)
            grads = self.backward_prop(y_hat, y, cache)
            self.update(grads, learning_rate)
            if i %(epoches/10) == 0:
                print(J)

In [63]:
data = load_iris()
x = data["data"].T
y = data["target"].reshape(1, x.shape[-1])
print(x.shape, y.shape)

(4, 150) (1, 150)


In [64]:
nets = model()
layers = [layer((6,4), "tanh"), layer((3,6), "softmax")]
nets.layers = layers
nets.train(x, y, 1000, 0.1)

1.0983897016594966
0.5104690154092868
0.3665867548607246
0.2849938743484492
0.22756053704051768
0.18175294286213525
0.14987405914649674
0.1303453709448769
0.11982219268256375
0.11431554127093613


In [65]:
y_hat, _ = nets.forward_prop(x)

In [66]:
np.argmax(y_hat, axis=0)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])