In [16]:
import numpy as np

In [17]:
# Activation functions for the output layer

def linear(z, derivative=False):
    """Identity activation.

    Args:
        z: Pre-activation values. Its ``shape`` attribute is read when
            ``derivative`` is true.
        derivative: When true, also return the derivative term.

    Returns:
        ``z`` unchanged, or the pair ``(z, ones)`` where ``ones`` is an array
        of ones with the same shape as ``z``.
    """
    if not derivative:
        return z
    # The identity has slope one everywhere.
    return z, np.ones(z.shape)

def logistic(z, derivative=False):
    """Logistic (sigmoid) activation for the output layer.

    Args:
        z: Pre-activation values. Its ``shape`` attribute is read when
            ``derivative`` is true.
        derivative: When true, also return the derivative term.

    Returns:
        ``1 / (1 + exp(-z))``, or the pair ``(activation, ones)`` where
        ``ones`` is an array of ones with the same shape as ``z``.
    """
    activation = 1 / (1 + np.exp(-z))
    if not derivative:
        return activation
    # Cost function: binary cross-entropy. The derivative term returned here
    # is deliberately an array of ones rather than a * (1 - a).
    return activation, np.ones(z.shape)

def softmax(z, derivative=False):
    """Softmax activation computed along axis 0.

    Args:
        z: Pre-activation values. Its ``shape`` attribute is read when
            ``derivative`` is true.
        derivative: When true, also return the derivative term.

    Returns:
        The exponentials of ``z`` normalised by their sum along axis 0, or
        the pair ``(activation, ones)`` where ``ones`` is an array of ones
        with the same shape as ``z``.
    """
    # Shift by the maximum along axis 0 before exponentiating.
    shifted = z - np.max(z, axis=0)
    exps = np.exp(shifted)
    probs = exps / np.sum(exps, axis=0)
    if not derivative:
        return probs
    # Cost function: categorical cross-entropy. The derivative term returned
    # here is deliberately an array of ones.
    return probs, np.ones(z.shape)

In [18]:
# Activation functions for the hidden layers

def tanh(z, derivative=False):
    """Hyperbolic tangent activation.

    Args:
        z: Pre-activation values.
        derivative: When true, also return the derivative.

    Returns:
        ``np.tanh(z)``, or the pair ``(activation, slope)`` where ``slope``
        is ``(1 + activation) * (1 - activation)``.
    """
    activation = np.tanh(z)
    if not derivative:
        return activation
    # Derivative expressed through the activation: (1 - a)(1 + a).
    slope = (1 - activation) * (1 + activation)
    return activation, slope

def relu(z, derivative=False):
    """Rectified linear unit activation.

    Args:
        z: Pre-activation values (NumPy array).
        derivative: When true, also return the derivative.

    Returns:
        ``z`` with negative entries zeroed, or the pair ``(activation, slope)``
        where ``slope`` is 1.0 where ``z >= 0`` and 0.0 elsewhere.
    """
    non_negative = z >= 0
    a = z * non_negative
    if derivative:
        # The ``np.float`` alias was removed in NumPy 1.24; the builtin
        # ``float`` yields the same float64 dtype on every NumPy version.
        da = non_negative.astype(float)
        return a, da
    return a

def logistic_hidden(z, derivative=False):
    """Logistic (sigmoid) activation with its true derivative.

    Args:
        z: Pre-activation values.
        derivative: When true, also return the derivative.

    Returns:
        ``1 / (1 + exp(-z))``, or the pair ``(activation, slope)`` where
        ``slope`` is ``activation * (1 - activation)``.
    """
    activation = 1 / (1 + np.exp(-z))
    if not derivative:
        return activation
    slope = (1 - activation) * activation
    return activation, slope

In [None]:
class MLP:
    """Multilayer perceptron trained sample by sample with backpropagation.

    Layers are indexed from 1 to ``self.L``; index 0 of ``self.w``,
    ``self.b`` and ``self.f`` is an unused placeholder so that list indices
    match layer numbers.

    Attributes are plain lists:
        L: Number of layers (not counting the inputs).
        w: Weight matrices; ``w[l]`` has shape
            ``(layers_dim[l], layers_dim[l - 1])``.
        b: Bias column vectors; ``b[l]`` has shape ``(layers_dim[l], 1)``.
        f: Activation callables; each is called as ``f(z)`` or
            ``f(z, derivative=True)`` and in the latter case must return
            ``(activation, derivative)``.
    """

    def __init__(self, layers_dim, hidden_activation = tanh, output_activation = logistic):
        """Create the network with weights and biases drawn uniformly in [-1, 1).

        Args:
            layers_dim: Sequence of sizes; ``layers_dim[0]`` is the number of
                inputs and each following entry is the size of one layer.
            hidden_activation: Activation used by every layer but the last.
            output_activation: Activation used by the last layer.
        """
        # The length of layers_dim is the number of layers plus one, because
        # its first entry is the number of inputs.
        self.L = len(layers_dim) - 1
        self.w = [None] * (self.L + 1)
        self.b = [None] * (self.L + 1)
        self.f = [None] * (self.L + 1)

        # Synaptic weight initialisation.
        for l in range(1, self.L + 1):
            self.w[l] = -1 + 2 * np.random.rand(layers_dim[l], layers_dim[l - 1])
            self.b[l] = -1 + 2 * np.random.rand(layers_dim[l], 1)
            self.f[l] = output_activation if l == self.L else hidden_activation

    def predict(self, X):
        """Propagate ``X`` through every layer and return the final activation.

        Args:
            X: Input array whose first dimension matches the number of inputs
                (it is left-multiplied by ``w[1]``).

        Returns:
            The activation of the last layer.
        """
        a = np.asanyarray(X)

        for l in range(1, self.L + 1):
            z = np.dot(self.w[l], a) + self.b[l]
            a = self.f[l](z)
        return a

    def fit(self, X, Y, epochs = 500, lr = 0.01):
        """Train the network in place with stochastic gradient descent.

        Args:
            X: 2-D array of inputs; column ``p`` is sample ``p``.
            Y: 2-D array of targets; column ``p`` is the target of sample ``p``.
            epochs: Number of full passes over the samples.
            lr: Learning rate applied to every update.
        """
        X = np.asanyarray(X)
        Y = np.asanyarray(Y)
        P = X.shape[1]

        for _ in range(epochs):
            for p in range(P):

                # Per-sample storage, indexed by layer number.
                a = [None] * (self.L + 1)
                da = [None] * (self.L + 1)
                local_grad = [None] * (self.L + 1)

                # Sample p as a column vector.
                a[0] = X[:, p].reshape(-1, 1)

                # Forward propagation through every layer, output included.
                for l in range(1, self.L + 1):
                    z = np.dot(self.w[l], a[l - 1]) + self.b[l]
                    a[l], da[l] = self.f[l](z, derivative = True)

                # Backpropagation of the local gradients.
                for l in range(self.L, 0, -1):
                    if l == self.L:
                        # Output layer: error (target - output) times the
                        # derivative term supplied by the activation.
                        local_grad[l] = (Y[:, p].reshape(-1, 1) - a[l]) * da[l]
                    else:
                        local_grad[l] = np.dot(self.w[l + 1].T, local_grad[l + 1]) * da[l]

                # Stochastic gradient descent (SGD) update.
                for l in range(1, self.L + 1):
                    self.w[l] += lr * np.dot(local_grad[l], a[l - 1].T)
                    self.b[l] += lr * local_grad[l]