In [1]:
import numpy as np
import copy as cp
import time
import random

from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.utils import check_random_state

## Obtenção dos dados

In [2]:
train_samples = 50000
X_original, y_original = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

In [3]:
random_state = check_random_state(0)
permutation = random_state.permutation(X_original.shape[0])
X = X_original[permutation]
y = y_original[permutation]
X = X.reshape((X.shape[0], -1))

In [4]:
def split_dataset(X, y, seed):
    train_samples = 50000
    X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000, random_state = seed)

    y_train = y_train.reshape(-1,1)
    y_test = y_test.reshape(-1,1)

    # Pre processamento entrada
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    encoder = OneHotEncoder()
    encoder.fit(y_train)
    y_train = encoder.transform(y_train).toarray()
    y_test = encoder.transform(y_test).toarray()
    return X_train, X_test, y_train, y_test

# Classe da rede neural

In [5]:
# classe que representa uma rede neural

class NeuralNet(object):
    def __init__(self, sizes, raio = None, random_state = 0):
        """sizes contem o numero de neuronios em cada camada. 
        raio contem um numero positivo que define o limite da norma dos pesos iniciais da rede
        Ex: se sizes = [2,3,1]  entao a rede vai ter 2 neuronios
        na primeira camada, 3 neuronios na segunda camada e 
        1 neuronio na terceira camada."""
        np.random.seed(random_state)
        
        self.seed = random_state
        self.num_layers = len(sizes) # numero de camadas da rede
        self.sizes     = sizes      # numero de neuronios na respectiva camada
        aux = np.ones(len(sizes), dtype = np.int8) #para adicionar 1 neuronio em cada camada exceto na ultima
        aux[-1] = 0
        self.sizes_com_bias = sizes + aux
        self.weights = [np.random.randn(y,x) for x, y in zip(self.sizes_com_bias[:-1], self.sizes_com_bias[1:])]
        self.raio = raio
        if raio is not None:
          weights_vec = mat2vet(self.weights)
          rand_factor = np.random.rand()
          weights_vec = (raio-1) * rand_factor * weights_vec / np.linalg.norm(weights_vec)
          self.weights = self.vet2mat(weights_vec)
        
    # Faz o feedfoward em um conjunto de entrada utilizando a softmax na saida
    def feedfowardbatch(self, a):        
        uns = np.ones((np.shape(a)[0],1))
        a = np.concatenate((uns, a), axis = 1)
        a = a.T
        for i in range(0, len(self.weights)-1):
            a = sigmoid(np.dot(self.weights[i],a))
            a[0 , :] = 1
        
        output = np.dot(self.weights[-1], a)
        output = np.exp(output)

        soma = np.sum(output, axis = 0)
        retorno = output/ soma[None, :]
        return retorno

    def feedforward(self, a):
        #forca o primeiro neuronio de cada camada ser 1 uma vez que representa o bias
        a = np.concatenate(([1], a), axis=0)
        a = a.T
        for i in range(0,len(self.weights)-1):
            a = sigmoid(np.dot(self.weights[i], a))
            a[0] = 1
        return sigmoid(np.dot(self.weights[-1], a))

    def trainFDIPA(self, X, y, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test):
        n = len(X)

        # self.acuracia_treino =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.acuracia_teste  =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.perda_treino    =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.perda_teste     =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.norma_peso      =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        
        for j in range(epochs):
            p = np.random.permutation(len(X))
            X = X[p]
            y = y[p]

            for k in range(0, n, mini_batch_size):
                self.update_mini_batch_FDIPA(X[k:k+mini_batch_size], y[k:k+mini_batch_size], eta, iteracoes, raio)

            # predictions_train = (self.feedfowardbatch(X)).T
            # predictions_test = (self.feedfowardbatch(X_test)).T
            # accuracy_train = np.sum(np.argmax(predictions_train, axis = 1) == np.argmax(y, axis = 1))/X.shape[0]
            # accuracy_test = np.sum(np.argmax(predictions_test, axis = 1) == np.argmax(y_test, axis = 1))/X_test.shape[0]
            # loss_train = self.feedforwardFDIPA(self.weights, X, y) 
            # loss_test = self.feedforwardFDIPA(self.weights, X_test, y_test)
            # norm_weight = np.linalg.norm(mat2vet(self.weights))

            # self.acuracia_treino[str(j+1)] = accuracy_train
            # self.acuracia_teste[str(j+1)]  = accuracy_test
            # self.perda_treino[str(j+1)]    = loss_train 
            # self.perda_teste[str(j+1)]     = loss_test 
            # self.norma_peso[str(j+1)]      = norm_weight
        
        
        predictions_test = (self.feedfowardbatch(X_test)).T
        self.accuracy_test = np.sum(np.argmax(predictions_test, axis = 1) == np.argmax(y_test, axis = 1))/X_test.shape[0]
        
    def update_mini_batch_FDIPA(self, x_train, y_train, eta, iteracoes, raio):
        L0 = np.ones([1])
        self.weights = self.FDIPA(self.weights, L0, iteracoes, x_train, y_train, eta, raio)

    # Stochastic Gradient Descent
    def SGD(self, X, y, epochs, mini_batch_size, eta, X_test = None, y_test = None):
        n = len(X)

        # self.acuracia_treino =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.acuracia_teste  =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.perda_treino    =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.perda_teste     =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}
        # self.norma_peso      =  {'metodo':'FDIPA', 'passo_inicial' : eta, 'iteracoes' : iteracoes, 'raio': self.raio, 'semente': self.seed}

        for j in range(epochs):
            p = np.random.permutation(len(X))
            X = X[p]
            y = y[p]

            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(X[k:k+mini_batch_size], y[k:k+mini_batch_size], eta)


            # predictions_train = (self.feedfowardbatch(X)).T
            # predictions_test = (self.feedfowardbatch(X_test)).T
            # accuracy_train = np.sum(np.argmax(predictions_train, axis = 1) == np.argmax(y, axis = 1))/X.shape[0]
            # accuracy_test = np.sum(np.argmax(predictions_test, axis = 1) == np.argmax(y_test, axis = 1))/X_test.shape[0]
            # loss_train = self.feedforwardFDIPA(self.weights, X, y) 
            # loss_test = self.feedforwardFDIPA(self.weights, X_test, y_test)
            # norm_weight = np.linalg.norm(mat2vet(self.weights))

            # self.acuracia_treino[str(j+1)] = accuracy_train
            # self.acuracia_teste[str(j+1)]  = accuracy_test
            # self.perda_treino[str(j+1)]    = loss_train 
            # self.perda_teste[str(j+1)]     = loss_test 
            # self.norma_peso[str(j+1)]      = norm_weight


        predictions_test = (self.feedfowardbatch(X_test)).T
        self.accuracy_test = np.sum(np.argmax(predictions_test, axis = 1) == np.argmax(y_test, axis = 1))/X_test.shape[0]                        

    def update_mini_batch(self, x_train, y_train, eta):
        """ Atualiza os pesos e bias da rede aplicando 
        a descida do gradiente usando backpropagation para um unico mini lote.
        'eta' eh a taxa de aprendizado.        """        

        nabla_w = self.backprop_softmax(self.weights, x_train, y_train)
        # nabla_w = self.backpropFDIPA(self.weights, x_train, y_train)
        self.weights = [w-(eta/len(x_train))*nw for w, nw in zip(self.weights, nabla_w)]

    # Realiza o backpropagation para um conjunto de dados x e y considerando softmax
    def backprop_softmax(self, w, x, y):
        """Retorna `nabla_w` representando o
         gradiente para a funcao de custo C_x. `nabla_w` eh uma lista de camadas de matrizes numpy,
         semelhante a  `self.weights`."""
        nabla_w = [np.zeros(waux.shape) for waux in w]
        
        # Feedforward
        activation = x
        uns = np.ones((np.shape(activation)[0],1))
        activation = np.concatenate((uns, activation), axis = 1)
        activation = activation.T
        # Lista para armazenar todas as ativacoes, camada por camada
        activations = [activation] 
        y = y.T

        # Lista para armazenar todos os vetores z, camada por camada
        zs = [] 
        for i in range(0,len(w)-1):
            z = np.dot(w[i], activation)
            zs.append(z)
            activation = sigmoid(z)
            activation[0,:] = 1
            activations.append(activation)

        z = np.dot(w[-1], activation)
        zs.append(z)

        activation = softmax(zs[-1][:, 0])
        for j in range(1, zs[-1].shape[1]):
            activation = np.concatenate((activation, softmax(zs[-1][:,j])), axis = 1)

        num_neuronios = activation.shape[0]
        activations.append(activation)
        delta = (activations[-1] - y)
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
 
        # Aqui, l = 1 significa a ultima camada de neuronios, l = 2 eh a
        # segunda e assim por diante. 
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(w[-l+1].transpose(), delta) * sp
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
            
        return nabla_w        

    def evaluate(self, test_data):
        """Retorna o numero de entradas de teste para as quais a rede neural 
         produz o resultado correto. Note que a saida da rede neural
         eh considerada o indice de qualquer que seja
         neuronio na camada final que tenha a maior ativacao."""
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    # output_activations representa a computacao da rede(dimensao: m x n onde m eh o numero 
    # de neuronios da ultima camada e n eh o numero de dados considerados) e y sao os dados de treinamento
    def cost(self, output_activations, y):
        n = len(y)
        index = np.argmax(y, axis = 1)
        output = output_activations[index, np.arange(len(index))]
        output = np.log(output)
        return -(1/n)*np.sum(output)

    def cost_derivative_simplified(self, output_activations, y):
        """Retorna o vetor das derivadas parciais do custo."""
        n = len(y)
        index = np.argmax(y, axis = 1)
        output = output_activations[np.arange(len(index)), index]
        return (1/n) * np.sum(1/output)

    def cost_derivative(self, output_activations, y):
        """Retorna o vetor das derivadas parciais.
            y tem dimensao n x k onde n eh o numero de nuronios da saida
            e k o numero de amostras. """
        n = len(y) #numero de neuronios na ultima camada

        derivative = 1/softmax(output_activations[:,0])
        for i in range(1, output_activations.shape[1]):
            derivative = np.concatenate((derivative, 1/softmax(output_activations[:,i])), axis = 1)

        derivative = y*derivative
        derivative = self.concat(derivative, n)
        return derivative

    #converte matriz para vetor
    def mat2vet(self):
        retorno = np.concatenate(list(map(lambda a: a.reshape(-1), self.weights))) #concatena lista de arrays
        retorno = retorno.reshape(retorno.size, 1)
        return retorno

    #converte vetor para matriz    
    def vet2mat(self,v):
        start = 0
        weightsAux = []
        for w in self.weights:
            end = start + w.size
            weightsAux.append(v[start:end].reshape(w.shape))
            start = end
        return weightsAux
            
    #Nesse caso, devera ser realizada a transformacao antes
    #de fazer a computacao da rede
    #Funcao que queremos minimizar
    def feedforwardFDIPA(self, w, x, y):
        """Retorna o f do FDIPA"""
        uns = np.ones((np.shape(x)[0],1))
        a = np.concatenate((uns, x), axis = 1)
        a = a.T

        for i in range(0, len(w)-1):
            a = sigmoid(np.dot(w[i], a))
            a[0 , :] = 1
        output = np.dot(w[-1], a)
        output = np.exp(output)
        soma = np.sum(output, axis = 0)
        retorno = output / soma[None, :]
        return self.cost(retorno, y)

    #w nesse caso pode ser o vetor
    def g_FDIPA(self, w, raio):
        """Retorna a restricao g do FDIPA
        Recebe w como um vetor"""
        r = raio**2 
        g = (np.linalg.norm(w)**2) - r
        return g

    #retorna df pro FDIPA
    def df_FDIPA(self, w, x, y):
        """Retorna a derivada da funcao f. 
        Recebe w como uma lista de matrizes"""
        return mat2vet(self.backprop_softmax(w, x, y))
    
    #retorna dg pro FDIPA
    def dg_FDIPA(self, w):
        """Retorna derivada da restricao g. 
        Recebe w como um vetor"""
        return 2*w  #no caso de g ser |w|^2 - r^2

    def ddfunFDIPA(self, w, L):
        return np.eye(len(w))
    
    # w0 passado eh o w inicial
    # w0 eh passado na forma de matrizes do mesmo formato dos pesos da rede
    # x e y sao dados de treinamento
    def  FDIPA(self, w0, L0, tol, x_train, y_train, eta, raio):
        
        #Dados iniciais
        # x0 converte w0 na forma de vetor
        x0 = mat2vet(w0)
        
        f0 = self.feedforwardFDIPA(w0, x_train, y_train)
        g0 = self.g_FDIPA(x0, raio)

        df0 = self.df_FDIPA(w0, x_train, y_train)
        dg0 = self.dg_FDIPA(x0)

        # B0 = self.ddfunFDIPA(x0, L0)
        # n = len(x0)
        m = len(L0)

        # Caso queira mais de uma restricao
        E = np.ones([m,1])        

        phi = .1 #Multiplica a norma de d1
        epsi = 0.8

        # d1 = np.ones_like(x0)

        # Inicio do Programa FDIPA        
        cont = 0
        while cont < tol :
            cont = cont + 1
            # Calculo da direcao

            norm_dg0 = np.linalg.norm(dg0)**2
            div = (g0/L0 - norm_dg0)

            dx1 = -(df0 + (dg0.dot(dg0.T.dot(df0) )) / div)

            # dx1 = -df0 - (dg0.T.dot(df0)) * dg0 / div
            
            if np.linalg.norm(dx1) < 10**(-6):
                # print("saida1")
                return self.vet2mat(x0)
            else:
                #Segunda direcao d_beta
                dx2 = (L0/g0)*(dg0 + (dg0.dot(norm_dg0))/div)

                if  (df0.T).dot(dx2) > 0:
                    #rho
                    r0 = min( [phi*np.linalg.norm(dx1)**2, ((epsi-1)*(df0.T).dot(dx1))/(df0.T.dot(dx2))])
                else:
                    r0 = phi*(np.linalg.norm(dx1)**2)               

                # Direcao de busca
                dx = dx1 + r0*dx2

                t = eta/len(y_train)                

                Lx1 = -(L0/g0)*(dg0.T.dot(dx1))
                Lx2 = -(L0/g0)*(dg0.T.dot(dx2)+E)
                L   = np.abs(Lx1+r0*Lx2)

                xn = x0 + t*dx                
                mat = self.vet2mat(xn)
                fn = self.feedforwardFDIPA(mat, x_train, y_train)
                gn = self.g_FDIPA(xn, raio)
                
                while ((fn-f0) > 0 or gn >= 0 ):
                    t = 0.9 * t
                    xn = x0 + t * dx
                    mat = self.vet2mat(xn)
                    fn = self.feedforwardFDIPA(mat, x_train, y_train)
                    gn = self.g_FDIPA(xn, raio)

                # Criterio de Parada 
                # Parada Forcada   
                if (np.linalg.norm(f0-fn) < 1e-6):
                    return self.vet2mat(xn)

                x0  = xn

                f0 = fn
                g0 = self.g_FDIPA(x0, raio)
                
                mat = self.vet2mat(x0)
                df0 = self.df_FDIPA(mat, x_train, y_train)
                dg0 = self.dg_FDIPA(x0)

                L0 = L + 10**(-8)
        return self.vet2mat(xn)
        # return [xn, L, fn, gn, counter, t, d, r0]

def f_activation(z):
    pass

def df_activation(z):
    pass

def softmax(z):
    exp_z = np.exp(z)
    retorno = exp_z/np.sum(exp_z) 
    return retorno.reshape(-1, 1)

def softmax_derivative(z):
    """ Return a matrix n x n """
    z_column_vector = z.reshape(-1,1)
    return np.diag(z_column_vector) - z_column_vector.dot(z_column_vector.T)

# funcao de ativacao sigmoide
def sigmoid(z):
    return 1.0/(1.0 + np.exp(-z))
    
# Funcao para retornar as derivadas da funcao Sigmoide
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

# converte matriz para vetor
def mat2vet(w):
    retorno = np.concatenate(list(map(lambda a: a.reshape(-1), w))) #concatena lista de arrays
    retorno = retorno.reshape(retorno.size, 1)
    return retorno

In [1]:
import pandas as pd
#Parametros fixados
epochs = 100
mini_batch_size = 128

#Data Frames dos resultados
acuracia_treino = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
acuracia_teste  = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
norma_peso      = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
perda_treino    = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
perda_teste     = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])

for i in range(epochs):
    acuracia_treino[str(i+1)] = []
    acuracia_teste[str(i+1)]  = []
    norma_peso[str(i+1)]      = []
    perda_treino[str(i+1)]    = []
    perda_teste[str(i+1)]     = []

acuracia_treino['tempo'] = []
acuracia_teste['tempo']  = []
norma_peso['tempo']      = []
perda_treino['tempo']    = []
perda_teste['tempo']     = []

In [7]:
acuracia_treino.to_csv()

',metodo,passo_inicial,iteracoes,raio,semente,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,tempo\n'

In [None]:
lista_raio = [2, ] #10, 50, 100, 1000]
lista_eta = [0.01,]# 0.025, 0.1, 0.5, 1]
lista_iteracoes = [1,]# 3, 5, 10]
raio = 49
eta = 
iteracoes = 2
for raio in lista_raio:
    for eta in lista_eta:
        for iteracoes in lista_iteracoes:
            for seed in range(30):
                X_train, X_test, y_train, y_test = split_dataset(X, y, seed)
                net = NeuralNet([784, 32, 32, 10], raio, random_state = seed)
                net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
                
                acuracia_treino = acuracia_treino.append(net.acuracia_treino, ignore_index = True)
                acuracia_teste  = acuracia_teste.append(net.acuracia_teste, ignore_index = True) 
                norma_peso      = norma_peso.append(net.norma_peso, ignore_index = True)
                perda_treino    = perda_treino.append(net.perda_treino, ignore_index = True)
                perda_teste     = perda_teste.append(net.perda_teste, ignore_index = True)

                print(acuracia_treino.tail(5))

In [7]:
random_seed = 2

epochs = 100
mini_batch_size = 128
eta = 0.10288171 #eta otimo para SGD

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)
args = (X_train, y_train, epochs, mini_batch_size, random_seed)

net = NeuralNet([784, 32, 32, 10], raio = None, random_state = random_seed)
net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  165.8116065909119


  return 1.0/(1.0 + np.exp(-z))


0.8862

In [40]:
from scipy.optimize import differential_evolution as de 
from sklearn.model_selection import KFold

random_seed = 2

epochs = 100
mini_batch_size = 128

# x  = [eta]
lb = [1e-4, ]
ub = [1, ] 

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)
args = (X_train, y_train, epochs, mini_batch_size, random_seed)


def objective_function(x, *args):
    X, y, epochs, mini_batch_size, rd = args
    eta       = x[0]

    kf = KFold(n_splits = 3, shuffle = True)
    kf.get_n_splits(X)
    acuracia = []
    
    ini = time.time()
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        net = NeuralNet([784, 32, 32, 10], raio, random_state = rd)
        net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
        acuracia_teste = net.accuracy_test
        acuracia.append(acuracia_teste)
    end = time.time()

    acuracia = np.mean(acuracia)
    print(f'Acuracia = {acuracia:.4f}')
    print(f'Tempo = {(end-ini):.4f}')
    
    return 1 - acuracia

In [42]:
res

     fun: 0.06838003885548105
 message: 'Optimization terminated successfully.'
    nfev: 178
     nit: 17
 success: True
       x: array([0.10288171])

- fun: 0.06838003885548105
- message: 'Optimization terminated successfully.'
- nfev: 178
- nit: 17
- success: True
- x: array([0.10288171])

In [41]:
init = np.random.uniform(low=lb, high=ub, size=(8,1))
res = de(objective_function, bounds=list(zip(lb, ub)), args=args,
             init=init,
             maxiter=20, tol=1e-5,
             mutation=0.9,  recombination=0.9,
             disp=True, workers=-1,
             seed=random_seed)



||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
Acuracia = 0.9297
Tempo = 2432.5738
||W_inicial|| =  37.44196536323332
Acuracia = 0.9278
Tempo = 2475.5457
||W_inicial|| =  37.44196536323332
Acuracia = 0.9293
Tempo = 2509.0116
Acuracia = 0.9295
Tempo = 2518.0667
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
||W_inicial|| =  37.44196536323332
Acuracia = 0.928

In [39]:
random_seed = 1

epochs = 40
mini_batch_size = 128

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)

raio = 49
eta = 1 #0.23111951
iteracoes = 2

net = NeuralNet([784, 32, 32, 10], raio, random_state = random_seed)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  17.803584496718027
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0079s
tempo dg0 FDIPA = 0.0002s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0088s
tempo dg0 FDIPA = 0.0002s
Tempo mini_batch = 0.0391s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0108s
tempo dg0 FDIPA = 0.0002s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0532s
tempo dg0 FDIPA = 0.0021s
Tempo mini_batch = 0.1160s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0309s
tempo dg0 FDIPA = 0.0045s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0181s
tempo dg0 FDIPA = 0.0056s
Tempo mini_batch = 0.0961s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0175s
tempo dg0 FDIPA = 0.0005s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0100s
tempo dg0 FDIPA = 0.0005s
Tempo mini_batch = 0.0829s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0174s
tempo dg0 FDIPA = 0.0002s
tempo vet2mat FDIPA = 0.0000s
tempo df0 FDIPA = 0.0256s
tempo dg0 FDIPA = 0.0010s
Tempo mini_batch = 0.0959s
tempo vet

KeyboardInterrupt: 

In [16]:
random_seed = 1

epochs = 100
mini_batch_size = 128

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)

raio = 49
eta = 0.23111951
iteracoes = 2

net = NeuralNet([784, 32, 32, 10], raio, random_state = random_seed)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  17.803584496718027
Epoca 1/100, tempo = 17.8851s
Epoca 2/100, tempo = 19.6035s
Epoca 3/100, tempo = 20.6651s
Epoca 4/100, tempo = 22.0408s
Epoca 5/100, tempo = 21.6029s
Epoca 6/100, tempo = 24.2285s
Epoca 7/100, tempo = 28.8981s
Epoca 8/100, tempo = 21.3825s
Epoca 9/100, tempo = 18.2234s
Epoca 10/100, tempo = 20.7452s
Epoca 11/100, tempo = 20.0588s
Epoca 12/100, tempo = 22.5227s
Epoca 13/100, tempo = 35.4645s
Epoca 14/100, tempo = 16.9723s
Epoca 15/100, tempo = 17.8546s
Epoca 16/100, tempo = 17.1795s
Epoca 17/100, tempo = 18.0851s
Epoca 18/100, tempo = 17.1769s
Epoca 19/100, tempo = 18.3757s
Epoca 20/100, tempo = 17.1424s
Epoca 21/100, tempo = 30.1887s
Epoca 22/100, tempo = 24.3456s
Epoca 23/100, tempo = 16.2726s
Epoca 24/100, tempo = 16.8762s
Epoca 25/100, tempo = 17.8723s
Epoca 26/100, tempo = 19.3228s
Epoca 27/100, tempo = 17.3696s
Epoca 28/100, tempo = 18.7706s
Epoca 29/100, tempo = 19.5229s
Epoca 30/100, tempo = 39.1258s
Epoca 31/100, tempo = 15.9427s
Epoca 32/100

0.9603

In [8]:
random_seed = 1

epochs = 100
mini_batch_size = 128

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)

raio = 49
eta = 0.23111951
iteracoes = 2

net = NeuralNet([784, 32, 32, 10], raio, random_state = random_seed)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  17.803584496718027


In [10]:
acuracia_teste

0.9603

In [13]:
random_seed = 1

epochs = 100
mini_batch_size = 128

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)

raio = 49
eta = 0.23111951
iteracoes = 2

net = NeuralNet([784, 32, 32, 10], raio = None, random_state = random_seed)
net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  165.26999627562012


  return 1.0/(1.0 + np.exp(-z))


0.9

In [14]:
random_seed = 1

epochs = 100
mini_batch_size = 128

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)

raio = 49
eta = 0.23111951
iteracoes = 1

net = NeuralNet([784, 32, 32, 10], raio, random_state = random_seed)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
acuracia_teste = net.accuracy_test
acuracia_teste

||W_inicial|| =  17.803584496718027


0.9529

In [7]:
import pandas as pd
#Parametros fixados
epochs = 10
mini_batch_size = 128

#Data Frames dos resultados
acuracia_treino = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
acuracia_teste  = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
norma_peso      = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
perda_treino    = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])
perda_teste     = pd.DataFrame(columns=['metodo', 'passo_inicial', 'iteracoes', 'raio', 'semente'])

for i in range(epochs):
    acuracia_treino[str(i+1)] = []
    acuracia_teste[str(i+1)]  = []
    norma_peso[str(i+1)]      = []
    perda_treino[str(i+1)]    = []
    perda_teste[str(i+1)]     = []

acuracia_treino['tempo'] = []
acuracia_teste['tempo']  = []
norma_peso['tempo']      = []
perda_treino['tempo']    = []
perda_teste['tempo']     = []

lista_raio = [2, ] #10, 50, 100, 1000]
lista_eta = [0.01,]# 0.025, 0.1, 0.5, 1]
lista_iteracoes = [1,]# 3, 5, 10]

for raio in lista_raio:
    for eta in lista_eta:
        for iteracoes in lista_iteracoes:
            for seed in range(30):
                X_train, X_test, y_train, y_test = split_dataset(X, y, seed)
                net = NeuralNet([784, 32, 32, 10], raio, random_state = seed)
                net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
                
                acuracia_treino = acuracia_treino.append(net.acuracia_treino, ignore_index = True)
                acuracia_teste  = acuracia_teste.append(net.acuracia_teste, ignore_index = True) 
                norma_peso      = norma_peso.append(net.norma_peso, ignore_index = True)
                perda_treino    = perda_treino.append(net.perda_treino, ignore_index = True)
                perda_teste     = perda_teste.append(net.perda_teste, ignore_index = True)

                print(acuracia_treino.tail(5))

In [None]:
lista_raio = [2, ] #10, 50, 100, 1000]
lista_eta = [0.01,]# 0.025, 0.1, 0.5, 1]
lista_iteracoes = [1,]# 3, 5, 10]

for raio in lista_raio:
    for eta in lista_eta:
        for iteracoes in lista_iteracoes:
            for seed in range(30):
                X_train, X_test, y_train, y_test = split_dataset(X, y, seed)
                net = NeuralNet([784, 32, 32, 10], raio, random_state = seed)
                net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
                
                acuracia_treino = acuracia_treino.append(net.acuracia_treino, ignore_index = True)
                acuracia_teste  = acuracia_teste.append(net.acuracia_teste, ignore_index = True) 
                norma_peso      = norma_peso.append(net.norma_peso, ignore_index = True)
                perda_treino    = perda_treino.append(net.perda_treino, ignore_index = True)
                perda_teste     = perda_teste.append(net.perda_teste, ignore_index = True)

                print(acuracia_treino.tail(5))

In [48]:
acuracia_treino = acuracia_treino.update(net.acuracia_teste, ignore_index=True)
acuracia_treino.head()

TypeError: update() got an unexpected keyword argument 'ignore_index'

# Variaveis para o 'de'

In [14]:
random_seed = 1

epochs = 100
mini_batch_size = 128

# x  = [raio, eta, iteracoes]
lb = [2,   1e-4,       1]
ub = [1000,   1,   4.99]

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)
args = (X_train, y_train, epochs, mini_batch_size, random_seed)

In [15]:
from scipy.optimize import differential_evolution as de 
from sklearn.model_selection import KFold

def objective_function(x, *args):
    X, y, epochs, mini_batch_size, rd = args
    raio      = int(x[0])
    eta       = x[1]
    iteracoes = int(x[2])

    kf = KFold(n_splits = 3, shuffle = True)
    kf.get_n_splits(X)
    acuracia = []
    
    ini = time.time()
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        net = NeuralNet([784, 32, 32, 10], raio, random_state = rd)
        net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
        acuracia_teste = net.accuracy_test
        acuracia.append(acuracia_teste)
    end = time.time()

    acuracia = np.mean(acuracia)
    print(f'Acuracia = {acuracia:.4f}')
    print(f'Tempo = {(end-ini):.4f}')
    
    return 1 - acuracia

In [18]:
init = np.random.uniform(low=lb, high=ub, size=(5,3))
res = de(objective_function, bounds=list(zip(lb, ub)), args=args,
             init=init,
             maxiter=20, tol=1e-5,
             mutation=0.9,  recombination=0.9,
             disp=True, workers=-1,
             seed=random_seed)

||W_inicial|| =  263.34468734728756


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  367.5698382551576


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  347.1698976860016


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  349.0244377377431


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  347.1698976860016
||W_inicial|| =  347.1698976860016
||W_inicial|| =  263.34468734728756
||W_inicial|| =  367.5698382551576
||W_inicial|| =  349.0244377377431
Acuracia = 0.8294
Tempo = 4162.2594
||W_inicial|| =  14.094504393235107
||W_inicial|| =  263.34468734728756
||W_inicial|| =  14.094504393235107
||W_inicial|| =  367.5698382551576
||W_inicial|| =  349.0244377377431
||W_inicial|| =  14.094504393235107
Acuracia = 0.8625
Tempo = 7934.4436
Acuracia = 0.9511
Tempo = 5890.1295
Acuracia = 0.8093
Tempo = 10220.6689
Acuracia = 0.8548
Tempo = 10241.0035
||W_inicial|| =  12.239964341493646
||W_inicial|| =  90.87246253533162
||W_inicial|| =  238.49385065395194
||W_inicial|| =  22.25448062089754
||W_inicial|| =  12.239964341493646
||W_inicial|| =  90.87246253533162
||W_inicial|| =  22.25448062089754
||W_inicial|| =  238.49385065395194
||W_inicial|| =  12.239964341493646
||W_inicial|| =  22.25448062089754
||W_inicial|| =  90.87246253533162
||W_inicial|| =  238.49385065395194
Ac

  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  370.5371023379441
||W_inicial|| =  370.5371023379441
Acuracia = 0.8478
Tempo = 2577.2501
||W_inicial|| =  370.16619432759575
||W_inicial|| =  370.16619432759575
||W_inicial|| =  370.16619432759575
Acuracia = 0.8487
Tempo = 3423.2522
||W_inicial|| =  370.5371023379441
||W_inicial|| =  370.5371023379441
||W_inicial|| =  370.5371023379441
Acuracia = 0.8504
Tempo = 3577.9938
||W_inicial|| =  370.5371023379441
||W_inicial|| =  370.5371023379441
||W_inicial|| =  370.5371023379441
Acuracia = 0.8478
Tempo = 3590.3842
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
Acuracia = 0.8892
Tempo = 2748.6579
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
Acuracia = 0.8892
Tempo = 2455.6683
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
||W_inicial|| =  165.42497261533836
Acuracia = 0.8889
Tempo = 2814.2471
||W_inicial|| =  165.42497261533836


In [19]:
res

     fun: 0.04425996323467263
     jac: array([   0.        , 1999.96000186,    0.        ])
 message: 'Maximum number of iterations has been exceeded.'
    nfev: 233
     nit: 20
 success: False
       x: array([49.47811956,  0.23111951,  2.1178771 ])

- fun: 0.04425996323467263
- jac: array([   0.        , 1999.96000186,    0.        ])
- message: 'Maximum number of iterations has been exceeded.'
- nfev: 233
- nit: 20
- success: False
- x: array([49.47811956,  0.23111951,  2.1178771 ])

In [40]:
raio = 100
rd = 5
net = NeuralNet([784, 32, 32, 10], raio, random_state = rd)

rand_factor = 0.1162
||W_inicial|| =  11.506651264972714


In [10]:
random_seed = 2

epochs = 100
mini_batch_size = 128

# x  = [raio, eta, iteracoes]
lb = [2,   1e-4, ] #     1]
ub = [1000,   1, ] #  4.99]

X_train, X_test, y_train, y_test = split_dataset(X, y, random_seed)
args = (X_train, y_train, epochs, mini_batch_size, random_seed)

In [11]:
from scipy.optimize import differential_evolution as de 
from sklearn.model_selection import KFold

def objective_function(x, *args):
    X, y, epochs, mini_batch_size, rd = args
    raio      = x[0]
    eta       = x[1]
    iteracoes = 2

    kf = KFold(n_splits = 3, shuffle = True)
    kf.get_n_splits(X)
    acuracia = []
    
    ini = time.time()
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        net = NeuralNet([784, 32, 32, 10], raio, random_state = rd)
        net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
        acuracia_teste = net.accuracy_test
        acuracia.append(acuracia_teste)
    end = time.time()

    acuracia = np.mean(acuracia)
    print(f'Acuracia = {acuracia:.4f}')
    print(f'Tempo = {(end-ini):.4f}')
    
    return 1 - acuracia

In [13]:
res

     fun: 0.04614002403739281
 message: 'Maximum number of iterations has been exceeded.'
    nfev: 357
     nit: 20
 success: False
       x: array([47.40463817,  0.3239329 ])

- fun: 0.04614002403739281
- message: 'Maximum number of iterations has been exceeded.'
- nfev: 357
- nit: 20
- success: False
- x: array([47.40463817,  0.3239329 ])

Executar esse

In [12]:
init = np.random.uniform(low=lb, high=ub, size=(8,2))
res = de(objective_function, bounds=list(zip(lb, ub)), args=args,
             init=init,
             maxiter=20, tol=1e-5,
             mutation=0.9,  recombination=0.9,
             disp=True, workers=-1,
             seed=random_seed)



||W_inicial|| =  652.5060246927247


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  651.5689611748862


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  463.80193695974464


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  375.65623618039683


  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  652.5060246927247
||W_inicial|| =  463.80193695974464
||W_inicial|| =  651.5689611748862
||W_inicial|| =  375.65623618039683
||W_inicial|| =  652.5060246927247
||W_inicial|| =  463.80193695974464
||W_inicial|| =  375.65623618039683
||W_inicial|| =  651.5689611748862
Acuracia = 0.7557
Tempo = 6214.9717
||W_inicial|| =  698.8834430498256
Acuracia = 0.7985
Tempo = 6265.0054
||W_inicial|| =  36.30861634694751
Acuracia = 0.7632
Tempo = 6300.4207
||W_inicial|| =  18.462389697022093
Acuracia = 0.7929
Tempo = 6301.4251
||W_inicial|| =  357.93839927819675
||W_inicial|| =  698.8834430498256
||W_inicial|| =  36.30861634694751
||W_inicial|| =  357.93839927819675
||W_inicial|| =  18.462389697022093
||W_inicial|| =  698.8834430498256
||W_inicial|| =  36.30861634694751
||W_inicial|| =  357.93839927819675
||W_inicial|| =  18.462389697022093
Acuracia = 0.7436
Tempo = 5191.2213
Acuracia = 0.9497
Tempo = 5147.5986
Acuracia = 0.8310
Tempo = 5146.7255
Acuracia = 0.9063
Tempo = 5158.3294
||

  return 1.0/(1.0 + np.exp(-z))


||W_inicial|| =  779.2609041222937
||W_inicial|| =  779.2609041222937
Acuracia = 0.7394
Tempo = 3161.4710
||W_inicial|| =  779.2609041144932
||W_inicial|| =  779.2609041144932
||W_inicial|| =  779.2609041144932
Acuracia = 0.7458
Tempo = 3015.2388
||W_inicial|| =  779.2609041222937
||W_inicial|| =  779.2609041222937
||W_inicial|| =  779.2609041222937
Acuracia = 0.7400
Tempo = 3721.0158
||W_inicial|| =  440.6855647619268
||W_inicial|| =  440.6855647619268
||W_inicial|| =  440.6855647619268
Acuracia = 0.8233
Tempo = 2257.1558
||W_inicial|| =  440.68556476972725
||W_inicial|| =  440.68556476972725
||W_inicial|| =  440.68556476972725
Acuracia = 0.8218
Tempo = 1431.9433
||W_inicial|| =  440.6855647619268
||W_inicial|| =  440.6855647619268
||W_inicial|| =  440.6855647619268
Acuracia = 0.8242
Tempo = 1745.4255
||W_inicial|| =  210.74363378942397
||W_inicial|| =  210.74363378942397
||W_inicial|| =  210.74363378942397
Acuracia = 0.8792
Tempo = 2135.9396
||W_inicial|| =  210.7436337972244
||W_ini

In [13]:
from scipy.optimize import differential_evolution as de 

epochs = 100
mini_batch_size = 128

# x  = [raio, eta, iteracoes]
lb = [2,  1e-4,   1]
ub = [200,   1,   4.99]

args = (X, y, epochs, mini_batch_size)

def objective_function(x, *args):
    X, y, epochs, mini_batch_size = args
    raio      = int(x[0])
    eta       = x[1]
    iteracoes = int(x[2])
    # acuracia = []
    rd = 1
    # for rd in range(30):
    ini = time.time()
    X_train, X_test, y_train, y_test = split_dataset(X, y, rd)
    net = NeuralNet([784, 32, 32, 10], raio, random_state = rd)
    net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
    acuracia_teste_max = np.max(net.acuracia_teste_lista)
    acuracia_teste_ultimo = net.acuracia_teste_lista[-1]

    acuracia = (acuracia_teste_max + acuracia_teste_ultimo)/2
    end = time.time()
    print(f'Acuracia = {acuracia:.4f}')
    print(f'Tempo = {(end-ini):.4f}')

    return 1 - acuracia

random_seed = 1

init = np.random.uniform(low=lb, high=ub, size=(5,3))          
res = de(objective_function, bounds=list(zip(lb, ub)), args=args,
             init=init, 
             maxiter=20, tol=1e-8,  
             mutation=0.9,  recombination=0.9, 
             disp=True, workers=-1,
             seed=random_seed)      



||W_inicial|| =  41.170789148660454
||W_inicial|| =  66.7634418626926
||W_inicial|| =  64.16708579025456
||W_inicial|| =  19.2872165381112
Acuracia = 0.9460
Tempo = 2898.7502
||W_inicial|| =  31.15627286925655
Acuracia = 0.9498
Tempo = 4161.7979
Acuracia = 0.9369
Tempo = 6511.8262
Acuracia = 0.9544
Tempo = 6513.0092
Acuracia = 0.9585
Tempo = 4586.7467
||W_inicial|| =  71.21433798687211
||W_inicial|| =  11.49814832079706
||W_inicial|| =  40.428973127963864
||W_inicial|| =  10.385424289752184
Acuracia = 0.9279
Tempo = 3312.9496
||W_inicial|| =  60.82891369711994
Acuracia = 0.9374
Tempo = 5101.3320
Acuracia = 0.9528
Tempo = 5107.2994


  return 1.0/(1.0 + np.exp(-z))


Acuracia = 0.9447
Tempo = 6321.4470
Acuracia = 0.9466
Tempo = 3028.2066
differential_evolution step 1: f(x)= 0.0415
||W_inicial|| =  54.89438553154726
||W_inicial|| =  30.04354883821168
||W_inicial|| =  21.883572610549244
||W_inicial|| =  39.31624909691898
Acuracia = 0.9544
Tempo = 3011.5359
||W_inicial|| =  36.348985014132644
Acuracia = 0.9495
Tempo = 3039.1027
Acuracia = 0.9558
Tempo = 4494.6679
Acuracia = 0.9599
Tempo = 4532.2416
Acuracia = 0.9568
Tempo = 2436.8454
differential_evolution step 2: f(x)= 0.0401
||W_inicial|| =  27.44719276577363
||W_inicial|| =  29.301732817515088
||W_inicial|| =  19.2872165381112
||W_inicial|| =  30.04354883821168


  return 1.0/(1.0 + np.exp(-z))
  return 1.0/(1.0 + np.exp(-z))


Acuracia = 0.9524
Tempo = 4096.8439
||W_inicial|| =  14.4654124035834
Acuracia = 0.9584
Tempo = 4980.1346
Acuracia = 0.9565
Tempo = 5047.1234
Acuracia = 0.9576
Tempo = 5052.5155
Acuracia = 0.9461
Tempo = 1594.2366
differential_evolution step 3: f(x)= 0.0401
||W_inicial|| =  27.44719276577363
||W_inicial|| =  34.86535297273947
||W_inicial|| =  29.301732817515088
||W_inicial|| =  22.62538863124583
Acuracia = 0.9587
Tempo = 4916.9873
||W_inicial|| =  14.4654124035834
Acuracia = 0.9562
Tempo = 4959.9325
Acuracia = 0.9592
Tempo = 4965.0495
Acuracia = 0.9587
Tempo = 4981.3745
Acuracia = 0.9381
Tempo = 661.4018
differential_evolution step 4: f(x)= 0.0401
||W_inicial|| =  9.272700258707307
||W_inicial|| =  26.705376745077047
||W_inicial|| =  33.752628941694596
||W_inicial|| =  34.86535297273947
Acuracia = 0.9054
Tempo = 2050.0696
||W_inicial|| =  25.221744703683875
Acuracia = 0.9592
Tempo = 4942.5751
Acuracia = 0.9605
Tempo = 4954.4047
Acuracia = 0.9542
Tempo = 5006.8260
Acuracia = 0.9532
Temp

  return 1.0/(1.0 + np.exp(-z))


Acuracia = 0.9597
Tempo = 1225.4321
||W_inicial|| =  33.752628941694596
Acuracia = 0.9611
Tempo = 1224.9945
||W_inicial|| =  33.752628941694596
Acuracia = 0.9591
Tempo = 1222.4189
||W_inicial|| =  33.752628941694596
Acuracia = 0.9591
Tempo = 1224.3195
||W_inicial|| =  33.752628941694596
Acuracia = 0.9583
Tempo = 1225.6460
||W_inicial|| =  33.752628941694596
Acuracia = 0.9591
Tempo = 1219.5063
||W_inicial|| =  33.752628941694596
Acuracia = 0.9601
Tempo = 1223.6880
||W_inicial|| =  33.752628941694596
Acuracia = 0.9601
Tempo = 1223.8830
||W_inicial|| =  33.752628941694596
Acuracia = 0.9587
Tempo = 1221.7111
||W_inicial|| =  33.752628941694596
Acuracia = 0.9601
Tempo = 1221.9221
||W_inicial|| =  33.752628941694596
Acuracia = 0.9586
Tempo = 1223.8146
||W_inicial|| =  33.752628941694596
Acuracia = 0.9586
Tempo = 1220.5257
||W_inicial|| =  33.752628941694596
Acuracia = 0.9581
Tempo = 1223.6171
||W_inicial|| =  33.752628941694596
Acuracia = 0.9586
Tempo = 1225.6161
||W_inicial|| =  33.75262894

In [14]:
res

     fun: 0.039349999999999996
     jac: array([     0.        , 194999.99902017,      0.        ])
 message: 'Optimization terminated successfully.'
    nfev: 163
     nit: 14
 success: True
       x: array([92.15726832,  0.5392877 ,  4.27923394])

- fun: 0.039349999999999996
- jac: array([     0.        , 194999.99902017,      0.        ])
- message: 'Optimization terminated successfully.'
- nfev: 163
- nit: 14
- success: True
- x: array([92.15726832,  0.5392877 ,  4.27923394])       

In [None]:
#Parametros para alterar
raio = 100
eta = 0.025
iteracoes = 1

net = NeuralNet([784, 32, 32, 10], raio, random_state = 0)

print("Comeco:")
ini = time.time()
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  85.3772635479343
Comeco:
Tempo  epoch: 		4.387790679931641 (s)




FDIPA: Epoch 1/100  | Acurácia de Treino 0.3692 | Acurácia de Teste 0.3639 | ||W|| 85.1899 | Perda treino 1.8626 
Tempo  epoch: 		4.447407484054565 (s)
FDIPA: Epoch 2/100  | Acurácia de Treino 0.5324 | Acurácia de Teste 0.5258 | ||W|| 85.0662 | Perda treino 1.5357 
Tempo  epoch: 		4.368814468383789 (s)
FDIPA: Epoch 3/100  | Acurácia de Treino 0.6192 | Acurácia de Teste 0.6167 | ||W|| 84.9555 | Perda treino 1.2989 
Tempo  epoch: 		4.404610872268677 (s)
FDIPA: Epoch 4/100  | Acurácia de Treino 0.6702 | Acurácia de Teste 0.6700 | ||W|| 84.8504 | Perda treino 1.1291 
Tempo  epoch: 		4.561392307281494 (s)
FDIPA: Epoch 5/100  | Acurácia de Treino 0.7076 | Acurácia de Teste 0.7044 | ||W|| 84.7519 | Perda treino 1.0042 
Tempo  epoch: 		4.377145290374756 (s)
FDIPA: Epoch 6/100  | Acurácia de Treino 0.7357 | Acurácia de Teste 0.7348 | ||W|| 84.6536 | Perda treino 0.9087 
Tempo  epoch: 		4.330930709838867 (s)
FDIPA: Epoch 7/100  | Acurácia de Treino 0.7556 | Acurácia de Teste 0.7516 | ||W|| 84.55

KeyboardInterrupt: ignored

In [None]:
raio = 100
net = NeuralNet([784, 32, 32, 10], raio, random_state = 0)

epochs = 100
mini_batch_size = 100
eta = 0.025
iteracoes = 1


print("Comeco:")
ini = time.time()
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  85.3772635479343
Comeco:
Tempo  epoch: 		4.584733009338379 (s)




FDIPA: Epoch 1/100  | Acurácia de Treino 0.3692 | Acurácia de Teste 0.3639 | ||W|| 85.1899 | Perda treino 93129.4417 
Tempo  epoch: 		4.400588274002075 (s)
FDIPA: Epoch 2/100  | Acurácia de Treino 0.5324 | Acurácia de Teste 0.5258 | ||W|| 85.0662 | Perda treino 76782.5386 
Tempo  epoch: 		4.379154920578003 (s)
FDIPA: Epoch 3/100  | Acurácia de Treino 0.6192 | Acurácia de Teste 0.6167 | ||W|| 84.9555 | Perda treino 64945.1480 
Tempo  epoch: 		4.367553949356079 (s)
FDIPA: Epoch 4/100  | Acurácia de Treino 0.6702 | Acurácia de Teste 0.6700 | ||W|| 84.8504 | Perda treino 56455.1418 
Tempo  epoch: 		4.397381782531738 (s)
FDIPA: Epoch 5/100  | Acurácia de Treino 0.7076 | Acurácia de Teste 0.7044 | ||W|| 84.7519 | Perda treino 50208.7790 
Tempo  epoch: 		4.357771873474121 (s)
FDIPA: Epoch 6/100  | Acurácia de Treino 0.7357 | Acurácia de Teste 0.7348 | ||W|| 84.6536 | Perda treino 45433.6826 
Tempo  epoch: 		4.501313924789429 (s)
FDIPA: Epoch 7/100  | Acurácia de Treino 0.7556 | Acurácia de Te

KeyboardInterrupt: ignored

In [None]:
raio = 100
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10], raio) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167

epochs = 1000
mini_batch_size = 100
eta = 0.025
iteracoes = 3
# raio = 50

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  117.16332215211462
Comeco:




Tempo  epoch: 		11.1318941116333 (s)
FDIPA: Epoch 1/1000  | Acurácia de Treino 0.5670 | Acurácia de Teste 0.5609 | ||W|| 117.0655 | Perda treino 64389.0995 
Tempo  epoch: 		10.851339340209961 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.6761 | Acurácia de Teste 0.6710 | ||W|| 117.0882 | Perda treino 49418.9013 
Tempo  epoch: 		11.035679340362549 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.7294 | Acurácia de Teste 0.7222 | ||W|| 117.1005 | Perda treino 41836.1797 
Tempo  epoch: 		11.195695638656616 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.7614 | Acurácia de Teste 0.7542 | ||W|| 117.1022 | Perda treino 37052.9015 
Tempo  epoch: 		10.900336265563965 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.7832 | Acurácia de Teste 0.7765 | ||W|| 117.0949 | Perda treino 33636.5532 
Tempo  epoch: 		10.93850564956665 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.8010 | Acurácia de Teste 0.7936 | ||W|| 117.0807 | Perda treino 31063.8108 
Tempo  epoch: 		10.878264427185059 (s)
FDIPA: Epo

In [None]:
raio = 200
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10], raio) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167

epochs = 1000
mini_batch_size = 100
eta = 0.01
iteracoes = 5
# raio = 50

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  63.621533317169536
Comeco:
Tempo  epoch: 		16.45392107963562 (s)




FDIPA: Epoch 1/1000  | Acurácia de Treino 0.5431 | Acurácia de Teste 0.5441 | ||W|| 63.7250 | Perda treino 79329.7554 
Tempo  epoch: 		16.434274196624756 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.6926 | Acurácia de Teste 0.6934 | ||W|| 64.0069 | Perda treino 54652.5927 
Tempo  epoch: 		16.511661767959595 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.7668 | Acurácia de Teste 0.7623 | ||W|| 64.2659 | Perda treino 41412.0831 
Tempo  epoch: 		16.406798601150513 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.8033 | Acurácia de Teste 0.7996 | ||W|| 64.4659 | Perda treino 34242.3940 
Tempo  epoch: 		16.458418130874634 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.8242 | Acurácia de Teste 0.8180 | ||W|| 64.6182 | Perda treino 29938.6978 
Tempo  epoch: 		17.01644206047058 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.8392 | Acurácia de Teste 0.8299 | ||W|| 64.7360 | Perda treino 27138.5039 
Tempo  epoch: 		16.508888244628906 (s)
FDIPA: Epoch 7/1000  | Acurácia de Treino 0.8507 | Ac

In [None]:
raio = 200
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10], raio) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167

epochs = 1000
mini_batch_size = 100
eta = 0.05
iteracoes = 1
# raio = 50

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  9.50921294128477
Comeco:
Tempo  epoch: 		5.524610280990601 (s)




FDIPA: Epoch 1/1000  | Acurácia de Treino 0.1323 | Acurácia de Teste 0.1338 | ||W|| 9.1889 | Perda treino 114217.8730 
Tempo  epoch: 		5.2764763832092285 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.4592 | Acurácia de Teste 0.4628 | ||W|| 9.1223 | Perda treino 106985.2133 
Tempo  epoch: 		5.331613779067993 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.5323 | Acurácia de Teste 0.5365 | ||W|| 9.9585 | Perda treino 76828.2676 
Tempo  epoch: 		5.266418218612671 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.6406 | Acurácia de Teste 0.6418 | ||W|| 10.9581 | Perda treino 56382.3554 
Tempo  epoch: 		5.178624629974365 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.7187 | Acurácia de Teste 0.7170 | ||W|| 11.6200 | Perda treino 46717.2552 
Tempo  epoch: 		5.231877565383911 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.7582 | Acurácia de Teste 0.7532 | ||W|| 12.0364 | Perda treino 41698.0368 
Tempo  epoch: 		5.239737510681152 (s)
FDIPA: Epoch 7/1000  | Acurácia de Treino 0.7830 | Acuráci

KeyboardInterrupt: ignored

# Teste

In [None]:
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10]) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.5
iteracoes = 1
raio = 50

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )


Comeco:




Tempo  epoch: 		5.87241005897522 (s)
FDIPA: Epoch 1/1000  | Acurácia de Treino 0.7442 | Acurácia de Teste 0.7409 | ||W|| 162.1492 
Tempo  epoch: 		5.767901659011841 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.8170 | Acurácia de Teste 0.8051 | ||W|| 160.5840 
Tempo  epoch: 		5.772760391235352 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.8481 | Acurácia de Teste 0.8309 | ||W|| 159.2577 
Tempo  epoch: 		5.797125816345215 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.8673 | Acurácia de Teste 0.8499 | ||W|| 158.0557 
Tempo  epoch: 		5.741736650466919 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.8799 | Acurácia de Teste 0.8589 | ||W|| 156.9152 
Tempo  epoch: 		5.675878286361694 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.8870 | Acurácia de Teste 0.8641 | ||W|| 155.8646 
Tempo  epoch: 		5.800960063934326 (s)
FDIPA: Epoch 7/1000  | Acurácia de Treino 0.8969 | Acurácia de Teste 0.8713 | ||W|| 154.8634 
Tempo  epoch: 		5.682424306869507 (s)
FDIPA: Epoch 8/1000  | Acurácia de Trei

In [None]:
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10]) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.5
iteracoes = 1
raio = 50

print("Comeco:")
ini = time.time()
net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
# net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio,X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )


Comeco:




Tempo  epoch: 		2.2090225219726562 (s)
Epoch 1/1000  | Acurácia de Treino 0.7535 | Acurácia de Teste 0.7434 | ||W|| 165.2823
Tempo  epoch: 		2.1181766986846924 (s)
Epoch 2/1000  | Acurácia de Treino 0.8127 | Acurácia de Teste 0.8016 | ||W|| 165.3408
Tempo  epoch: 		2.158229112625122 (s)
Epoch 3/1000  | Acurácia de Treino 0.8439 | Acurácia de Teste 0.8272 | ||W|| 165.3807
Tempo  epoch: 		2.172633171081543 (s)
Epoch 4/1000  | Acurácia de Treino 0.8614 | Acurácia de Teste 0.8420 | ||W|| 165.4201
Tempo  epoch: 		2.174922466278076 (s)
Epoch 5/1000  | Acurácia de Treino 0.8739 | Acurácia de Teste 0.8508 | ||W|| 165.4585
Tempo  epoch: 		2.1505091190338135 (s)
Epoch 6/1000  | Acurácia de Treino 0.8835 | Acurácia de Teste 0.8584 | ||W|| 165.5088
Tempo  epoch: 		2.1804165840148926 (s)
Epoch 7/1000  | Acurácia de Treino 0.8908 | Acurácia de Teste 0.8658 | ||W|| 165.5583
Tempo  epoch: 		2.1222264766693115 (s)
Epoch 8/1000  | Acurácia de Treino 0.8954 | Acurácia de Teste 0.8694 | ||W|| 165.6124
Tem

In [None]:
6462.111921072006 - 2717.5862469673157 

3744.5256741046906

In [None]:
2717.5862469673157 / 6462.111921072006

0.4205415010076911

In [None]:
6462.111921072006 / 2717.5862469673157

2.3778866000236003

In [None]:
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10]) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.05
iteracoes = 10
raio = 10

print("Comeco:")
ini = time.time()
net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
# net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )


||W_inicial|| =  165.20284740914278
Comeco:




Tempo  epoch: 		2.1758689880371094 (s)
Epoch 1/1000  | Acurácia de Treino 0.4203 | Acurácia de Teste 0.4199 | ||W|| 165.0502
Tempo  epoch: 		2.0934267044067383 (s)
Epoch 2/1000  | Acurácia de Treino 0.5453 | Acurácia de Teste 0.5450 | ||W|| 165.0237
Tempo  epoch: 		2.0952396392822266 (s)
Epoch 3/1000  | Acurácia de Treino 0.6109 | Acurácia de Teste 0.6013 | ||W|| 165.0187
Tempo  epoch: 		2.1073381900787354 (s)
Epoch 4/1000  | Acurácia de Treino 0.6540 | Acurácia de Teste 0.6434 | ||W|| 165.0204
Tempo  epoch: 		2.1568660736083984 (s)
Epoch 5/1000  | Acurácia de Treino 0.6833 | Acurácia de Teste 0.6750 | ||W|| 165.0247
Tempo  epoch: 		2.1077914237976074 (s)
Epoch 6/1000  | Acurácia de Treino 0.7057 | Acurácia de Teste 0.6934 | ||W|| 165.0302
Tempo  epoch: 		2.117575168609619 (s)
Epoch 7/1000  | Acurácia de Treino 0.7241 | Acurácia de Teste 0.7115 | ||W|| 165.0356
Tempo  epoch: 		2.0791995525360107 (s)
Epoch 8/1000  | Acurácia de Treino 0.7374 | Acurácia de Teste 0.7274 | ||W|| 165.0408
T

KeyboardInterrupt: ignored

In [None]:
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10]) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.05
iteracoes = 1
raio = 180

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )


||W_inicial|| =  164.57492807238194
Comeco:




Tempo  epoch: 		7.534519672393799 (s)
FDIPA: Epoch 1/1000  | Acurácia de Treino 0.3497 | Acurácia de Teste 0.3523 | ||W|| 164.1075 
Tempo  epoch: 		5.643287658691406 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.4924 | Acurácia de Teste 0.4881 | ||W|| 163.9125 
Tempo  epoch: 		5.652761936187744 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.5735 | Acurácia de Teste 0.5726 | ||W|| 163.7438 
Tempo  epoch: 		5.679826021194458 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.6237 | Acurácia de Teste 0.6221 | ||W|| 163.5928 
Tempo  epoch: 		5.613228797912598 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.6573 | Acurácia de Teste 0.6568 | ||W|| 163.4480 
Tempo  epoch: 		5.662621021270752 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.6835 | Acurácia de Teste 0.6807 | ||W|| 163.3127 
Tempo  epoch: 		5.65277361869812 (s)
FDIPA: Epoch 7/1000  | Acurácia de Treino 0.7058 | Acurácia de Teste 0.7042 | ||W|| 163.1833 
Tempo  epoch: 		5.57337498664856 (s)
FDIPA: Epoch 8/1000  | Acurácia de Trein

KeyboardInterrupt: ignored

In [None]:
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10]) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.5
iteracoes = 1
raio = 180

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )

||W_inicial|| =  165.92521385722864
Comeco:




Tempo  epoch: 		5.800873041152954 (s)
FDIPA: Epoch 1/1000  | Acurácia de Treino 0.7625 | Acurácia de Teste 0.7515 | ||W|| 163.8565 
Tempo  epoch: 		5.734825611114502 (s)
FDIPA: Epoch 2/1000  | Acurácia de Treino 0.8203 | Acurácia de Teste 0.8120 | ||W|| 162.6900 
Tempo  epoch: 		5.681602716445923 (s)
FDIPA: Epoch 3/1000  | Acurácia de Treino 0.8475 | Acurácia de Teste 0.8393 | ||W|| 161.7154 
Tempo  epoch: 		5.630529403686523 (s)
FDIPA: Epoch 4/1000  | Acurácia de Treino 0.8646 | Acurácia de Teste 0.8495 | ||W|| 160.8412 
Tempo  epoch: 		5.637929201126099 (s)
FDIPA: Epoch 5/1000  | Acurácia de Treino 0.8762 | Acurácia de Teste 0.8561 | ||W|| 160.0373 
Tempo  epoch: 		5.642376184463501 (s)
FDIPA: Epoch 6/1000  | Acurácia de Treino 0.8854 | Acurácia de Teste 0.8680 | ||W|| 159.2908 
Tempo  epoch: 		5.624579668045044 (s)
FDIPA: Epoch 7/1000  | Acurácia de Treino 0.8926 | Acurácia de Teste 0.8702 | ||W|| 158.6068 
Tempo  epoch: 		5.649409770965576 (s)
FDIPA: Epoch 8/1000  | Acurácia de Tre

KeyboardInterrupt: ignored

In [None]:
raio = 50
# Arquitetura da rede
# net = NeuralNet([2, 10, 1])
# net = NeuralNet([784, 100, 100, 10]) #10300 tempo = 378
net = NeuralNet([784, 32, 32, 10], raio) #10300 tempo = 378
# net = NeuralNet([2, 50, 50, 50, 50, 1])  # 7650 tempo = 152
# net = NeuralNet([2, 70, 30, 30, 70, 1])  # 5310 tempo = 158
# net = NeuralNet([2, 80, 20, 20, 80, 1])  # tempo = 167


epochs = 1000
mini_batch_size = 100
eta = 0.5
iteracoes = 1
# raio = 50

print("Comeco:")
ini = time.time()
# net.SGD(X_train, y_train, epochs, mini_batch_size, eta, X_test, y_test)
net.trainFDIPA(X_train, y_train, epochs, mini_batch_size, eta, iteracoes, raio, X_test, y_test)
end = time.time()
print("Fim")
print("Tempo: \t\t" + str(end-ini) + " (s)" )


||W_inicial|| =  17.840629304855476
Comeco:
t = 0.021703114884691437
t = 0.02439549487892112
t = 0.03047168831038455
t = 0.04288327614819341
t = 0.06969934331944314
t = 0.10443143488125528
t = 0.18778529023408155
t = 0.2383403950900183
t = 0.1978889010161166
t = 0.27459527949212553
t = 0.23813229691807594
t = 0.2676426963548111
t = 0.17322264105137855
t = 0.29950887823875993
t = 0.21043758958781858
t = 0.2954440297419371
t = 0.273535322456875
t = 0.33706584925174815
t = 0.2885170124441439
t = 0.4284340373590374
t = 0.1993834108398843
t = 0.17681939215010484
t = 0.1274670128966167
t = 0.1189269848951884
t = 0.1020061741313951
t = 0.10430231857930716
t = 0.09299270296677971
t = 0.10278175194258458
t = 0.123120276408237
t = 0.09953735233805378
t = 0.11551144426642804
t = 0.10086460563248692
t = 0.10252805598196124
t = 0.0988629826000457
t = 0.10057871009187908
t = 0.1114789148782811
t = 0.09752495170075169
t = 0.09774066987403039
t = 0.09693902443266555
t = 0.10233503922789525
t = 0.09078



t = 0.09533981483662828
t = 0.10572913920100208
t = 0.0956286507942281
t = 0.11016065346415914
t = 0.10795876552766223
t = 0.0995518938514805
t = 0.09778798137632227
t = 0.09930972301691711
t = 0.10275430137930153
t = 0.09912841687284782
t = 0.06777035487945224
t = 0.11042112357549062
t = 0.12374176399015976
t = 0.10447528450039296
t = 0.08389334231211329
t = 0.10253376522351323
t = 0.10800569802352644
t = 0.10142340690277625
t = 0.0978106334654258
t = 0.09829901784461907
t = 0.10166592963157081
t = 0.11275386543802675
t = 0.10621358507148723
t = 0.10373650514973215
t = 0.0954508563468
t = 0.09492145962578522
t = 0.11005130138848368
t = 0.09436018520095779
t = 0.11219107905103488
t = 0.09781489000726128
t = 0.09741792716275043
t = 0.10410941321955194
t = 0.09906925589056863
t = 0.10040297520364289
t = 0.1017568583875054
t = 0.10128339260268687
t = 0.09800639805646245
t = 0.10006462212787332
t = 0.10070646838760798
t = 0.10656388279109826
t = 0.09931134967512426
t = 0.09737012294083669


KeyboardInterrupt: ignored

In [None]:
# np.sqrt((-139.68123793388185)**2 - 4*(1746.320585914111)*(-206.3563618781095))
((139.68123793388185) + np.sqrt((-139.68123793388185)**2 - 4*(1746.320585914111)*(-206.3563618781095)) )/ 2*1746.320585914111

1177357.5111988913

In [None]:
np.linalg.norm(0*np.random.randn(5,1)+1)

2.23606797749979

In [None]:
np.random.randn(1,2)

array([[ 0.78479219, -0.82032868]])

In [None]:
lista = [np.array([[1,2,3],[1,4,5]]), np.array([[1,2,3]])]

In [None]:
mat2vet(lista)

array([[1],
       [2],
       [3],
       [1],
       [4],
       [5],
       [1],
       [2],
       [3]])

In [None]:
r = 50


TypeError: ignored

In [None]:
radius = 50**2
pesos = np.random.rand(64, 1)
pesos = pesos / np.linalg.norm(pesos)
pesos = (50-1)*(np.random.rand()) * pesos
np.linalg.norm(pesos)
# weights = [np.random.randn(y,x) for x, y in zip(self.sizes_com_bias[:-1], self.sizes_com_bias[1:])]


26.203487286474328

In [None]:
radius

2500

In [None]:
np.random.rand()

0.18848547967305684