# Training Deep Neural Networks

In [1]:
import os
os.environ["KERAS_BACKEND"] = "torch"
import torch
import keras
print(keras.__version__)

3.11.3


In [2]:
if torch.cuda.is_available():
    print(f"GPU detectada: {torch.cuda.get_device_name(0)}")
    print(f"Número de GPUs disponíveis: {torch.cuda.device_count()}")
else:
    print("Nenhuma GPU detectada. Usando CPU.")

GPU detectada: NVIDIA GeForce RTX 3050 6GB Laptop GPU
Número de GPUs disponíveis: 1


## Load and Transform data

In [3]:
from keras.utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split

# 1. Carregar o dataset
# y_train_full ainda está no formato original (ex: [[6], [9], ...])
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.cifar10.load_data()

# 2. Pré-processamento das imagens (features)
# Achatar (Flatten) e Normalizar as imagens
num_pixels = x_train_full.shape[1] * x_train_full.shape[2] * x_train_full.shape[3]
x_train_full = x_train_full.reshape(x_train_full.shape[0], num_pixels).astype('float32') / 255.0
x_test = x_test.reshape(x_test.shape[0], num_pixels).astype('float32') / 255.0

# --- AMOSTRAGEM ALEATÓRIA E ESTRATIFICADA ---
# 3. Dividir os dados de treino em treino e validação
# Usamos train_test_split do scikit-learn para fazer isso de forma aleatória e estratificada.
# test_size=0.2 significa que 20% do x_train_full será usado para validação (10.000 amostras).
# stratify=y_train_full garante que a proporção de classes seja a mesma nos dois conjuntos.
# random_state=42 garante que a divisão seja a mesma toda vez que o código rodar (reprodutibilidade).
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full,
    y_train_full,
    test_size=0.2,
    stratify=y_train_full,
    random_state=42
)

# --- PROVA DA ESTRATIFICAÇÃO (OPCIONAL, MAS RECOMENDADO) ---
# Vamos verificar a distribuição das classes
_, train_counts = np.unique(y_train, return_counts=True)
_, val_counts = np.unique(y_val, return_counts=True)

print("--- Distribuição das Classes ---")
print("No conjunto de Treino (40.000 amostras):", train_counts)
print("No conjunto de Validação (10.000 amostras):", val_counts)
# Note que os conts de validação são aproximadamente 1/4 dos conts de treino (10k/40k).
print("-" * 32)

# 4. Pré-processamento dos rótulos (y) - AGORA APLICAMOS O ONE-HOT ENCODING
# Fazemos isso depois da divisão
num_classes = 10
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
y_test = to_categorical(y_test, num_classes)

# --- VERIFICAÇÃO FINAL DAS DIMENSÕES ---
print("\n--- Dimensões Finais dos Conjuntos ---")
print("Conjunto de Treinamento (x):", x_train.shape)
print("Conjunto de Treinamento (y):", y_train.shape)
print("\nConjunto de Validação (x):", x_val.shape)
print("Conjunto de Validação (y):", y_val.shape)
print("\nConjunto de Teste (x):", x_test.shape)
print("Conjunto de Teste (y):", y_test.shape)
print("-" * 37)

--- Distribuição das Classes ---
No conjunto de Treino (40.000 amostras): [4000 4000 4000 4000 4000 4000 4000 4000 4000 4000]
No conjunto de Validação (10.000 amostras): [1000 1000 1000 1000 1000 1000 1000 1000 1000 1000]
--------------------------------

--- Dimensões Finais dos Conjuntos ---
Conjunto de Treinamento (x): (40000, 3072)
Conjunto de Treinamento (y): (40000, 10)

Conjunto de Validação (x): (10000, 3072)
Conjunto de Validação (y): (10000, 10)

Conjunto de Teste (x): (10000, 3072)
Conjunto de Teste (y): (10000, 10)
-------------------------------------


## Training Same Model with different activation function and initialization

### MLP 1 - Sigmoid activation and random initialization

In [4]:
((3072 * 2) - ((3072 * 2) * 0.5)) / 8

384.0

In [5]:
from keras import layers
model_1 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(100, activation='sigmoid'),
    layers.Dense(10, activation='softmax'),
])

#### Using SGD Optimizer and Cross Entropy Loss

In [6]:
model_1.compile(
    optimizer=keras.optimizers.SGD(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [7]:
model_1.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - categorical_accuracy: 0.0989 - loss: 2.3070 - val_categorical_accuracy: 0.1000 - val_loss: 2.3033
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - categorical_accuracy: 0.0966 - loss: 2.3038 - val_categorical_accuracy: 0.1000 - val_loss: 2.3033
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.1006 - loss: 2.3036 - val_categorical_accuracy: 0.1000 - val_loss: 2.3033
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - categorical_accuracy: 0.0988 - loss: 2.3036 - val_categorical_accuracy: 0.1000 - val_loss: 2.3036
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.1018 - loss: 2.3036 - val_categorical_accuracy: 0.1000 - val_loss: 2.3035
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x1859e107b50>

In [8]:
model_1.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - categorical_accuracy: 0.1000 - loss: 2.3031


[2.3030810356140137, 0.10000000149011612]

### MLP 2 - ReLU activation and random initialization

In [9]:
from keras import layers
model_2 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

In [10]:
model_2.compile(
    optimizer=keras.optimizers.SGD(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [11]:
model_2.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.1338 - loss: 2.2977 - val_categorical_accuracy: 0.1710 - val_loss: 2.2844
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.1740 - loss: 2.1867 - val_categorical_accuracy: 0.1975 - val_loss: 2.0747
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.1965 - loss: 2.0714 - val_categorical_accuracy: 0.2048 - val_loss: 2.0317
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - categorical_accuracy: 0.2217 - loss: 2.0207 - val_categorical_accuracy: 0.2518 - val_loss: 1.9589
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.2649 - loss: 1.9395 - val_categorical_accuracy: 0.2535 - val_loss: 1.9701
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x18587a559f0>

In [12]:
model_2.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - categorical_accuracy: 0.3388 - loss: 1.7441


[1.7441376447677612, 0.33880001306533813]

### MLP 3 - ReLU activation and He initialization

In [13]:
from keras import layers
model_3 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(10, activation='softmax'),
])

In [14]:
model_3.compile(
    optimizer=keras.optimizers.SGD(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [15]:
model_3.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.2372 - loss: 2.0546 - val_categorical_accuracy: 0.2978 - val_loss: 1.9076
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.3191 - loss: 1.8646 - val_categorical_accuracy: 0.3444 - val_loss: 1.8018
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.3548 - loss: 1.7832 - val_categorical_accuracy: 0.3796 - val_loss: 1.7296
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.3728 - loss: 1.7310 - val_categorical_accuracy: 0.3938 - val_loss: 1.6920
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - categorical_accuracy: 0.3911 - loss: 1.6898 - val_categorical_accuracy: 0.3855 - val_loss: 1.6869
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x185879b21d0>

In [16]:
model_3.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - categorical_accuracy: 0.4150 - loss: 1.6333


[1.633277177810669, 0.41499999165534973]

### MLP 4 - LeakyReLU activation and He initialization

In [17]:
from keras import layers
model_4 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='leaky_relu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(10, activation='softmax'),
])

In [18]:
model_4.compile(
    optimizer=keras.optimizers.SGD(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [19]:
model_4.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.2594 - loss: 2.0128 - val_categorical_accuracy: 0.3028 - val_loss: 1.9164
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - categorical_accuracy: 0.3393 - loss: 1.8247 - val_categorical_accuracy: 0.3259 - val_loss: 1.8481
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - categorical_accuracy: 0.3711 - loss: 1.7451 - val_categorical_accuracy: 0.3953 - val_loss: 1.6975
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - categorical_accuracy: 0.3907 - loss: 1.6906 - val_categorical_accuracy: 0.3821 - val_loss: 1.7135
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - categorical_accuracy: 0.4123 - loss: 1.6374 - val_categorical_accuracy: 0.3905 - val_loss: 1.7238
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x185879f2140>

In [20]:
model_4.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - categorical_accuracy: 0.4550 - loss: 1.5279


[1.5279316902160645, 0.45500001311302185]

### MLP 5 - ELU activation and He initialization

In [21]:
from keras import layers

model_5 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(100, activation='elu', kernel_initializer='he_normal', bias_initializer='he_normal'),
    layers.Dense(10, activation='softmax'),
])

### Experiment with different Optimizers

#### SGD

In [22]:
model_5.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.001),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

ValueError: Argument(s) not recognized: {'lr': 0.001}

In [23]:
model_5.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 508ms/step - categorical_accuracy: 0.1000 - loss: 14.4895 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 510ms/step - categorical_accuracy: 0.1000 - loss: 14.5063 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 512ms/step - categorical_accuracy: 0.1000 - loss: 14.5063 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 513ms/step - categorical_accuracy: 0.1000 - loss: 14.5063 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 514ms/step - categorical_accuracy: 0.1000 - loss: 14.5063 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1fd24903fa0>

In [24]:
model_5.evaluate(x_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 67ms/step - categorical_accuracy: 0.1000 - loss: 14.5063


[14.50626277923584, 0.10000000149011612]

#### Momentum

In [29]:
model_5.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [30]:
model_5.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m 27/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:04[0m 1s/step - categorical_accuracy: 0.1115 - loss: 14.3215

KeyboardInterrupt: 

In [None]:
model_5.evaluate(x_test, y_test)

#### RmsProp

In [32]:
model_5.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [33]:
model_5.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m 14/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19:13[0m 2s/step - categorical_accuracy: 0.0986 - loss: 14.5290

KeyboardInterrupt: 

In [None]:
model_5.evaluate(x_test, y_test)

#### Adam

In [35]:
model_5.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [36]:
model_5.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1816s[0m 3s/step - categorical_accuracy: 0.1000 - loss: 14.5063 - val_categorical_accuracy: 0.1000 - val_loss: 14.5063
Epoch 2/10
[1m 58/625[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m26:10[0m 3s/step - categorical_accuracy: 0.0884 - loss: 14.6927

KeyboardInterrupt: 

In [None]:
model_5.evaluate(x_test, y_test)

### MLP 6 - SELU activation and LeCun initialization

In [25]:
from keras import layers
model_6 = keras.Sequential([
    layers.Input(shape=(3072,)),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(100, activation='selu', kernel_initializer='lecun_normal', bias_initializer='lecun_normal'),
    layers.Dense(10, activation='softmax')
])

In [26]:
model_6.compile(
    optimizer=keras.optimizers.SGD(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

In [27]:
model_6.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m  2/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:55[0m 667ms/step - categorical_accuracy: 0.1406 - loss: 4.9091

KeyboardInterrupt: 

In [None]:
model_6.evaluate(x_test, y_test)