# EJEMPLOS PARA EVITAR EL DESVANECIMIENTO DEL GRADIENTE

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

2024-12-19 19:47:39.406636: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## USO DE FUNCIONES DE ACTIVACIÓN APROPIADAS

### LEAKY RELU

In [2]:
# Leaky ReLU activation with a slope of 0.2 for negative inputs, passed to a
# Dense layer as its activation callable.
# Keras 3 names the slope argument `negative_slope`; the former `alpha`
# keyword is deprecated there.
# NOTE(review): Keras 2 (TF <= 2.15) only accepts `alpha` — confirm the target version.
leaky_relu = tf.keras.layers.LeakyReLU(negative_slope=0.2)
dense = tf.keras.layers.Dense(50, activation=leaky_relu)



In [3]:
# Alternative way to use Leaky ReLU: leave the Dense layer without an
# activation and append LeakyReLU as its own layer right after it.
# Keras 3 names the slope argument `negative_slope` (the `alpha` keyword is
# deprecated there).
model = tf.keras.models.Sequential(
    [
        # [...]  # more layers
        tf.keras.layers.Dense(50),  # no activation
        tf.keras.layers.LeakyReLU(negative_slope=0.2),  # activation as a separate layer
        # [...]  # more layers
    ]
)

## INICIALIZACIÓN ADECUADA DE PESOS

In [4]:
# Dense layer with 50 units and ReLU activation whose kernel weights are
# drawn with the "he_normal" initializer instead of the Keras default.
dense = tf.keras.layers.Dense(
    units=50,
    activation="relu",
    kernel_initializer="he_normal",
)

## NORMALIZACIÓN POR LOTES

In [5]:
# Load Fashion MNIST through Keras; the loader returns a (train, test) pair
# of (images, labels) tuples.
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist

# Hold out the last 5,000 training samples as the validation set and keep
# everything before them for training.
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]

# Scale the image arrays by 1/255.
# Presumably pixels are 0-255 integers, so this maps them to [0, 1] — TODO confirm.
X_train = X_train / 255
X_valid = X_valid / 255
X_test = X_test / 255

In [6]:
# Human-readable names for the ten clothing categories.
# Presumably ordered so that class_names[label] matches the integer labels
# in y_train / y_valid / y_test — TODO confirm against the dataset docs.
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [7]:
# Reset the global state Keras has accumulated from models built earlier in
# this kernel session, so the next model starts from a clean slate.
tf.keras.backend.clear_session()
# Seed Python's `random`, NumPy and TensorFlow in a single call.
# Seeding only TensorFlow (`tf.random.set_seed`) is not enough under Keras 3:
# initializers without an explicit seed draw theirs from Python's `random`
# module, so the initial weights would differ between runs.
tf.keras.utils.set_random_seed(42)

In [8]:
# MLP classifier for 28x28 inputs. A BatchNormalization layer is placed
# right after the flattened input and after each hidden Dense layer, i.e.
# AFTER the hidden layers' ReLU activation.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.BatchNormalization(),
        # Hidden layer 1: 300 ReLU units, He normal initialization.
        tf.keras.layers.Dense(
            units=300,
            activation="relu",
            kernel_initializer="he_normal",
        ),
        tf.keras.layers.BatchNormalization(),
        # Hidden layer 2: 100 ReLU units, He normal initialization.
        tf.keras.layers.Dense(
            units=100,
            activation="relu",
            kernel_initializer="he_normal",
        ),
        tf.keras.layers.BatchNormalization(),
        # Output layer: one softmax unit per class.
        tf.keras.layers.Dense(units=10, activation="softmax"),
    ]
)

In [9]:
# Print a per-layer overview of the current `model` (layer types, output
# shapes and parameter counts).
model.summary()

In [11]:
# Configure training: plain SGD, sparse categorical cross-entropy loss and
# accuracy as the reported metric.
# Assumes the y_* arrays hold integer class ids (required by the sparse loss) — TODO confirm.
model.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
# Train for 2 epochs, evaluating on the validation split after each one.
# fit() is the last expression of the cell, so the History object it returns
# is displayed as the cell output.
model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=2,
)

Epoch 1/2
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.7539 - loss: 0.7255 - val_accuracy: 0.8542 - val_loss: 0.4064
Epoch 2/2
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 8s 5ms/step - accuracy: 0.8576 - loss: 0.4056 - val_accuracy: 0.8684 - val_loss: 0.3749


<keras.src.callbacks.history.History at 0x154e6c3e0>

A veces, aplicar la normalización por lotes (BN) antes de la función de activación funciona mejor.

In [12]:
# Reset the global state Keras has accumulated from the previous model so
# this experiment starts from a clean slate.
tf.keras.backend.clear_session()
# Seed Python's `random`, NumPy and TensorFlow in a single call.
# Seeding only TensorFlow (`tf.random.set_seed`) is not enough under Keras 3:
# initializers without an explicit seed draw theirs from Python's `random`
# module, so the initial weights would differ between runs.
tf.keras.utils.set_random_seed(42)

In [13]:
# Same MLP layout, but with BatchNormalization applied BEFORE the activation:
# each hidden Dense layer is linear, followed by BN and then a separate ReLU
# Activation layer.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28)),
        tf.keras.layers.Flatten(),
        # Hidden block 1. The Dense bias is disabled: the BatchNormalization
        # layer that follows has its own learnable offset (Keras default
        # `center=True`), which would make a Dense bias redundant.
        tf.keras.layers.Dense(
            units=300,
            kernel_initializer="he_normal",
            use_bias=False,
        ),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("relu"),
        # Hidden block 2, same pattern with 100 units.
        tf.keras.layers.Dense(
            units=100,
            kernel_initializer="he_normal",
            use_bias=False,
        ),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("relu"),
        # Output layer: one softmax unit per class.
        tf.keras.layers.Dense(units=10, activation="softmax"),
    ]
)

In [14]:
# Configure training: plain SGD, sparse categorical cross-entropy loss and
# accuracy as the reported metric.
# Assumes the y_* arrays hold integer class ids (required by the sparse loss) — TODO confirm.
model.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
# Train for 2 epochs, evaluating on the validation split after each one.
# fit() is the last expression of the cell, so the History object it returns
# is displayed as the cell output.
model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=2,
)

Epoch 1/2
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 8s 5ms/step - accuracy: 0.7285 - loss: 0.8112 - val_accuracy: 0.8450 - val_loss: 0.4317
Epoch 2/2
1719/1719 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.8449 - loss: 0.4396 - val_accuracy: 0.8592 - val_loss: 0.3854


<keras.src.callbacks.history.History at 0x154e74500>