<a href="https://colab.research.google.com/github/yashveersinghsohi/Hands_On_ML_Book_Practice/blob/master/Chapter_11/Chapter11_Training_Deep_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Vanishing/Exploding Gradients

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
fashion_mnist = tf.keras.datasets.fashion_mnist
(X_train_val, y_train_val), (X_test, y_test) = fashion_mnist.load_data()
X_train, y_train = X_train_val[:50000], y_train_val[:50000]
X_val, y_val = X_train_val[50000:], y_train_val[50000:]
X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((50000, 28, 28),
 (50000,),
 (10000, 28, 28),
 (10000,),
 (10000, 28, 28),
 (10000,))

In [3]:
tf.random.set_seed(42)

model = tf.keras.models.Sequential([
  tf.keras.layers.InputLayer(shape=[28, 28]),
  tf.keras.layers.Flatten(),
  tf.keras.layers.BatchNormalization(),
  tf.keras.layers.Dense(300, activation='relu', kernel_initializer='he_normal'),
  tf.keras.layers.BatchNormalization(),
  tf.keras.layers.Dense(100, activation='relu', kernel_initializer='he_normal'),
  tf.keras.layers.BatchNormalization(),
  tf.keras.layers.Dense(10, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1)
loss = 'sparse_categorical_crossentropy'
metrics = ['accuracy']

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

model.summary()

In [4]:
[(var.name, var.trainable) for var in model.layers[1].variables]

[('gamma', True),
 ('beta', True),
 ('moving_mean', False),
 ('moving_variance', False)]

In [6]:
callbacks = [
  tf.keras.callbacks.EarlyStopping(patience=5),
  tf.keras.callbacks.ModelCheckpoint('model.keras', save_best_only=True)
]
history = model.fit(
  X_train, y_train,
  epochs=20,
  callbacks=callbacks,
  validation_data=(X_val, y_val)
)

Epoch 1/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.7964 - loss: 0.5820 - val_accuracy: 0.8653 - val_loss: 0.3668
Epoch 2/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.8697 - loss: 0.3602 - val_accuracy: 0.8745 - val_loss: 0.3445
Epoch 3/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.8894 - loss: 0.3058 - val_accuracy: 0.8759 - val_loss: 0.3499
Epoch 4/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - accuracy: 0.9044 - loss: 0.2637 - val_accuracy: 0.8767 - val_loss: 0.3559
Epoch 5/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9177 - loss: 0.2262 - val_accuracy: 0.8818 - val_loss: 0.3682
Epoch 6/20
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 8ms/step - accuracy: 0.9318 - loss: 0.1915 - val_accuracy: 0.8815 - val_loss: 0.3876
Epoch 7/20

In [8]:
model.evaluate(X_test, y_test, verbose=2, return_dict=True)

313/313 - 1s - 3ms/step - accuracy: 0.8732 - loss: 0.4807


{'accuracy': 0.873199999332428, 'loss': 0.4807271957397461}