1.Weight initialization

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [3]:
[name for name in dir(keras.initializers) if not name.startswith('_')]

['Constant',
 'GlorotNormal',
 'GlorotUniform',
 'HeNormal',
 'HeUniform',
 'Identity',
 'Initializer',
 'LecunNormal',
 'LecunUniform',
 'Ones',
 'Orthogonal',
 'RandomNormal',
 'RandomUniform',
 'TruncatedNormal',
 'VarianceScaling',
 'Zeros',
 'constant',
 'deserialize',
 'get',
 'glorot_normal',
 'glorot_uniform',
 'he_normal',
 'he_uniform',
 'identity',
 'lecun_normal',
 'lecun_uniform',
 'ones',
 'orthogonal',
 'random_normal',
 'random_uniform',
 'serialize',
 'truncated_normal',
 'variance_scaling',
 'zeros']

In [4]:
keras.layers.Dense(units=10,
                   activation='relu',
                   kernel_initializer='he_normal')

<keras.src.layers.core.dense.Dense at 0x167d76790>

2. Activation function

In [5]:
[name for name in dir(keras.activations) if not name.startswith('_')]

['deserialize',
 'elu',
 'exponential',
 'gelu',
 'get',
 'hard_sigmoid',
 'linear',
 'mish',
 'relu',
 'selu',
 'serialize',
 'sigmoid',
 'softmax',
 'softplus',
 'softsign',
 'swish',
 'tanh']

In [6]:
keras.layers.Dense(10,
                  activation='selu',
                  kernel_initializer='lecun_normal')

<keras.src.layers.core.dense.Dense at 0x168151b20>

Method 2 keras.layers

In [8]:
[name for name in dir(keras.layers) if 'elu' in name.lower()]

['ELU', 'LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']

In [12]:
keras.layers.Dense(300,
                  kernel_initializer='he_normal'),
keras.layers.LeakyReLU(alpha=0.3),

(<keras.src.layers.activation.leaky_relu.LeakyReLU at 0x168afca90>,)

In [13]:
keras.layers.Dense(300,
                  kernel_initializer='he_normal'),
keras.layers.Activation('relu'),

(<keras.src.layers.core.activation.Activation at 0x168afc790>,)

ex1

In [19]:
#Load data
from tensorflow.keras.datasets import fashion_mnist
(x_train_set,y_train_set),(x_test,y_test)=fashion_mnist.load_data()
#Split data
from sklearn.model_selection import train_test_split
x_train,x_valid,y_train,y_valid=train_test_split(
    x_train_set,y_train_set,random_state=1)
#Preprocessing
x_train=x_train/255
x_valid=x_valid/255
x_test=x_test/255

In [20]:
tf.keras.backend.clear_session()
np.random.seed(1)
tf.random.set_seed(1)

In [24]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(300,activation='relu',kernel_initializer='he_normal'),
    keras.layers.Dense(200,kernel_initializer='he_normal'),
    keras.layers.LeakyReLU(alpha=0.01),
    keras.layers.Dense(100,kernel_initializer='he_normal'),
    keras.layers.PReLU(),
    keras.layers.Dense(10,activation='softmax')])

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 200)               60200     
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 200)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               20100     
                                                                 
 p_re_lu (PReLU)             (None, 100)               100       
                                                                 
 dense_3 (Dense)             (None, 10)                1

In [28]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',metrics=['accuracy'])

In [29]:
train=model.fit(x_train,y_train,
                epochs=2,
                validation_data=(x_valid,y_valid))

Epoch 1/2
Epoch 2/2


3.BatchNormalization

In [31]:
keras.layers.Dense(300,activation='relu'),
keras.layers.BatchNormalization(),

(<keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x2c0139d00>,)

In [32]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(200,activation='relu',kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100,activation='relu',kernel_initializer='he_normal'),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10,activation='softmax')])

In [33]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_1 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_5 (Dense)             (None, 200)               157000    
                                                                 
 batch_normalization_2 (Bat  (None, 200)               800       
 chNormalization)                                                
                                                                 
 dense_6 (Dense)             (None, 100)               20100     
                                                                 
 batch_normalization_3 (Bat  (None, 100)              

In [34]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',metrics=['accuracy'])

In [35]:
train=model.fit(x_train,y_train,
                epochs=2,
                validation_data=(x_valid,y_valid))

Epoch 1/2
Epoch 2/2


Method 2 :Before activation function

In [36]:
keras.layers.Dense(300,use_bias=False),
keras.layers.BatchNormalization(),
keras.layers.Activation('relu')

<keras.src.layers.core.activation.Activation at 0x2c90ecf70>

In [37]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(200,use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dense(100,use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha=0.3),
    keras.layers.Dense(10,activation='softmax')])

In [38]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_5 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_9 (Dense)             (None, 200)               156800    
                                                                 
 batch_normalization_6 (Bat  (None, 200)               800       
 chNormalization)                                                
                                                                 
 activation_1 (Activation)   (None, 200)               0         
                                                                 
 dense_10 (Dense)            (None, 100)              

In [52]:
model=Sequential([
    Flatten(input_shape=x_train.shape[1:]),
    Dense(units=300,activation='relu'),
    Dense(units=200,activation='relu'),
    Dense(units=100,activation='relu'),
    Dense(units=10,activation='softmax'),
])

In [53]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 dense_1 (Dense)             (None, 200)               60200     
                                                                 
 dense_2 (Dense)             (None, 100)               20100     
                                                                 
 dense_3 (Dense)             (None, 10)                1010      
                                                                 
Total params: 316810 (1.21 MB)
Trainable params: 316810 (1.21 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [39]:
model.compile(loss='sparse_categorical_crossentropy',optimizer = 'sgd',metrics=['accuracy'])

In [41]:
train=model.fit(x_train,y_train,
                epochs=2,
                validation_data=(x_valid,y_valid))

Epoch 1/2
Epoch 2/2


4.Gradient Clipping

Method 1:clipvalue

In [44]:
opt = tf.keras.optimizers.legacy.SGD(clipvalue=1.0)

In [45]:
opt = tf.keras.optimizers.legacy.SGD(clipvalue=0.9)

In [47]:
opt = tf.keras.optimizers.legacy.SGD(learning_rate=1e-3,clipvalue=0.9)
model.compile(loss='sparse_categorical_crossentropy',optimizer =opt,metrics=['accuracy'])

In [48]:
train=model.fit(x_train,y_train,
                epochs=2,
                validation_data=(x_valid,y_valid))

Epoch 1/2
Epoch 2/2
