## A bit deeper:
- Weight Initialization
- Batch Normalization
- L1 and L2 regularization
- Dropout (Regularization)

In [1]:
import matplotlib.pyplot as plt
import numpy as np

from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras.models import Model, Sequential

Using TensorFlow backend.


### Initialization

In [14]:
def init1(shape, dtype=None):
    """Custom initializer: print the requested shape and return all zeros.

    Parameters
    ----------
    shape : tuple of int
        Shape of the weight array to create.
    dtype : optional
        Target dtype, forwarded to ``ndarray.astype``.

    Returns
    -------
    numpy.ndarray
        Zeros of the given shape, cast to ``dtype``.
    """
    # Printing the shape shows which weight tensor is being initialised.
    print(shape)
    # Alternatives to experiment with: a constant value, or
    # np.random.normal(0, 1, shape).
    zeros = np.zeros(shape)
    return zeros.astype(dtype)

def init2(shape, dtype=None):
    """Custom initializer: print the requested shape and return all ones.

    Parameters
    ----------
    shape : tuple of int
        Shape of the weight array to create.
    dtype : optional
        Target dtype, forwarded to ``ndarray.astype``.

    Returns
    -------
    numpy.ndarray
        Ones of the given shape, cast to ``dtype``.
    """
    # Printing the shape shows which weight tensor is being initialised.
    print(shape)
    # Alternatives to experiment with: a constant value, or
    # np.random.normal(0, 1, shape).
    ones = np.ones(shape)
    return ones.astype(dtype)

In [15]:
# A single Dense layer (1 input -> 10 units) whose kernel is produced by
# init1 and whose bias is produced by init2.
inputs = Input(shape=(1,))
outputs = Dense(
    10,
    kernel_initializer=init1,
    bias_initializer=init2,
)(inputs)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='sgd', loss='mse')
model.summary()

(1, 10)
(10,)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                20        
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Pass an explicit (batch=1, features=1) array: Keras treats a Python list
# as "one array per model input", so a bare [1] is ambiguous.
model.predict(np.array([[1.0]]))

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], dtype=float32)

### Batch Normalization

![](https://www.researchgate.net/profile/Tali_Leibovich-Raveh/publication/325868989/figure/fig2/AS:639475206074368@1529474178211/A-Basic-sigmoid-function-with-two-parameters-c1-and-c2-as-commonly-used-for-subitizing.png)

In [17]:
def init1(shape, dtype=None):
    """Custom initializer: print the requested shape and fill it with 2.

    Parameters
    ----------
    shape : tuple of int
        Shape of the weight array to create (any shape is accepted).
    dtype : optional
        Target dtype, forwarded to ``ndarray.astype``.

    Returns
    -------
    numpy.ndarray
        Array of the given shape with every entry equal to 2, cast to
        ``dtype``.
    """
    print(shape)
    # np.full handles any shape; a fixed three-element list could only be
    # reshaped into shapes holding exactly three weights.
    return np.full(shape, 2).astype(dtype)

# One Dense unit on a 3-feature input, followed by a BatchNormalization
# layer; the Dense kernel comes from init1.
features = Input(shape=(3,))
dense_out = Dense(1, kernel_initializer=init1)(features)
normalized = BatchNormalization()(dense_out)

model = Model(inputs=features, outputs=normalized)
model.compile(optimizer='sgd', loss='mse')
model.summary()

(3, 1)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 3)                 0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 4         
_________________________________________________________________
batch_normalization_1 (Batch (None, 1)                 4         
Total params: 8
Trainable params: 6
Non-trainable params: 2
_________________________________________________________________


### Dropout

![](https://miro.medium.com/max/1200/1*iWQzxhVlvadk6VAJjsgXgg.png)

In [36]:
def init1(shape, dtype=None):
    """Custom initializer: print the shape and fill it with 0, 1, 2, ...

    The weights are the consecutive integers ``0 .. N-1`` laid out in
    row-major order, where ``N`` is the total number of elements.

    Parameters
    ----------
    shape : tuple of int
        Shape of the weight array to create (any number of dimensions).
    dtype : optional
        Target dtype, forwarded to ``ndarray.astype``.

    Returns
    -------
    numpy.ndarray
        Array of the given shape holding ``arange(N)``, cast to ``dtype``.
    """
    print(shape)
    # Alternative to experiment with: a constant kernel filled with 2.
    # np.prod supports any rank; shape[0] * shape[1] only worked for 2-D.
    n_weights = int(np.prod(shape))
    return np.arange(n_weights).reshape(shape).astype(dtype)

# Two Dense layers, each followed by Dropout(0.5). The dropout layers are
# called with training=True, so they stay active during predict() as well.
inputs = Input(shape=(10,))
hidden = Dense(10, kernel_initializer=init1)(inputs)
hidden_dropped = Dropout(0.5)(hidden, training=True)
output = Dense(1, kernel_initializer=init1)(hidden_dropped)
output_dropped = Dropout(0.5)(output, training=True)

model = Model(inputs=inputs, outputs=output_dropped)
model.compile(optimizer='sgd', loss='mse')
model.summary()

(10, 10)
(10, 1)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 10)                0         
_________________________________________________________________
dense_16 (Dense)             (None, 10)                110       
_________________________________________________________________
dropout_5 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 11        
_________________________________________________________________
dropout_6 (Dropout)          (None, 1)                 0         
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________


In [47]:
# Feed a single sample of ten ones through the dropout model.
model.predict(np.ones((1, 10), dtype=int))

array([[39280.]], dtype=float32)

In [36]:
# def init1(shape, dtype=None):
#     print(shape)
#     nn = shape[0]*shape[1]
#     ws = np.array(nn*[1./nn]).reshape(shape).astype(dtype)
# #     print(ws)
#     return ws

# x1 = Input(shape=(1000,))
# x2 = Dense(1, kernel_initializer=init1)(x1)
# x3 = BatchNormalization()(x2)

# model = Model(inputs=x1,outputs=x3)
# model.compile(optimizer = 'sgd', loss = 'mse')
# model.summary()

In [37]:
# x = np.random.normal(100,1,1000)
# y = model.predict(x.reshape(1,1000))
# y.mean()

In [38]:
# hist, bin_edges = np.histogram(y.reshape(-1),100)
# bin_c = 0.5*(bin_edges[1:]+bin_edges[:-1])

In [39]:
# plt.plot(bin_c,hist)
# # plt.xlim(-5,5)

### Regularizers

![](https://miro.medium.com/max/2546/1*zMLv7EHYtjfr94JOBzjqTA.png)

In [None]:
from keras.regularizers import l1,l2,l1_l2

In [None]:
from keras import regularizers
# The `model` built in the earlier cells is a functional-API Model, which
# has no .add(); layers can only be appended to a Sequential model, so start
# a fresh one here.
model = Sequential()
# L2 penalty on the kernel and L1 penalty on the bias, both with factor 0.01.
model.add(Dense(64, input_dim=64,
                kernel_regularizer=l2(0.01),
                bias_regularizer=l1(0.01)))

In [None]:
# The three penalties imported from keras.regularizers: L1, L2, and the
# combined L1+L2. Each call builds an object that is not bound to a name;
# in a notebook cell only the value of the last expression is displayed.
l1(0.)
l2(0.)
l1_l2(l1=0.01, l2=0.01)

In [None]:
from keras import backend as K

def l1_reg(weight_matrix):
    """Custom L1 penalty: 0.01 times the sum of the absolute weight values.

    Parameters
    ----------
    weight_matrix
        The weights to penalise, passed to the backend ``K.abs``/``K.sum``.

    Returns
    -------
    The scalar penalty computed by the Keras backend.
    """
    abs_total = K.sum(K.abs(weight_matrix))
    return 0.01 * abs_total

# .add() exists only on Sequential models (the functional-API Model built
# earlier in the notebook does not have it), so create one for this example.
model = Sequential()
# Any callable mapping a weight tensor to a scalar can serve as a regularizer.
model.add(Dense(64, input_dim=64,
                kernel_regularizer=l1_reg))
