# Test Xavier Weight Initialization
First train the model using Xavier weight initialization, then train the same architecture using plain random weight initialization. Comparing the two loss curves shows that Xavier initialization gives better convergence.

In [32]:
import syft
import syft.nn as nn

import imp
imp.reload(syft)
imp.reload(syft.nn)

import numpy as np
from syft import FloatTensor

In [33]:
# Problem dimensions shared by the synthetic data below.
batch_size, n_features, n_targets = 100, 250, 200

# First layer is kept under its own name (`lin`) in addition to being part of
# the model. Every Linear layer in this cell is created with
# initializer="Xavier".
lin = nn.Linear(250, 100, initializer="Xavier")

# Layer widths: 250 -> 100 -> 50 -> 50 -> 200, Tanh between the hidden layers
# and a Sigmoid on the output.
model = nn.Sequential([
    lin, nn.Tanh(),
    nn.Linear(100, 50, initializer="Xavier"), nn.Tanh(),
    nn.Linear(50, 50, initializer="Xavier"), nn.Tanh(),
    nn.Linear(50, 200, initializer="Xavier"), nn.Sigmoid(),
])

# The numpy RNG is re-seeded with the same value before each array is built,
# so the global RNG state left behind by this cell is that of a fresh seed(123).

# Inputs: uniform samples in [0, 1).
np.random.seed(123)
input_values = np.random.rand(batch_size, n_features)
input = FloatTensor(input_values, autograd=True)

# Targets: np.random.randint excludes its upper bound, so randint(0, 1, ...)
# yields an array containing only zeros.
# NOTE(review): if binary targets were intended this should probably be
# randint(0, 2, ...); left unchanged so the recorded losses stay valid.
np.random.seed(123)
target_values = np.random.randint(0, 1, (batch_size, n_targets))
target = FloatTensor(target_values, autograd=True)

# Constant tensor (every entry 0.01) handed to loss.backward() in the training
# cell. NOTE(review): the 0.01 presumably plays the role of a learning rate,
# since the update step there applies p.grad() unscaled -- confirm.
np.random.seed(123)
grad_values = np.full((batch_size, n_targets), 0.01)
grad = FloatTensor(grad_values, autograd=False)

In [34]:
# Print summary statistics for every weight matrix of the model so the value
# range can be compared against sqrt(1 / first-dimension size).
for tensor in model.parameters():
    weights = tensor.to_numpy()
    n_rows = weights.shape[0]
    if n_rows == 1:
        # A leading dimension of 1 is treated as a bias parameter and skipped.
        continue

    scale = np.sqrt(1.0 / n_rows)
    w_mean = np.mean(weights)
    w_max = np.max(weights)
    w_min = np.min(weights)

    # NOTE(review): "{:4f}" / "{:2f}" set a minimum field WIDTH, not a
    # precision, so values print with the default six decimals; ".4f" may have
    # been intended. Kept as-is to match the recorded output.
    print("Shape of weights {} - Sqrt of 1/(# inputs) {:4f}".format(weights.shape, scale))
    print("Mean value {:2f} max value {:4f} min value {:4f} \n".format(w_mean, w_max, w_min))

Shape of weights (250, 100) - Sqrt of 1/(# inputs) 0.063246
Mean value 0.000010 max value 0.063240 min value -0.063228 

Shape of weights (100, 50) - Sqrt of 1/(# inputs) 0.100000
Mean value -0.000313 max value 0.099975 min value -0.099972 

Shape of weights (50, 50) - Sqrt of 1/(# inputs) 0.141421
Mean value -0.001772 max value 0.141386 min value -0.141382 

Shape of weights (50, 200) - Sqrt of 1/(# inputs) 0.141421
Mean value 0.000441 max value 0.141386 min value -0.141382 



In [35]:
# Run 15 full-batch update steps and print the summed squared error each step.
for step in range(15):
    # Forward pass followed by an element-wise squared error.
    pred = model(input)
    residual = pred - target
    loss = residual ** 2

    # Back-propagate, passing the constant `grad` tensor to backward().
    loss.backward(grad)

    # In-place update: each parameter is decremented by its gradient.
    # NOTE(review): the original note here said gradients have to be zeroed
    # by hand, yet nothing in this loop zeroes them -- confirm whether
    # backward() accumulates across iterations.
    for weight in model.parameters():
        weight -= weight.grad()

    # Reported value is the loss computed before this step's update.
    total_loss = loss.to_numpy().sum()
    print(total_loss)

4975.3066489
2817.99148144
24.6610177812
9.70027632113
7.98010182634
6.89689011504
6.12607268282
5.53921544536
5.07230684267
4.68899085584
4.36674678708
4.09072019851
3.85062905697
3.63908115758
3.45059191323


**Without using Xavier Initialization**

In [39]:
# Problem dimensions shared by the synthetic data below.
batch_size, n_features, n_targets = 100, 250, 200

# Same architecture as the Xavier experiment, but every Linear layer receives
# initializer="x".
# NOTE(review): "x" is presumably just a value other than "Xavier" that makes
# nn.Linear fall back to its default random initialization -- confirm against
# syft.nn.Linear.
lin = nn.Linear(250, 100, initializer="x")

# Layer widths: 250 -> 100 -> 50 -> 50 -> 200, Tanh between the hidden layers
# and a Sigmoid on the output.
model = nn.Sequential([
    lin, nn.Tanh(),
    nn.Linear(100, 50, initializer="x"), nn.Tanh(),
    nn.Linear(50, 50, initializer="x"), nn.Tanh(),
    nn.Linear(50, 200, initializer="x"), nn.Sigmoid(),
])

# The numpy RNG is re-seeded with the same value before each array is built,
# so the global RNG state left behind by this cell is that of a fresh seed(123).

# Inputs: uniform samples in [0, 1).
np.random.seed(123)
input_values = np.random.rand(batch_size, n_features)
input = FloatTensor(input_values, autograd=True)

# Targets: np.random.randint excludes its upper bound, so randint(0, 1, ...)
# yields an array containing only zeros.
# NOTE(review): if binary targets were intended this should probably be
# randint(0, 2, ...); left unchanged so the recorded losses stay valid.
np.random.seed(123)
target_values = np.random.randint(0, 1, (batch_size, n_targets))
target = FloatTensor(target_values, autograd=True)

# Constant tensor (every entry 0.01) handed to loss.backward() in the training
# cell. NOTE(review): the 0.01 presumably plays the role of a learning rate,
# since the update step there applies p.grad() unscaled -- confirm.
np.random.seed(123)
grad_values = np.full((batch_size, n_targets), 0.01)
grad = FloatTensor(grad_values, autograd=False)

In [40]:
# Print summary statistics for every weight matrix of the model so the value
# range can be compared against sqrt(1 / first-dimension size).
for tensor in model.parameters():
    weights = tensor.to_numpy()
    n_rows = weights.shape[0]
    if n_rows == 1:
        # A leading dimension of 1 is treated as a bias parameter and skipped.
        continue

    scale = np.sqrt(1.0 / n_rows)
    w_mean = np.mean(weights)
    w_max = np.max(weights)
    w_min = np.min(weights)

    # NOTE(review): "{:4f}" / "{:2f}" set a minimum field WIDTH, not a
    # precision, so values print with the default six decimals; ".4f" may have
    # been intended. Kept as-is to match the recorded output.
    print("Shape of weights {} - Sqrt of 1/(# inputs) {:4f}".format(weights.shape, scale))
    print("Mean value {:2f} max value {:4f} min value {:4f} \n".format(w_mean, w_max, w_min))

Shape of weights (250, 100) - Sqrt of 1/(# inputs) 0.063246
Mean value -0.000155 max value 0.999725 min value -0.999917 

Shape of weights (100, 50) - Sqrt of 1/(# inputs) 0.100000
Mean value 0.003130 max value 0.999725 min value -0.999748 

Shape of weights (50, 50) - Sqrt of 1/(# inputs) 0.141421
Mean value 0.012528 max value 0.999725 min value -0.999748 

Shape of weights (50, 200) - Sqrt of 1/(# inputs) 0.141421
Mean value -0.003119 max value 0.999725 min value -0.999748 



In [41]:
# Run 15 full-batch update steps and print the summed squared error each step.
for step in range(15):
    # Forward pass followed by an element-wise squared error.
    pred = model(input)
    residual = pred - target
    loss = residual ** 2

    # Back-propagate, passing the constant `grad` tensor to backward().
    loss.backward(grad)

    # In-place update: each parameter is decremented by its gradient.
    # NOTE(review): the original note here said gradients have to be zeroed
    # by hand, yet nothing in this loop zeroes them -- confirm whether
    # backward() accumulates across iterations.
    for weight in model.parameters():
        weight -= weight.grad()

    # Reported value is the loss computed before this step's update.
    total_loss = loss.to_numpy().sum()
    print(total_loss)

7532.93115248
4328.6242998
1247.12350158
527.160001765
305.659551966
301.321585643
298.716477336
283.256217185
201.75580824
201.581933988
201.448387705
201.341319961
201.252886501
201.178198584
201.114011104
