In [1]:
import torch

import torch.nn as nn 

import torch.nn.functional as F

Step 1: Define a Simple Neural Network

We'll use a 2-layer neural network for demonstration.

In [2]:
# Define a simple neural network

class SimpleNN(nn.Module):
    """Minimal feed-forward network: 2 inputs -> 4 hidden units -> 1 output.

    The hidden layer is followed by ReLU and the output layer by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order (fc1, then fc2) so that default
        # parameter initialization consumes the RNG the same way every time.
        self.fc1 = nn.Linear(in_features=2, out_features=4)  # input -> hidden
        self.fc2 = nn.Linear(in_features=4, out_features=1)  # hidden -> output

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Tensor whose last dimension has size 2 (the input width of fc1).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1, holding the sigmoid of the output layer.
        """
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return torch.sigmoid(logits)

In [3]:
# Instantiate the model
# This single `model` object is reused by every initialization cell below;
# each of them modifies its parameters in place.

model = SimpleNN()

Step 2: Apply Different Weight Initializations

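We will define functions for zero, random, Xavier, and He initialization and apply each one to the model in turn. Each call overwrites the parameters set by the previous one, so only the most recently applied initialization is in effect.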

1. Zero Initialization (BAD 🚫)

In [4]:
def zero_init(m):
    """Fill the weight and bias of an ``nn.Linear`` module with zeros.

    Meant to be passed to ``model.apply``; modules that are not ``nn.Linear``
    are left untouched.

    Args:
        m: Any ``nn.Module``.
    """
    if not isinstance(m, nn.Linear):
        return
    # Weight first, then bias -- both are set to the constant 0 in place.
    for tensor in (m.weight, m.bias):
        nn.init.constant_(tensor, 0)

# Apply Zero Initialization
# Module.apply() calls zero_init on every submodule and on the model itself,
# then returns the model -- which is why the cell echoes the model's repr.

model.apply(zero_init)

SimpleNN(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)

2. Random Initialization (Baseline)

In [5]:
def random_init(m):
    """Give an ``nn.Linear`` module small uniform random weights and a zero bias.

    Weights are drawn from a uniform distribution on [-0.1, 0.1]. Modules that
    are not ``nn.Linear`` are left untouched.

    Args:
        m: Any ``nn.Module``; intended for use with ``model.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    low, high = -0.1, 0.1  # small symmetric range around zero
    nn.init.uniform_(m.weight, a=low, b=high)
    nn.init.constant_(m.bias, 0)


# Apply Random Initialization
# Overwrites the parameters currently stored in `model`; apply() returns the
# model itself, so its repr is shown as the cell output.

model.apply(random_init)

SimpleNN(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)

3. Xavier Initialization (Good for Sigmoid/Tanh)

In [6]:
def xavier_init(m):
    """Apply Xavier (Glorot) uniform initialization to an ``nn.Linear`` module.

    The weight is filled with ``nn.init.xavier_uniform_`` and the bias is set
    to zero. Modules that are not ``nn.Linear`` are left untouched.

    Args:
        m: Any ``nn.Module``; intended for use with ``model.apply``.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        # Zero the *bias* only. Resetting m.weight here would discard the
        # Xavier values just written and leave the layer zero-initialized.
        nn.init.constant_(m.bias, 0)


# Apply Xavier Initialization
# Overwrites the parameters currently stored in `model`; apply() returns the
# model itself, so its repr is shown as the cell output.

model.apply(xavier_init)

SimpleNN(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)

4. He (Kaiming) Initialization (Best for ReLU)

In [7]:
def he_init(m):
    """Apply He (Kaiming) uniform initialization to an ``nn.Linear`` module.

    The weight is filled with ``nn.init.kaiming_uniform_`` using the ReLU
    gain, and the bias is set to zero. Modules that are not ``nn.Linear`` are
    left untouched.

    Args:
        m: Any ``nn.Module``; intended for use with ``model.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
    nn.init.constant_(m.bias, 0)


# Apply He Initialization
# Overwrites the parameters currently stored in `model`; apply() returns the
# model itself, so its repr is shown as the cell output.

model.apply(he_init)

SimpleNN(
  (fc1): Linear(in_features=2, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=1, bias=True)
)

Step 3: Verify Initialization

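After applying an initialization method, we can inspect the initialized weights. Because He initialization was the last method applied above, the values printed below are the He-initialized weights (with all biases set to zero).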

In [8]:
# Print each parameter tensor of the model under its qualified name
# (e.g. "fc1.weight"), separated by a blank line.
for param_name, tensor in model.named_parameters():
    print(f'{param_name} : {tensor.data} \n')

fc1.weight : tensor([[ 1.1865,  0.6319],
        [ 0.4075, -0.7439],
        [ 1.2187,  0.6133],
        [ 1.6621, -0.8599]]) 

fc1.bias : tensor([0., 0., 0., 0.]) 

fc2.weight : tensor([[0.7613, 0.5820, 1.0290, 0.5916]]) 

fc2.bias : tensor([0.]) 



Which Initialization is Best?

Zero Initialization: All neurons in a layer start out identical and receive identical updates, so they learn the same features and the symmetry is never broken (not useful).

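Random Initialization: Works, but a fixed range that ignores layer size can make training unstable, because activations and gradients may shrink or grow from layer to layer.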

Xavier Initialization: Good for sigmoid/tanh activations.

He Initialization: Best for ReLU networks (recommended for deep networks).