In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        # Input layer -> Hidden layer (3 -> 4)
        self.fc1 = nn.Linear(3, 4)
        # Hidden layer -> Output layer (4 -> 1)
        self.fc2 = nn.Linear(4, 1)

    def forward(self, x):
        # Pass input through first layer + activation
        x = F.relu(self.fc1(x))   # hidden layer with ReLU
        # Pass through output layer
        x = torch.sigmoid(self.fc2(x))  # output layer with sigmoid (for binary classification)
        return x

# Step 2: Create a model instance
model = NeuralNetwork()

# Step 3: Create dummy input and target
x = torch.randn(5, 3)    # batch of 5 samples, each with 3 features
y_true = torch.randint(0, 2, (5, 1)).float()  # random binary labels (0 or 1)


In [None]:

# Step 4: Define a loss function and optimizer
criterion = nn.BCELoss()               # binary cross entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Forward pass
y_pred = model(x)                      # shape (5,1)
loss = criterion(y_pred, y_true)       # compute loss
print("Predictions:", y_pred.squeeze().detach().numpy())
print("Loss before backward:", loss.item())


Predictions: [0.40170285 0.37684378 0.41908705 0.29825234 0.34698552]
Loss before backward: 0.6681616902351379


In [12]:
for p in model.parameters():
    p
    break

In [42]:
node = loss.grad_fn
while node is not None:
    print(type(node))
    if hasattr(node, 'next_functions'):
        node = node.next_functions[0][0]  # follow the chain
    else:
        node = None

<class 'BinaryCrossEntropyBackward0'>
<class 'SigmoidBackward0'>
<class 'AddmmBackward0'>
<class 'AccumulateGrad'>


IndexError: tuple index out of range

In [43]:
def traverse(node, visited=set()):
    if node is None or node in visited:
        return
    visited.add(node)
    print(type(node))
    if hasattr(node, 'next_functions'):
        for f, _ in node.next_functions:
            traverse(f, visited)

traverse(loss.grad_fn)

<class 'BinaryCrossEntropyBackward0'>
<class 'SigmoidBackward0'>
<class 'AddmmBackward0'>
<class 'AccumulateGrad'>
<class 'ReluBackward0'>
<class 'AddmmBackward0'>
<class 'AccumulateGrad'>
<class 'TBackward0'>
<class 'AccumulateGrad'>
<class 'TBackward0'>
<class 'AccumulateGrad'>


In [31]:
for name, param in model.named_parameters():
    print(name, param.shape, param.requires_grad, param.grad, param.grad_fn)

fc1.weight torch.Size([4, 3]) True None None
fc1.bias torch.Size([4]) True None None
fc2.weight torch.Size([1, 4]) True None None
fc2.bias torch.Size([1]) True None None


In [32]:

# Step 5: Backward pass
optimizer.zero_grad()   # clear previous gradients
loss.backward()         # compute gradients (dL/dW)
print("\nGradient for fc1 weights:\n", model.fc1.weight.grad)  # check gradients
for name, param in model.named_parameters():
    print(name, param.data, param.grad)
# Step 6: Update parameters
# optimizer.step()        # apply gradient descent update
# print("\nLoss after one update step:")
# print(criterion(model(x), y_true).item())


Gradient for fc1 weights:
 tensor([[ 0.0423, -0.0489, -0.0277],
        [-0.0164, -0.0318, -0.0152],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0083,  0.0067,  0.0057]])
fc1.weight tensor([[-0.1276, -0.4260, -0.5297],
        [ 0.2210, -0.3671, -0.2224],
        [-0.2153,  0.2381,  0.0116],
        [ 0.2245,  0.1306, -0.4408]]) tensor([[ 0.0423, -0.0489, -0.0277],
        [-0.0164, -0.0318, -0.0152],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0083,  0.0067,  0.0057]])
fc1.bias tensor([ 0.4330,  0.2515, -0.3183, -0.3463]) tensor([-0.0108, -0.0186,  0.0000, -0.0105])
fc2.weight tensor([[ 0.3565,  0.1570, -0.2207, -0.1637]]) tensor([[ 0.0712,  0.0429,  0.0000, -0.0239]])
fc2.bias tensor([0.1944]) tensor([-0.0100])


In [17]:
optimizer = torch.optim.SGD([
    {"params": model.fc1.parameters(), "lr": 0.1},   # group 1
    {"params": model.fc2.parameters(), "lr": 0.01},  # group 2
], momentum=0.9)

In [18]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[-0.2444, -0.3968, -0.3645],
           [-0.3145, -0.1763,  0.2024],
           [-0.1106,  0.5706,  0.5384],
           [ 0.1965,  0.2396, -0.1818]], requires_grad=True),
   Parameter containing:
   tensor([ 0.4497, -0.5074,  0.4030, -0.2264], requires_grad=True)],
  'lr': 0.1,
  'momentum': 0.9,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False,
  'maximize': False,
  'foreach': None,
  'differentiable': False,
  'fused': None},
 {'params': [Parameter containing:
   tensor([[-0.2614, -0.4362,  0.0727, -0.2088]], requires_grad=True),
   Parameter containing:
   tensor([-0.4236], requires_grad=True)],
  'lr': 0.01,
  'momentum': 0.9,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False,
  'maximize': False,
  'foreach': None,
  'differentiable': False,
  'fused': None}]