In [2]:
import torch
print(torch.__version__)

2.5.1+cu121


In [None]:
import torch
x = torch.randn(5, requires_grad=True)
y = x.pow(2)
print(f"x.requires_grad: {x.requires_grad}")
print(f"y.requires_grad: {y.requires_grad}")


with torch.no_grad():
    z = y * 2
print(f"y.requires_grad: {y.requires_grad}")
print(f"z.requires_grad: {z.requires_grad}")
print(f"y.grad_fn:{y.grad_fn}")
print(f"z.grad_fn:{z.grad_fn}")


x.requires_grad: True
y.requires_grad: True
y.requires_grad: True
z.requires_grad: False
y.grad_fn:<PowBackward0 object at 0x14ab54080d00>
z.grad_fn:None


In [1]:
import torch
import torch.nn as nn

# Simple model definition
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(3, 3)  # A simple fully connected layer
    
    def forward(self, x):
        return self.fc(x)

# Create an instance of the model
model = SimpleModel()

# Example 1: Understanding 'train' vs 'eval'
# Set the model to training mode
model.train()
print(f"Model in training mode: {model.training}")

for name, param in model.named_parameters():
    print(f"model is training: {model.training}, Parameter name: {name}, requires_grad: {param.requires_grad}")


# Set the model to evaluation mode
model.eval()
print(f"Model in evaluation mode: {model.training}")

for name, param in model.named_parameters():
    print(f"model is training: {model.training}, Parameter name: {name}, requires_grad: {param.requires_grad}")

# Example 2: Understanding 'requires_grad' for tensors
# Create a tensor with requires_grad set to True
tensor_train = torch.randn(3, requires_grad=True)
print(f"Tensor requires_grad=True: {tensor_train.requires_grad}")

# Create a tensor with requires_grad set to False (default)
tensor_no_grad = torch.randn(3, requires_grad=False)
print(f"Tensor requires_grad=False: {tensor_no_grad.requires_grad}")

# Example 3: Using 'with torch.no_grad()' context
print("\nBefore 'with torch.no_grad()':")
print(f"Requires grad for tensor (before): {tensor_train.requires_grad}")

# Disable gradient tracking
with torch.no_grad():
    tensor_no_grad = tensor_train * 2
    print(f"Tensor requires_grad inside 'torch.no_grad()': {tensor_no_grad.requires_grad}")
    print(f"grad_cn of tensor_no_grad inside 'torch.no_grad()': {tensor_no_grad.grad_fn}")

print(f"\nAfter 'with torch.no_grad()' block:")
print(f"Requires grad for tensor (after): {tensor_train.requires_grad}")

# Example 4: Gradient flow during forward pass
print("\nGradient flow demonstration:")
input_tensor = torch.randn(3, 3, requires_grad=True)  # Input tensor that requires gradients
output_tensor = model(input_tensor)  # Forward pass
print(f"Output tensor requires_grad: {output_tensor.requires_grad}")  # Should be True since model parameters require gradients

# Performing a backward pass
output_tensor.mean().backward()  # Perform a backward pass to compute gradients
print(f"Gradient for input tensor: {input_tensor.grad}")  # Gradient should be computed for input tensor

# Trying the same without gradients
with torch.no_grad():
    output_no_grad = model(input_tensor)
    print(f"Output tensor inside no_grad block requires_grad: {output_no_grad.requires_grad}")
    # No gradient tracking will happen inside the block


Model in training mode: True
model is training: True, Parameter name: fc.weight, requires_grad: True
model is training: True, Parameter name: fc.bias, requires_grad: True
Model in evaluation mode: False
model is training: False, Parameter name: fc.weight, requires_grad: True
model is training: False, Parameter name: fc.bias, requires_grad: True
Tensor requires_grad=True: True
Tensor requires_grad=False: False

Before 'with torch.no_grad()':
Requires grad for tensor (before): True
Tensor requires_grad inside 'torch.no_grad()': False
grad_cn of tensor_no_grad inside 'torch.no_grad()': None

After 'with torch.no_grad()' block:
Requires grad for tensor (after): True

Gradient flow demonstration:
Output tensor requires_grad: True
Gradient for input tensor: tensor([[ 0.1023, -0.1377,  0.0895],
        [ 0.1023, -0.1377,  0.0895],
        [ 0.1023, -0.1377,  0.0895]])
Output tensor inside no_grad block requires_grad: False
