# PyTorch Basics Tutorial
A comprehensive guide to getting started with PyTorch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
print("1. TENSORS - The Foundation of PyTorch")

# --- Construction -----------------------------------------------------------
# Literal data: (nested) Python lists map directly onto tensor dimensions,
# in the same way they would for a NumPy array.
tensor_1d = torch.tensor([1, 2, 3, 4, 5])
tensor_2d = torch.tensor([[1, 2, 3], [4, 5, 6]])

# Factory functions: the argument is the shape of the tensor to create.
zeros = torch.zeros((3, 3))          # 3x3, every entry 0.0
ones = torch.ones((2, 4))            # 2x4, every entry 1.0
random_tensor = torch.randn((3, 3))  # 3x3, samples from a standard normal

# --- Inspection -------------------------------------------------------------
print(f"1D Tensor: {tensor_1d}")
print(f"2D Tensor:\n{tensor_2d}")
print(f"\nZeros:\n{zeros}")
print(f"\nRandom tensor:\n{random_tensor}")

# Every tensor exposes its shape, its element type, and the device it lives on.
print(f"\nTensor shape: {tensor_2d.shape}")
print(f"Tensor dtype: {tensor_2d.dtype}")
print(f"Tensor device: {tensor_2d.device}")

1. TENSORS - The Foundation of PyTorch
1D Tensor: tensor([1, 2, 3, 4, 5])
2D Tensor:
tensor([[1, 2, 3],
        [4, 5, 6]])

Zeros:
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

Random tensor:
tensor([[ 0.2402,  1.4003,  0.3646],
        [ 0.0591, -0.6264, -0.9232],
        [ 1.9276, -0.2713, -0.4819]])

Tensor shape: torch.Size([2, 3])
Tensor dtype: torch.int64
Tensor device: cpu


In [3]:
print("2. TENSOR OPERATIONS")

# Two 1-D float tensors to demonstrate arithmetic on
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0])

print(f"a: {a}")
print(f"b: {b}")
print(f"a + b: {a + b}")        # element-wise sum
print(f"a * b: {a * b}")        # element-wise product (NOT a dot product)
print(f"a.dot(b): {a.dot(b)}")  # sum of the element-wise products
# For 2-D tensors, the analogous operation is matrix multiplication.

# Matrix multiplication: (3x4) @ (4x2) -> (3x2); the inner dimensions must match
matrix_a = torch.randn(3, 4)
matrix_b = torch.randn(4, 2)
matrix_product = matrix_a @ matrix_b  # `@` is the operator form of torch.matmul
print(f"\nMatrix A shape: {matrix_a.shape}")
print(f"Matrix B shape: {matrix_b.shape}")
print(f"Matrix Product shape: {matrix_product.shape}")

# Reshaping: view() reinterprets the same 8 elements under a new shape
original = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
print(f"\nOriginal shape: {original.shape}")
reshaped = original.view(4, 2)  # 2x4 -> 4x2, row-major element order is kept
print(f"Reshaped to 4x2:\n{reshaped}")

2. TENSOR OPERATIONS
a: tensor([1., 2., 3.])
b: tensor([4., 5., 6.])
a + b: tensor([5., 7., 9.])
a * b: tensor([ 4., 10., 18.])
a.dot(b): 32.0

Matrix A shape: torch.Size([3, 4])
Matrix B shape: torch.Size([4, 2])
Matrix Product shape: torch.Size([3, 2])

Original shape: torch.Size([2, 4])
Reshaped to 4x2:
tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])


In [4]:
print("3. AUTOGRAD - Automatic Differentiation")

# Leaf tensors flagged with requires_grad=True have their operations recorded,
# so gradients with respect to them can be computed later.
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)

# Build the scalar z = x^2 + y^3 from the tracked inputs
z = x.pow(2) + y.pow(3)
print(f"z = x² + y³ = {z.item()}")

# Backpropagate from z; the results land in x.grad and y.grad
z.backward()

print(f"dz/dx = 2x = {x.grad}")   # 2 * 2.0 = 4.0
print(f"dz/dy = 3y² = {y.grad}")  # 3 * 3.0**2 = 27.0


# Second example: d = a*b + c, so dd/da = b, dd/db = a, dd/dc = 1
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(2.0, requires_grad=True)
c = torch.tensor(3.0, requires_grad=True)
product = a * b
d = product + c

d.backward()

print(f"\n\nGradient of d w.r.t a: {a.grad}")  # expected: value of b (2.0)
print(f"Gradient of d w.r.t b: {b.grad}")  # expected: value of a (1.0)
print(f"Gradient of d w.r.t c: {c.grad}")  # expected: 1.0

3. AUTOGRAD - Automatic Differentiation
z = x² + y³ = 31.0
dz/dx = 2x = 4.0
dz/dy = 3y² = 27.0


Gradient of d w.r.t a: 2.0
Gradient of d w.r.t b: 1.0
Gradient of d w.r.t c: 1.0


In [5]:
print("4. BUILDING A SIMPLE NEURAL NETWORK")

# A small fully connected network: 10 -> 20 -> 5 -> 1
class SimpleNet(nn.Module):
    """Feed-forward network with two ReLU-activated hidden layers.

    Layer sizes are fixed: 10 input features, hidden widths of 20 and 5,
    and a single output value with no final activation.
    """

    def __init__(self):
        super().__init__()
        # Submodules are assigned as attributes so nn.Module registers them
        # (and their parameters) under these names.
        self.fc1 = nn.Linear(in_features=10, out_features=20)
        self.relu = nn.ReLU()  # stateless, so one instance serves both hidden layers
        self.fc2 = nn.Linear(in_features=20, out_features=5)
        self.fc3 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Run `x` through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result.

        `x` must have 10 features in its last dimension (required by fc1);
        the output has 1 feature in its last dimension.
        """
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network; printing an nn.Module lists its registered submodules
model = SimpleNet()
print("Model architecture:")
print(model)

# Count every learnable scalar (weights and biases) across all parameter tensors
total_params = sum(map(torch.numel, model.parameters()))
print(f"\nTotal parameters: {total_params}")

4. BUILDING A SIMPLE NEURAL NETWORK
Model architecture:
SimpleNet(
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=20, out_features=5, bias=True)
  (fc3): Linear(in_features=5, out_features=1, bias=True)
)

Total parameters: 331


In [6]:
print("5. TRAINING A MODEL - Complete Example")

# Synthetic regression data following y = 3x + 2 + noise.
torch.manual_seed(42)  # fixed seed so every run draws the same samples
X_train = 10 * torch.randn(100, 1)  # 100 samples, 1 feature, spread widened by 10
noise = 2 * torch.randn(100, 1)     # Gaussian noise scaled by 2, drawn after X_train
y_train = 3 * X_train + 2 + noise

# Linear regression expressed as an nn.Module
class LinearModel(nn.Module):
    """Single-feature linear model computing y = w * x + b."""

    def __init__(self):
        super().__init__()
        # One input feature, one output: holds exactly one weight and one bias.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to `x` (last dimension must be 1) and return the result."""
        output = self.linear(x)
        return output

# Model, objective, and optimizer
model = LinearModel()
criterion = nn.MSELoss()                            # mean squared error
optimizer = optim.SGD(model.parameters(), lr=0.01)  # plain gradient descent

print("Training a linear regression model...")
print("True relationship: y = 3x + 2")

# Full-batch training: each iteration uses all of X_train / y_train
num_epochs = 100
for step in range(1, num_epochs + 1):
    # Gradients accumulate across backward() calls, so reset them first
    optimizer.zero_grad()

    # Forward pass: predictions and their loss against the targets
    predictions = model(X_train)
    loss = criterion(predictions, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Report progress every 20th epoch
    if step % 20 == 0:
        print(f"Epoch [{step}/{num_epochs}], Loss: {loss.item():.4f}")

# The layer holds a single weight and a single bias, so .item() extracts each scalar
weight, bias = model.linear.weight.item(), model.linear.bias.item()
print("\nLearned parameters:")
print(f"Weight (should be ~3): {weight:.4f}")
print(f"Bias (should be ~2): {bias:.4f}")


5. TRAINING A MODEL - Complete Example
Training a linear regression model...
True relationship: y = 3x + 2
Epoch [20/100], Loss: 49.2957
Epoch [40/100], Loss: 6.3420
Epoch [60/100], Loss: 3.4448
Epoch [80/100], Loss: 3.1945
Epoch [100/100], Loss: 3.1495

Learned parameters:
Weight (should be ~3): 3.0007
Bias (should be ~2): 1.9091


In [7]:
print("6. WORKING WITH GPU (if available)")

# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# Tensors: .to(device) returns a tensor on the target device
cpu_tensor = torch.randn(3, 3)
gpu_tensor = cpu_tensor.to(device)
print(f"Tensor device: {gpu_tensor.device}")

# Modules: .to(device) moves the model's parameters to the target device
model_gpu = LinearModel().to(device)
first_param = next(model_gpu.parameters())  # any parameter reveals where the model lives
print(f"Model is on: {first_param.device}")

6. WORKING WITH GPU (if available)
Using device: cpu
Tensor device: cpu
Model is on: cpu


In [8]:
print("7. SAVING AND LOADING MODELS")

# Single source of truth for the checkpoint location used by both save and load
weights_path = './model_weights.pth'

# Save model: only the state_dict (the parameter tensors keyed by name) is
# written, not the Python class, so loading requires re-creating the model first.
torch.save(model.state_dict(), weights_path)
print("Model saved to model_weights.pth")

# Load model into a freshly constructed instance of the same architecture
new_model = LinearModel()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code during load.
# map_location="cpu" lets the file load on a machine without the device it was
# saved from; load_state_dict then copies the values into new_model's parameters.
state_dict = torch.load(weights_path, map_location="cpu", weights_only=True)
new_model.load_state_dict(state_dict)
new_model.eval()  # Set to evaluation mode
print("Model loaded successfully")

7. SAVING AND LOADING MODELS
Model saved to model_weights.pth
Model loaded successfully


In [9]:
# Recap of the concepts covered in this notebook, kept as one multi-line string.
# The leading and trailing newlines inside the literal produce the blank lines
# around the list when printed.
summary_text = """
1. TENSORS: Multi-dimensional arrays, the basic data structure
2. OPERATIONS: Mathematical operations on tensors
3. AUTOGRAD: Automatic differentiation for computing gradients
4. nn.Module: Base class for building neural networks
5. LOSS FUNCTIONS: Measure how well the model performs (MSELoss, CrossEntropyLoss, etc.)
6. OPTIMIZERS: Update model parameters (SGD, Adam, etc.)
7. TRAINING LOOP:
   - Forward pass (compute predictions)
   - Compute loss
   - Backward pass (compute gradients)
   - Update parameters
8. DEVICE: Move tensors/models to GPU for faster computation
9. SAVE/LOAD: Persist and restore model weights
"""

print("SUMMARY OF KEY CONCEPTS")
print(summary_text)

print("\nCompleted PyTorch basics tutorial!")

SUMMARY OF KEY CONCEPTS

1. TENSORS: Multi-dimensional arrays, the basic data structure
2. OPERATIONS: Mathematical operations on tensors
3. AUTOGRAD: Automatic differentiation for computing gradients
4. nn.Module: Base class for building neural networks
5. LOSS FUNCTIONS: Measure how well the model performs (MSELoss, CrossEntropyLoss, etc.)
6. OPTIMIZERS: Update model parameters (SGD, Adam, etc.)
7. TRAINING LOOP:
   - Forward pass (compute predictions)
   - Compute loss
   - Backward pass (compute gradients)
   - Update parameters
8. DEVICE: Move tensors/models to GPU for faster computation
9. SAVE/LOAD: Persist and restore model weights


Completed PyTorch basics tutorial!
