<a href="https://colab.research.google.com/github/yawar-Ayub/Business-Card/blob/master/DL_Lab_Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Goal Build Task1 using Pytorch Proper classes
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Seed PyTorch's random number generators so that weight initialisation
# (and anything else drawing from torch's default RNG) is repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"using device: {device}")

using device: cuda


In [2]:
# Pre-processing applied to every image: ToTensor converts the PIL image
# into a float tensor (pixel values scaled to the range [0, 1]).
transform = transforms.Compose([transforms.ToTensor()])

# NOTE(review): "/data" is an absolute path at the filesystem root. It works on
# Colab, but may need elevated permissions elsewhere - consider a relative path.
train_dataset = datasets.MNIST(
    root="/data",
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root="/data",
    train=False,
    download=True,
    transform=transform,
)

# Training batches are reshuffled every epoch so SGD sees the samples in a
# different order each time.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
# Evaluation gains nothing from shuffling; a fixed order keeps the test
# batches identical from run to run.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
)

print(f"\nTraining Examples : {len(train_dataset)}")
print(f"Test Examples: {len(test_dataset)}")
print(f"Number of Training batches: {len(train_loader)}")  # 938 = ceil(60000/64)
print(f"Number of Test batches: {len(test_loader)}")       # 10 = 10000/1000


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 483kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.52MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.03MB/s]


Training Examples : 60000
Test Examples: 10000
Number of Training batches: 938
Number of Test batches: 10





In [4]:
# Define the Neural Network Class
# Neural Network Definition

class NeuralNetwork(nn.Module):
  """
  2-layer fully connected neural network

  Architecture:
  Input (784) -> Linear -> ReLU -> Linear -> Output (10)

  This is the standard PyTorch pattern:
  1. Inherit from nn.Module
  2. Define layers in __init__
  3. Define forward pass in forward()
  """
  def __init__(self,input_size,hidden_size,num_classes):
    """
    Initialize the network layers

    Args:
      input_size: Number of input features (784 for a flattened 28*28 image)
      hidden_size: Number of neurons in the hidden layer (e.g. 128)
      num_classes: Number of output classes (10 for digits)
    """
    # Critical: always call the parent class constructor first.
    # This sets up the nn.Module machinery that registers sub-layers/parameters.
    super().__init__()

    # Layer 1: Input -> Hidden
    # nn.Linear creates its own weights and biases.
    # Weight shape: (hidden_size, input_size) = (128, 784)  [stored as (out, in)]
    # Bias shape:   (hidden_size,) = (128,)
    # Both are initialised automatically by nn.Linear
    # (uniform in +/- 1/sqrt(in_features)).
    self.fc1 = nn.Linear(input_size, hidden_size)

    # ReLU activation (no parameters to learn)
    # Could also use: nn.Sigmoid(), nn.Tanh(), nn.LeakyReLU(), etc.
    self.relu = nn.ReLU()

    # Layer 2: Hidden -> Output
    # Weight shape: (num_classes, hidden_size) = (10, 128)  [stored as (out, in)]
    # Bias shape:   (num_classes,) = (10,)
    self.fc2 = nn.Linear(hidden_size, num_classes)
    # Note: we don't add softmax here!
    # nn.CrossEntropyLoss() expects raw logits and applies (log-)softmax internally.

  def forward(self,x):
    """
    Forward pass: defines how data flows through the network

    Args:
      x: Input tensor, shape (batch_size, input_size)
        Example: (64,784) for a batch of 64 flattened images.
        Image-shaped batches such as (64,1,28,28) are also accepted and
        are flattened to (64,784) before the first layer.

    Returns:
      out: Output logits, shape (batch_size, num_classes)
           Example: (64,10) - raw scores for each class

    This method is called when you do: output = model(input)
    PyTorch automatically handles the backward pass (gradients).
    """
    # Convenience: flatten everything after the batch dimension, but only when
    # the trailing dimension does not already match fc1's expected input size.
    # Inputs that were valid before (last dim == in_features) are left untouched.
    if x.dim() > 2 and x.size(-1) != self.fc1.in_features:
      x = x.flatten(start_dim=1)

    # Layer 1: linear transformation
    # Input:  (batch_size, 784)
    # Output: (batch_size, 128)
    # Operation: x @ w1.T + b1
    out = self.fc1(x)

    # Activation: ReLU (element-wise)
    # Input:  (batch_size, 128)
    # Output: (batch_size, 128)
    # Operation: max(0, out)
    out = self.relu(out)

    # Layer 2: linear transformation
    # Input:  (batch_size, 128)
    # Output: (batch_size, 10)
    # Operation: out @ w2.T + b2
    out = self.fc2(out)

    return out

# Build the network (784 input pixels -> 128 hidden units -> 10 digit classes)
# and place its parameters on the selected device (GPU or CPU) in one step.
model = NeuralNetwork(input_size=28*28, hidden_size=128, num_classes=10).to(device)

# Show the layer structure, then the number of parameters across all layers.
print(model)
total_params = sum(param.numel() for param in model.parameters())
print(f"\n Total parameters: {total_params} ")


NeuralNetwork(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

 Total parameters: 101770 


In [5]:
# Key Concepts
# 1. nn.Module: base class for all neural networks in PyTorch
# 2. __init__: where you define all layers (runs once, when the model is created)
# 3. forward(): where you define the data flow (runs every time you pass data through the model)
# 4. nn.Linear(in, out): fully connected layer with weights and biases

In [6]:
# Step 4: Loss function and optimizer

# The loss function measures how wrong the predictions are.
# nn.CrossEntropyLoss fuses two steps:
#   1. softmax (turns logits into probabilities)
#   2. negative log likelihood (compares them with the true labels)
# Input:  logits of shape (batch_size, num_classes) and targets of shape (batch_size,)
# Output: a scalar loss value
criterion = nn.CrossEntropyLoss()

# The optimizer updates the model parameters to minimise the loss.
# SGD (Stochastic Gradient Descent) is the basic choice:
#   - model.parameters(): every weight and bias in the model
#   - lr: learning rate, i.e. the size of each update step (try 0.001, 0.01, 0.1)
#   - momentum: adds "velocity" to the updates, which helps escape local minima
#               (0.9 is standard, try 0.0 - 0.99)
# Alternatives worth trying:
#   optim.Adam(model.parameters(), lr=0.01)     - adaptive learning rate
#   optim.RMSprop(model.parameters(), lr=0.01)  - good for RNNs
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

print("\nOptimizer: ", optimizer)
print("Loss Function", criterion)


Optimizer:  SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
Loss Function CrossEntropyLoss()


In [None]:
# Step 5 : Training Function
def train_one_epoch(model,train_loader,criterion,optimizer,device):
  """
  Train the model for one complete pass through the training data

  Args:
    model: Neural network to train
    train_loader: DataLoader with training data
    Criterion: Loss function
    optimer: Optimization algorithm
    device: 'cuda' or 'cpu'

  Returns :
    avg_loss: Average loss across all batches
    accuracy: Training accuracy (percentage)
    """