<a href="https://colab.research.google.com/github/victordaniel/DEEP-LEARNIG-COURSE/blob/main/Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the transform: convert each image to a tensor, then normalize all
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the dataset (download=True fetches CIFAR-10 into ./data if needed).
# Both splits share the same transform.
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)

# Create DataLoader: batches of 64 samples; only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Define a simple CNN model
class SimpleCNN(nn.Module):
    """Small CNN: two (3x3 conv -> ReLU -> 2x2 max-pool) stages and two linear layers.

    The first linear layer takes 32 * 6 * 6 features per sample, and the
    second one produces 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(in_features=32 * 6 * 6, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the raw class scores computed from the input batch ``x``."""
        # Both convolutional stages share the same conv -> ReLU -> pool shape.
        for conv in (self.conv1, self.conv2):
            x = torch.max_pool2d(torch.relu(conv(x)), 2)
        flat = x.view(-1, 32 * 6 * 6)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

# Instantiate the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for a fixed number of passes over train_loader.
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass: compute the scores and the loss for this batch.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 33845356.68it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/5], Step [100/782], Loss: 1.7779
Epoch [1/5], Step [200/782], Loss: 1.6873
Epoch [1/5], Step [300/782], Loss: 1.5136
Epoch [1/5], Step [400/782], Loss: 1.4466
Epoch [1/5], Step [500/782], Loss: 1.3792
Epoch [1/5], Step [600/782], Loss: 1.3298
Epoch [1/5], Step [700/782], Loss: 1.3452
Epoch [2/5], Step [100/782], Loss: 1.0866
Epoch [2/5], Step [200/782], Loss: 1.3736
Epoch [2/5], Step [300/782], Loss: 0.9734
Epoch [2/5], Step [400/782], Loss: 1.0109
Epoch [2/5], Step [500/782], Loss: 0.9515
Epoch [2/5], Step [600/782], Loss: 1.2780
Epoch [2/5], Step [700/782], Loss: 1.1926
Epoch [3/5], Step [100/782], Loss: 1.1281
Epoch [3/5], Step [200/782], Loss: 1.0282
Epoch [3/5], Step [300/782], Loss: 1.0821
Epoch [3/5], Step [400/782], Loss: 1.2290
Epoch [3/5], Step [500/782], Loss: 0.9623
Epoch [3/5], Step [600/782], Loss: 1.1268
Epoch [3/5], Step [700/782], Loss: 1.1562
Epoch [4/5], Step [100/782], 

# Extension: save the model, load the model, evaluate, and print accuracy

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the transform: convert each image to a tensor, then normalize all
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the dataset (download=True fetches CIFAR-10 into ./data if needed).
# Both splits share the same transform.
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)

# Create DataLoader: batches of 64 samples; only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Define a simple CNN model
class SimpleCNN(nn.Module):
    """Two conv/ReLU/max-pool stages followed by a two-layer classifier head.

    ``fc1`` consumes 32 * 6 * 6 flattened features per sample; ``fc2`` emits
    10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(32 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Compute the raw class scores for the input batch ``x``."""
        stage1 = torch.max_pool2d(torch.relu(self.conv1(x)), 2)
        stage2 = torch.max_pool2d(torch.relu(self.conv2(stage1)), 2)
        # Flatten to (N, 32 * 6 * 6) before the fully connected layers.
        features = stage2.view(-1, 32 * 6 * 6)
        return self.fc2(torch.relu(self.fc1(features)))

# Instantiate the model (moved to the selected device), the cross-entropy
# loss, and an Adam optimizer over all model parameters.
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Function to save the model
def save_model(epoch, model, optimizer, loss, path):
    """Write a training checkpoint to ``path`` with ``torch.save``.

    The checkpoint is a dict holding the epoch index, the model and
    optimizer state dicts, and the loss value passed in.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }
    torch.save(checkpoint, path)

# Function to load the model
def load_model(path, model, optimizer, map_location=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        path: Checkpoint file containing the keys 'model_state_dict',
            'optimizer_state_dict', 'epoch' and 'loss'.
        model: Module whose parameters are overwritten in place.
        optimizer: Optimizer whose state is overwritten in place.
        map_location: Optional device remapping passed to ``torch.load``.
            When omitted, tensors are mapped to the device of the model's
            first parameter ('cpu' if the model has no parameters).

    Returns:
        A ``(epoch, loss)`` tuple read from the checkpoint.
    """
    if map_location is None:
        # Without a remapping, a checkpoint written on a GPU cannot be
        # restored on a CPU-only machine; follow the model's own device.
        first_param = next(model.parameters(), None)
        map_location = first_param.device if first_param is not None else 'cpu'
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['loss']

# Training, checkpointing on the training loss, then a final test-set evaluation
num_epochs = 5
best_loss = float('inf')
model_save_path = './model.pth'

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-batch losses for this epoch
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # Mean of the per-batch training losses over the epoch.
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss:.4f}')

    # Save a checkpoint whenever the average TRAINING loss improves; no
    # validation loss is computed inside this loop.
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        save_model(epoch, model, optimizer, epoch_loss, model_save_path)

# Evaluation on test_loader with the weights left by the last epoch (the
# saved checkpoint is not reloaded here).
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test images: {100 * correct / total} %')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:18<00:00, 9226921.63it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/5], Step [100/782], Loss: 1.5238
Epoch [1/5], Step [200/782], Loss: 1.5151
Epoch [1/5], Step [300/782], Loss: 1.6491
Epoch [1/5], Step [400/782], Loss: 1.2008
Epoch [1/5], Step [500/782], Loss: 1.5274
Epoch [1/5], Step [600/782], Loss: 1.3187
Epoch [1/5], Step [700/782], Loss: 1.2800
Epoch [1/5], Average Loss: 1.4741
Epoch [2/5], Step [100/782], Loss: 1.2894
Epoch [2/5], Step [200/782], Loss: 1.2377
Epoch [2/5], Step [300/782], Loss: 1.3691
Epoch [2/5], Step [400/782], Loss: 1.2598
Epoch [2/5], Step [500/782], Loss: 1.1166
Epoch [2/5], Step [600/782], Loss: 1.0534
Epoch [2/5], Step [700/782], Loss: 1.2542
Epoch [2/5], Average Loss: 1.1440
Epoch [3/5], Step [100/782], Loss: 1.1997
Epoch [3/5], Step [200/782], Loss: 1.0584
Epoch [3/5], Step [300/782], Loss: 0.8691
Epoch [3/5], Step [400/782], Loss: 0.9212
Epoch [3/5], Step [500/782], Loss: 0.8295
Epoch [3/5], Step [600/782], Loss: 1.1126
Epo

# Improving accuracy: dropout, an enhanced architecture, data augmentation, and a learning-rate scheduler are added.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training transform with data augmentation (random flip and padded random
# crop) followed by tensor conversion and per-channel normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation transform: same normalization but NO random augmentation, so the
# test accuracy is deterministic and measured on the unmodified images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the dataset (download=True fetches CIFAR-10 into ./data if needed).
train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Create DataLoader: batches of 64 samples; only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Define an enhanced CNN model
class EnhancedCNN(nn.Module):
    """Three conv/BatchNorm/ReLU/max-pool stages and a dropout-regularized head.

    ``fc1`` expects 128 * 2 * 2 flattened features per sample (what the
    three stages produce for 32x32 inputs); ``fc2`` emits 10 scores.
    """

    def __init__(self):
        super(EnhancedCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc1 = nn.Linear(128*2*2, 128)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        # Without a nonlinearity here, fc1 followed by fc2 is just one affine
        # map and the hidden layer adds no capacity. The functional ReLU
        # leaves the state_dict keys unchanged.
        out = torch.relu(self.fc1(out))
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Instantiate the model (moved to the selected device), the cross-entropy
# loss, and an Adam optimizer over all model parameters.
model = EnhancedCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# StepLR multiplies the learning rate by gamma every step_size scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Function to save the model
def save_model(epoch, model, optimizer, loss, path):
    """Serialize the epoch, model/optimizer state dicts and loss to ``path``."""
    model_state = model.state_dict()
    optimizer_state = optimizer.state_dict()
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model_state,
            'optimizer_state_dict': optimizer_state,
            'loss': loss,
        },
        path,
    )

# Function to load the model
def load_model(path, model, optimizer, map_location=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        path: Checkpoint file containing the keys 'model_state_dict',
            'optimizer_state_dict', 'epoch' and 'loss'.
        model: Module whose parameters are overwritten in place.
        optimizer: Optimizer whose state is overwritten in place.
        map_location: Optional device remapping passed to ``torch.load``.
            When omitted, tensors are mapped to the device of the model's
            first parameter ('cpu' if the model has no parameters).

    Returns:
        A ``(epoch, loss)`` tuple read from the checkpoint.
    """
    if map_location is None:
        # Without a remapping, a checkpoint written on a GPU cannot be
        # restored on a CPU-only machine; follow the model's own device.
        first_param = next(model.parameters(), None)
        map_location = first_param.device if first_param is not None else 'cpu'
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['loss']

# Training, checkpointing on the training loss, then a final test-set evaluation
num_epochs = 50
best_loss = float('inf')
model_save_path = './model.pth'

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-batch losses for this epoch
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # Mean of the per-batch training losses over the epoch.
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss:.4f}')

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    # Save a checkpoint whenever the average TRAINING loss improves; no
    # validation loss is computed inside this loop.
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        save_model(epoch, model, optimizer, epoch_loss, model_save_path)

# Evaluation on test_loader with the weights left by the last epoch (the
# saved checkpoint is not reloaded here).
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the test images: {100 * correct / total} %')


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/50], Step [100/782], Loss: 1.8477
Epoch [1/50], Step [200/782], Loss: 1.6041
Epoch [1/50], Step [300/782], Loss: 1.4495
Epoch [1/50], Step [400/782], Loss: 1.3800
Epoch [1/50], Step [500/782], Loss: 1.5237
Epoch [1/50], Step [600/782], Loss: 1.3321
Epoch [1/50], Step [700/782], Loss: 1.5248
Epoch [1/50], Average Loss: 1.4589
Epoch [2/50], Step [100/782], Loss: 1.1387
Epoch [2/50], Step [200/782], Loss: 1.0612
Epoch [2/50], Step [300/782], Loss: 1.0934
Epoch [2/50], Step [400/782], Loss: 1.0757
Epoch [2/50], Step [500/782], Loss: 0.9908
Epoch [2/50], Step [600/782], Loss: 1.0988
Epoch [2/50], Step [700/782], Loss: 1.0933
Epoch [2/50], Average Loss: 1.1519
Epoch [3/50], Step [100/782], Loss: 1.1490
Epoch [3/50], Step [200/782], Loss: 0.6958
Epoch [3/50], Step [300/782], Loss: 0.9447
Epoch [3/50], Step [400/782], Loss: 1.1636
Epoch [3/50], Step [500/782], Loss: 0.8200
Epoch [3/50], Step [600/782], Loss: 0

In [None]:
# Initialize the repository in the working directory and stage every file.
!git init
!git add .
# Set the author identity that git records on commits (global scope).
!git config --global user.email "victor.nitk@gmail.com"
!git config --global user.name "Victor Daniel"



Reinitialized existing Git repository in /content/.git/


In [None]:
# Commit the staged files, then register the GitHub repository as `origin`.
!git commit -m "Initial commit"
!git remote add origin https://github.com/victordaniel/DEEP-LEARNIG-COURSE.git


On branch master
nothing to commit, working tree clean


In [None]:
# The local branch is `master`, so there is no local `main` to push; rename
# the current branch first, then push it and set the upstream.
!git branch -M main
!git push -u origin main


error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/victordaniel/DEEP-LEARNIG-COURSE.git'
[m

#BASE MODEL WITH TIME AND ACCURACY PRINTED

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
import time
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
# The training and evaluation helpers below read this module-level name.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Function to save the model
def save_model(epoch, model, optimizer, loss, path):
    """Persist a checkpoint dict to ``path``.

    Stored keys: 'epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'.
    """
    state = {'epoch': epoch}
    state['model_state_dict'] = model.state_dict()
    state['optimizer_state_dict'] = optimizer.state_dict()
    state['loss'] = loss
    torch.save(state, path)

# Function to load the model
def load_model(path, model, optimizer, map_location=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        path: Checkpoint file containing the keys 'model_state_dict',
            'optimizer_state_dict', 'epoch' and 'loss'.
        model: Module whose parameters are overwritten in place.
        optimizer: Optimizer whose state is overwritten in place.
        map_location: Optional device remapping passed to ``torch.load``.
            When omitted, tensors are mapped to the device of the model's
            first parameter ('cpu' if the model has no parameters).

    Returns:
        A ``(epoch, loss)`` tuple read from the checkpoint.
    """
    if map_location is None:
        # Without a remapping, a checkpoint written on a GPU cannot be
        # restored on a CPU-only machine; follow the model's own device.
        first_param = next(model.parameters(), None)
        map_location = first_param.device if first_param is not None else 'cpu'
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'], checkpoint['loss']

# Function to train the model
def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=50,
                val_loader=None, step_scheduler_per_batch=None, save_path='./model.pth'):
    """Train ``model`` and record the mean training/validation loss per epoch.

    After every epoch the model is evaluated on the validation loader, and a
    checkpoint is written with ``save_model`` whenever the validation loss
    improves.

    Args:
        model: Module to train; batches are moved to the module-level ``device``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler to advance during training.
        num_epochs: Number of passes over ``train_loader``.
        val_loader: Loader used for the validation loss. Defaults to the
            module-level ``test_loader`` for backward compatibility.
        step_scheduler_per_batch: Whether ``scheduler.step()`` runs after
            every optimizer step rather than once per epoch. ``None``
            auto-detects: OneCycleLR and CyclicLR are stepped per batch,
            everything else per epoch.
        save_path: Where the best checkpoint is written.

    Returns:
        ``(train_losses, val_losses)``: two lists with one value per epoch.
    """
    if val_loader is None:
        # Backward-compatible fallback to the loader defined at module level.
        val_loader = test_loader
    if step_scheduler_per_batch is None:
        # These schedules are sized in optimizer steps (e.g. OneCycleLR with
        # steps_per_epoch * epochs); stepping them once per epoch would leave
        # the learning rate stuck at the very start of the schedule.
        per_batch_types = (
            torch.optim.lr_scheduler.OneCycleLR,
            torch.optim.lr_scheduler.CyclicLR,
        )
        step_scheduler_per_batch = isinstance(scheduler, per_batch_types)

    train_losses = []
    val_losses = []
    best_loss = float('inf')

    for epoch in range(num_epochs):
        model.train()
        running_train_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step_scheduler_per_batch:
                scheduler.step()

            running_train_loss += loss.item()
            if (i+1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

        epoch_train_loss = running_train_loss / len(train_loader)
        train_losses.append(epoch_train_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_train_loss:.4f}')

        # Validation
        model.eval()
        running_val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()

        epoch_val_loss = running_val_loss / len(val_loader)
        val_losses.append(epoch_val_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {epoch_val_loss:.4f}')

        if not step_scheduler_per_batch:
            scheduler.step()

        # Save the model if the validation loss is the best we've seen so far.
        if epoch_val_loss < best_loss:
            best_loss = epoch_val_loss
            save_model(epoch, model, optimizer, epoch_val_loss, save_path)

    return train_losses, val_losses

# Function to evaluate the model
def evaluate_model(model, test_loader):
    """Return the percentage of samples in ``test_loader`` classified correctly.

    The model is switched to eval mode and gradients are disabled; batches
    are moved to the module-level ``device``.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch in test_loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            scores = model(images)
            # Predicted class = index of the largest score along dim 1.
            predicted = torch.max(scores.data, 1)[1]
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return 100 * n_correct / n_seen

# Define the transform with data augmentation for advanced models: random
# flip, padded random crop, small rotation and color jitter, followed by
# tensor conversion and per-channel normalization.
transform_advanced = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Define the transform without data augmentation for the baseline model
# (also used for the test split below).
transform_baseline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the datasets: un-augmented training split and the test split.
train_dataset_baseline = CIFAR10(root='./data', train=True, download=True, transform=transform_baseline)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_baseline)

# Batches of 64 samples; only the training loaders are shuffled.
train_loader_baseline = DataLoader(dataset=train_dataset_baseline, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Same training split again, this time with the augmenting transform.
train_dataset_advanced = CIFAR10(root='./data', train=True, download=True, transform=transform_advanced)
train_loader_advanced = DataLoader(dataset=train_dataset_advanced, batch_size=64, shuffle=True, num_workers=2)

# Baseline Model
class BaselineCNN(nn.Module):
    """Two conv/BatchNorm/ReLU/max-pool stages followed by a two-layer head.

    ``fc1`` expects 32 * 8 * 8 flattened features per sample (what the two
    stages produce for 32x32 inputs); ``fc2`` emits 10 scores.
    """

    def __init__(self):
        super(BaselineCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        # Without a nonlinearity here, fc1 followed by fc2 is just one affine
        # map and the hidden layer adds no capacity. The functional ReLU
        # leaves the state_dict keys unchanged.
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Model 1
class Model1CNN(nn.Module):
    """Three padded conv/BatchNorm/ReLU/max-pool stages (32-64-128 channels).

    ``fc1`` expects 128 * 4 * 4 flattened features per sample (what the
    three stages produce for 32x32 inputs); dropout is applied before the
    final 10-way linear layer.
    """

    def __init__(self):
        super(Model1CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        # Without a nonlinearity here, fc1 followed by fc2 is just one affine
        # map and the hidden layer adds no capacity. The functional ReLU
        # leaves the state_dict keys unchanged.
        out = torch.relu(self.fc1(out))
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Model 2
class Model2CNN(nn.Module):
    """Three padded conv/BatchNorm/ReLU/max-pool stages (64-128-256 channels).

    ``fc1`` expects 256 * 4 * 4 flattened features per sample (what the
    three stages produce for 32x32 inputs); dropout is applied before the
    final 10-way linear layer.
    """

    def __init__(self):
        super(Model2CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        # Without a nonlinearity here, fc1 followed by fc2 is just one affine
        # map and the hidden layer adds no capacity. The functional ReLU
        # leaves the state_dict keys unchanged.
        out = torch.relu(self.fc1(out))
        out = self.dropout(out)
        out = self.fc2(out)
        return out

# Training and evaluation for Baseline Model (un-augmented data, StepLR).
model_baseline = BaselineCNN().to(device)
criterion = nn.CrossEntropyLoss()  # reused by all three experiments below
optimizer = optim.Adam(model_baseline.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# Wall-clock time covers training only; accuracy is measured afterwards.
start_time = time.time()
train_losses, val_losses = train_model(model_baseline, train_loader_baseline, criterion, optimizer, scheduler, num_epochs=50)
end_time = time.time()
time_baseline = end_time - start_time
accuracy_baseline = evaluate_model(model_baseline, test_loader)
print(f'Baseline Model - Time: {time_baseline:.2f}s, Accuracy: {accuracy_baseline:.2f}%')

# Training and evaluation for Model 1 (augmented data, OneCycleLR).
model1 = Model1CNN().to(device)
optimizer = optim.Adam(model1.parameters(), lr=0.001, weight_decay=1e-4)
# NOTE(review): this schedule spans len(train_loader_advanced) * 50 scheduler
# steps, i.e. it is sized to be advanced once per batch; confirm that
# train_model steps it that often.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(train_loader_advanced), epochs=50)

# train_losses / val_losses are overwritten by each experiment.
start_time = time.time()
train_losses, val_losses = train_model(model1, train_loader_advanced, criterion, optimizer, scheduler, num_epochs=50)
end_time = time.time()
time_model1 = end_time - start_time
accuracy_model1 = evaluate_model(model1, test_loader)
print(f'Model 1 - Time: {time_model1:.2f}s, Accuracy: {accuracy_model1:.2f}%')

# Training and evaluation for Model 2 (same recipe as Model 1, wider network).
model2 = Model2CNN().to(device)
optimizer = optim.Adam(model2.parameters(), lr=0.001, weight_decay=1e-4)
# NOTE(review): sized for one scheduler step per batch, as for Model 1.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(train_loader_advanced), epochs=50)

start_time = time.time()
train_losses, val_losses = train_model(model2, train_loader_advanced, criterion, optimizer, scheduler, num_epochs=50)
end_time = time.time()
time_model2 = end_time - start_time
accuracy_model2 = evaluate_model(model2, test_loader)
print(f'Model 2 - Time: {time_model2:.2f}s, Accuracy: {accuracy_model2:.2f}%')

# Summary: repeat the three result lines together for easy comparison.
print(f'Baseline Model - Time: {time_baseline:.2f}s, Accuracy: {accuracy_baseline:.2f}%')
print(f'Model 1 - Time: {time_model1:.2f}s, Accuracy: {accuracy_model1:.2f}%')
print(f'Model 2 - Time: {time_model2:.2f}s, Accuracy: {accuracy_model2:.2f}%')


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/50], Step [100/782], Loss: 1.6827
Epoch [1/50], Step [200/782], Loss: 1.3844
Epoch [1/50], Step [300/782], Loss: 1.4897
Epoch [1/50], Step [400/782], Loss: 1.0379
Epoch [1/50], Step [500/782], Loss: 1.1358
Epoch [1/50], Step [600/782], Loss: 1.6833
Epoch [1/50], Step [700/782], Loss: 1.1050
Epoch [1/50], Training Loss: 1.3358
Epoch [1/50], Validation Loss: 1.1732
Epoch [2/50], Step [100/782], Loss: 0.9704
Epoch [2/50], Step [200/782], Loss: 0.8630
Epoch [2/50], Step [300/782], Loss: 0.8133
Epoch [2/50], Step [400/782], Loss: 1.1475
Epoch [2/50], Step [500/782], Loss: 0.7859
Epoch [2/50], Step [600/782], Loss: 0.9721
Epoch [2/50], Step [700/782], Loss: 1.0769
Epoch [2/50], Training Loss: 1.0147
Epoch [2/50], Validation Loss: 0.9670
Epoch [3/50], Step [100/782], Loss: 0.7864
Epoch [3/50], Step [200/782], Loss: 1.0399
Epoch [3/50], Step [300/782], Loss: 1.1820
Epoch [

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

# Sample CSV data: a header row followed by four (data, label) rows.
# The string starts with a blank line because of the opening triple quote.
csv_data = """
data,label
1.0,0
2.0,1
3.0,0
4.0,1
"""

# Write the sample CSV data to a file in the current working directory.
with open('data.csv', 'w') as f:
    f.write(csv_data)

# Custom Dataset
class CustomDataset(Dataset):
    """Dataset over a CSV file: column 0 is the value, column 1 is the label."""

    def __init__(self, csv_file):
        # The whole file is read into memory once, at construction time.
        self.data_frame = pd.read_csv(csv_file)

    def __len__(self):
        return self.data_frame.shape[0]

    def __getitem__(self, idx):
        """Return ``{'data': float32 tensor, 'label': int64 tensor}`` for row ``idx``."""
        value, target = (self.data_frame.iloc[idx, col] for col in (0, 1))
        return {
            'data': torch.tensor(value, dtype=torch.float32),
            'label': torch.tensor(target, dtype=torch.long),
        }

# Create Dataset and DataLoader: shuffled batches of two samples each.
dataset = CustomDataset(csv_file='data.csv')
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Iterate through the DataLoader and print every batch with its index.
for i, batch in enumerate(dataloader):
    print(f"Batch {i}:")
    print(batch)


# **Transfer Learning in PyTorch**
Transfer learning leverages pre-trained models to solve new tasks, especially useful for tasks with limited labeled data. This method reduces training time and improves performance.

### **Key Concepts**
- **Pre-trained Models:** Models trained on large datasets.
- **Feature Extraction:** Using pre-trained models to extract features.
- **Fine-Tuning:** Slightly adjusting pre-trained model weights.

### Steps for Implementing Transfer Learning

**1. Data Transforms**

We define transformations for the training and validation data. These transformations include resizing, cropping, normalizing, and data augmentation techniques.


# Main Goal and Key Concepts of the Program

The main goal of the following program is to demonstrate **transfer learning** using a pre-trained Convolutional Neural Network (CNN) model (ResNet18) for a custom image classification task. Transfer learning leverages a model pre-trained on a large dataset (like ImageNet) and fine-tunes it to work on a smaller, specific dataset.

## Key Concepts Explained in the Program:

### 1. Transfer Learning
- Reuse of a pre-trained model on a new task.
- Fine-tuning the model to adapt it to the new task.

### 2. Data Preparation
- Applying appropriate transformations (resizing, cropping, normalization) to the training and validation datasets to ensure the data is in the correct format for the pre-trained model.

### 3. Model Modification
- Loading a pre-trained ResNet18 model.
- Modifying the final fully connected (FC) layer to match the number of classes in the new dataset.

### 4. Training and Evaluation
- Setting up the training loop with a training and validation phase.
- Using an optimizer (SGD) and a learning rate scheduler (StepLR) to optimize the model.
- Tracking and saving the best-performing model based on validation accuracy.

### 5. Utilizing PyTorch
- Using PyTorch's built-in functionalities for data loading (`DataLoader`), transformations (`transforms`), and model training (`nn`, `optim`).

## Detailed Explanation:

### 1. Transfer Learning
- **Concept**: Instead of training a CNN from scratch, which is computationally expensive and requires a large amount of labeled data, we use a model pre-trained on a large dataset (ImageNet). The pre-trained model has already learned useful features, such as edges and textures, which can be applied to a new but related task.
- **Implementation**: We load a pre-trained ResNet18 model and replace its final FC layer with a new one that matches the number of classes in our specific dataset.

### 2. Data Preparation
- **Transformations**: Data augmentation techniques like random cropping and horizontal flipping are applied to the training data to make the model robust to variations. Both training and validation data are normalized to match the distribution of the ImageNet dataset, ensuring the pre-trained model's weights are applied effectively.

### 3. Model Modification
- **Loading Pre-trained Model**: The ResNet18 model pre-trained on ImageNet is loaded.
- **Replacing FC Layer**: The original FC layer, which outputs 1000 classes (ImageNet), is replaced with a new FC layer that outputs the number of classes in the custom dataset.

### 4. Training and Evaluation
- **Training Loop**: The model is trained over multiple epochs. In each epoch, it goes through both training and validation phases.
- **Optimization**: An SGD optimizer with momentum and a StepLR scheduler are used to optimize the model parameters.
- **Tracking Performance**: During training, the model's performance is tracked by calculating loss and accuracy for both training and validation sets. The model's state is saved if it achieves the best validation accuracy.

### 5. Using PyTorch
- **DataLoader**: Efficiently loads data in batches, shuffling the training data to ensure randomness.
- **Transforms**: Provides a way to apply transformations to images, ensuring they are in the correct format and normalized.
- **Neural Network Module (`nn`)**: Defines the network architecture and the loss function.
- **Optimizer Module (`optim`)**: Provides optimization algorithms to adjust the model weights during training.

## Concepts covered

The main concept this program aims to explain is **transfer learning** with a focus on practical implementation using PyTorch. It shows how to:
- Prepare data for transfer learning.
- Modify a pre-trained model to suit a new classification task.
- Train and evaluate the modified model efficiently using PyTorch.

By understanding and implementing this program, one can leverage powerful pre-trained models for their specific tasks, saving time and resources while achieving high performance.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import time
import copy
import os

# Data Transforms
# Training images receive random crop/flip augmentation; validation images get a
# deterministic resize followed by a centre crop. Both pipelines finish with the
# same per-channel normalisation.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),  # random region, rescaled to 224x224
        transforms.RandomHorizontalFlip(),  # random left-right flip
        transforms.ToTensor(),  # image -> tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # per-channel mean / std
                             std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),  # resize to 256 pixels
        transforms.CenterCrop(224),  # keep the central 224x224 patch
        transforms.ToTensor(),  # image -> tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # per-channel mean / std
                             std=[0.229, 0.224, 0.225]),
    ]),
}

# Load Data
# One ImageFolder dataset and one DataLoader per split, each using the
# transform pipeline registered under the same key above.
data_dir = 'data/hymenoptera_data'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'val')
}
dataloaders = {
    split: DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)
    for split, dataset in image_datasets.items()
}
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
class_names = image_datasets['train'].classes
# Prefer the first CUDA device, falling back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Load Pre-trained Model
# Start from ResNet18 with pre-trained weights and swap its final fully
# connected layer for one with an output per class in class_names.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features  # width of the features feeding the FC layer
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

model_ft = model_ft.to(device)  # place the network on the selected device
criterion = nn.CrossEntropyLoss()  # classification loss

# Set Up Optimizer and Scheduler
# SGD with momentum over all parameters; StepLR multiplies the learning rate
# by gamma every step_size scheduler steps.
optimizer_ft = optim.SGD(params=model_ft.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)

# Training and Evaluation Functions
# Define the function to train and evaluate the model.
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders`` dict; batches are moved to the module-level
    ``device``. Loss and accuracy are printed for both phases.

    Args:
        model: Network to train; it is updated in place.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that reached
        the highest validation accuracy loaded back in.
    """
    since = time.time()  # wall-clock start, for the final timing report
    best_model_wts = copy.deepcopy(model.state_dict())  # snapshot of the best weights so far
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Accumulate plain Python numbers so the metrics (and best_acc)
                # are floats rather than tensors living on the device.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_samples += batch_size

            if is_train:
                scheduler.step()

            # Average over the samples actually seen, so the numbers stay
            # correct even if the loader does not visit the whole dataset;
            # max() avoids a division by zero for an empty loader.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # '.4f' (four decimals) matches the per-epoch report; the former '4f'
    # was a minimum-width spec that printed six decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights before handing the model back
    model.load_state_dict(best_model_wts)
    return model

# Train the Model
# Fine-tune the ResNet18 for 25 epochs; train_model hands back the same network
# with its best validation weights loaded.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

# Save the Model
# Only the state dictionary (the weights) is written to disk.
torch.save(model_ft.state_dict(), 'model_ft.pth')


# **Using Different Pre-trained Models**
**VGG16:**

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Start from VGG16 with pre-trained weights
model_vgg16 = models.vgg16(pretrained=True)

# Turn off gradient tracking for every existing parameter
model_vgg16.requires_grad_(False)

# Swap the last classifier layer (index 6) for a new linear head with one
# output per entry in class_names; it is created after the freeze above.
num_ftrs = model_vgg16.classifier[6].in_features
model_vgg16.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Place the network on the selected device
model_vgg16 = model_vgg16.to(device)


**Inception v3:**

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Start from Inception v3 with pre-trained weights
model_inception = models.inception_v3(pretrained=True)

# Turn off gradient tracking for every existing parameter
model_inception.requires_grad_(False)

# Swap the final fully connected layer for a new linear head with one output
# per entry in class_names; it is created after the freeze above.
num_ftrs = model_inception.fc.in_features
model_inception.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Place the network on the selected device
model_inception = model_inception.to(device)


**DenseNet:**

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Start from DenseNet-121 with pre-trained weights
model_densenet = models.densenet121(pretrained=True)

# Turn off gradient tracking for every existing parameter
model_densenet.requires_grad_(False)

# Swap the classifier layer for a new linear head with one output per entry
# in class_names; it is created after the freeze above.
num_ftrs = model_densenet.classifier.in_features
model_densenet.classifier = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Place the network on the selected device
model_densenet = model_densenet.to(device)


**MobileNet v2:**

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Start from MobileNet v2 with pre-trained weights
model_mobilenet = models.mobilenet_v2(pretrained=True)

# Turn off gradient tracking for every existing parameter
model_mobilenet.requires_grad_(False)

# Swap the linear layer at classifier index 1 for a new head with one output
# per entry in class_names; it is created after the freeze above.
num_ftrs = model_mobilenet.classifier[1].in_features
model_mobilenet.classifier[1] = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Place the network on the selected device
model_mobilenet = model_mobilenet.to(device)


# Multiple pretrained models

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Names of the torchvision architectures to fine-tune, one after the other
models_list = ['vgg16', 'inception_v3', 'densenet121', 'mobilenet_v2']

for model_name in models_list:
    # Load the pre-trained model by looking its constructor up on torchvision.models
    model = getattr(models, model_name)(pretrained=True)

    # Freeze all the pre-trained layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final classification layer; the new layer is created after
    # the freeze, so it is the only part that will be trained.
    if model_name == 'vgg16':
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, len(class_names))
    elif model_name == 'inception_v3':
        # In training mode Inception v3 returns (logits, aux_logits) when its
        # auxiliary classifier is enabled, which train_model's
        # torch.max(outputs, 1) cannot consume. Disabling the auxiliary head
        # makes the forward pass return a plain tensor in both modes.
        # NOTE(review): torchvision documents this model for 299x299 inputs,
        # while the transforms defined earlier produce 224x224 crops - confirm
        # this is acceptable or give Inception its own transform.
        model.aux_logits = False
        model.AuxLogits = None
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(class_names))
    elif model_name == 'densenet121':
        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, len(class_names))
    elif model_name == 'mobilenet_v2':
        num_ftrs = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(num_ftrs, len(class_names))

    # Move the model to the device
    model = model.to(device)

    # Define the loss function
    criterion = nn.CrossEntropyLoss()

    # Optimise exactly the parameters left trainable above. Selecting them by
    # requires_grad works for every architecture; model.classifier does not
    # exist on Inception v3, whose head is model.fc.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Train the model
    model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=25)

    # Save the weights under a per-architecture file name
    torch.save(model.state_dict(), f'model_{model_name}.pth')


# Transfer Learning with Pre-trained ResNet18 on CIFAR-10 Dataset

**Step 1: Setup and Imports**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
import os

# Pick the compute device: the GPU when CUDA is available, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


**Step 2: Data Preparation**

We'll use CIFAR-10 and pretend it is our custom dataset.

We'll use its official training split for training and its official test split for validation.

In [None]:
# Training pipeline: random crop/flip augmentation, then normalisation.
# Validation pipeline: deterministic resize + centre crop, then the same normalisation.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}

# CIFAR-10 is downloaded into data_dir if needed; the train=True split is used
# for training and the train=False split for validation.
data_dir = './data'
trainset = datasets.CIFAR10(
    root=data_dir, train=True, download=True, transform=data_transforms['train'])
valset = datasets.CIFAR10(
    root=data_dir, train=False, download=True, transform=data_transforms['val'])

# Only the training loader shuffles its samples
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
valloader = DataLoader(valset, batch_size=32, shuffle=False, num_workers=2)

# Per-phase lookups read by train_model
dataloaders = dict(train=trainloader, val=valloader)
dataset_sizes = dict(train=len(trainset), val=len(valset))
class_names = trainset.classes


**Step 3: Model Preparation**

Load a pre-trained ResNet18 model and modify the final layer to fit our custom dataset.

In [None]:
# Start from ResNet18 with pre-trained weights
model_ft = models.resnet18(pretrained=True)

# Turn off gradient tracking for every pre-trained parameter
model_ft.requires_grad_(False)

# Swap the fully connected layer for a new linear head with one output per
# entry in class_names; it is created after the freeze above.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=len(class_names))

# Place the network on the selected device (GPU or CPU)
model_ft = model_ft.to(device)


**Step 4: Training**

Define the loss function, optimizer, and training loop.

In [None]:
# Classification loss, SGD over the parameters of the new fc head only, and a
# scheduler that multiplies the learning rate by 0.1 every 7 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(params=model_ft.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)

def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders`` dict; batches are moved to the module-level
    ``device``. Loss and accuracy are printed for both phases.

    Args:
        model: Network to train; it is updated in place.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that reached
        the highest validation accuracy loaded back in.
    """
    since = time.time()  # wall-clock start, for the final timing report
    best_model_wts = copy.deepcopy(model.state_dict())  # snapshot of the best weights so far
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Accumulate plain Python numbers so the metrics (and best_acc)
                # are floats rather than tensors living on the device.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_samples += batch_size

            if is_train:
                scheduler.step()

            # Average over the samples actually seen, so the numbers stay
            # correct even if the loader does not visit the whole dataset;
            # max() avoids a division by zero for an empty loader.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # '.4f' (four decimals) matches the per-epoch report; the former '4f'
    # was a minimum-width spec that printed six decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights before handing the model back
    model.load_state_dict(best_model_wts)
    return model

# Fine-tune for 10 epochs; the returned network carries its best validation weights
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=10,
)


**Step 5: Evaluation**

Evaluate the model's performance on the test set.

In [None]:
def test_model(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to evaluation mode and gradients are disabled while
    predicting. Batches are moved to the module-level ``device``.

    Args:
        model: Network to evaluate.
        dataloader: Iterable yielding ``(images, labels)`` batches.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # index of the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise divide by zero; report 0% for it.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Accuracy of the network on the {total} test images: {accuracy:.2f}%')

# Report the fine-tuned network's accuracy on the validation loader
test_model(model=model_ft, dataloader=valloader)


**Step 6: Visualization**

Visualize some predictions made by the model.

In [None]:
def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalised channels-first image tensor with matplotlib.

    Args:
        img: Image tensor with the channel axis first; it is transposed to
            channels-last for plotting.
        mean: Per-channel mean used when the image was normalised. Defaults to
            the values passed to ``transforms.Normalize`` in this notebook.
        std: Per-channel standard deviation used when the image was normalised.
    """
    npimg = np.transpose(img.detach().cpu().numpy(), (1, 2, 0))
    # Undo Normalize(mean, std): x * std + mean. The previous "img / 2 + 0.5"
    # only inverts a (0.5, 0.5) normalisation and produced wrong colours here.
    npimg = np.clip(npimg * np.asarray(std) + np.asarray(mean), 0.0, 1.0)
    plt.imshow(npimg)
    plt.show()

from torchvision.utils import make_grid  # `torchvision` itself is not imported above

# Take the first validation batch (valloader does not shuffle, so this is not
# a random sample). DataLoader iterators are advanced with the built-in next().
dataiter = iter(valloader)
images, labels = next(dataiter)

# Show only as many images as labels are printed below
num_shown = min(4, images.size(0))

# Print images
imshow(make_grid(images[:num_shown]))
print('GroundTruth: ', ' '.join(f'{class_names[labels[j]]}' for j in range(num_shown)))

# Predictions: evaluation mode and no gradient tracking for pure inference
images = images.to(device)
model_ft.eval()
with torch.no_grad():
    outputs = model_ft(images)
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join(f'{class_names[predicted[j]]}' for j in range(num_shown)))


# **Transfer Learning with Multiple Pre-trained Models on CIFAR-10 Dataset**

In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import time
import copy

# Pick the compute device: the GPU when CUDA is available, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Data Preparation
# Training pipeline: random crop/flip augmentation, then normalisation.
# Validation pipeline: deterministic resize + centre crop, then the same normalisation.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}

# CIFAR-10 is downloaded into data_dir if needed; the train=True split is used
# for training and the train=False split for validation.
data_dir = './data'
trainset = torchvision.datasets.CIFAR10(
    root=data_dir, train=True, download=True, transform=data_transforms['train'])
valset = torchvision.datasets.CIFAR10(
    root=data_dir, train=False, download=True, transform=data_transforms['val'])

# Only the training loader shuffles its samples
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
valloader = DataLoader(valset, batch_size=32, shuffle=False, num_workers=2)

# Per-phase lookups read by train_model
dataloaders = dict(train=trainloader, val=valloader)
dataset_sizes = dict(train=len(trainset), val=len(valset))
class_names = trainset.classes

# Function to prepare a model
def prepare_model(model_name, num_classes, feature_extract=True):
    """Build a pre-trained network with a new ``num_classes``-way output layer.

    Args:
        model_name: One of "resnet" (ResNet18), "vgg" (VGG16) or
            "densenet" (DenseNet-121).
        num_classes: Number of outputs of the replacement layer.
        feature_extract: Forwarded to ``set_parameter_requires_grad`` before
            the output layer is replaced.

    Returns:
        The model, moved to the module-level ``device``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    architectures = {"resnet": "resnet18", "vgg": "vgg16", "densenet": "densenet121"}
    constructor_name = architectures.get(model_name)
    if constructor_name is None:
        raise ValueError("Invalid model name")

    model = getattr(models, constructor_name)(pretrained=True)
    # Applied before the head swap, so the replacement layer is not affected
    set_parameter_requires_grad(model, feature_extract)

    if model_name == "resnet":
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
    elif model_name == "vgg":
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, num_classes)
    else:  # "densenet"
        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, num_classes)

    return model.to(device)

# Function to freeze layers
def set_parameter_requires_grad(model, feature_extracting):
    """Disable gradients on all of ``model``'s parameters when feature extracting.

    Args:
        model: Object whose ``parameters()`` are updated in place.
        feature_extracting: When falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

# Prepare models
# One prepared network per architecture key, each with a len(class_names)-way head
models_list = {
    name: prepare_model(name, len(class_names))
    for name in ('resnet', 'vgg', 'densenet')
}

# Training Function
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders`` dict; batches are moved to the module-level
    ``device``. Loss and accuracy are printed for both phases.

    Args:
        model: Network to train; it is updated in place.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after every training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that reached
        the highest validation accuracy loaded back in.
    """
    since = time.time()  # wall-clock start, for the final timing report
    best_model_wts = copy.deepcopy(model.state_dict())  # snapshot of the best weights so far
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Accumulate plain Python numbers so the metrics (and best_acc)
                # are floats rather than tensors living on the device.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_samples += batch_size

            if is_train:
                scheduler.step()

            # Average over the samples actually seen, so the numbers stay
            # correct even if the loader does not visit the whole dataset;
            # max() avoids a division by zero for an empty loader.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # '.4f' (four decimals) matches the per-epoch report; the former '4f'
    # was a minimum-width spec that printed six decimals.
    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights before handing the model back
    model.load_state_dict(best_model_wts)
    return model

# Train each model
for model_name, model in models_list.items():
    print(f"Training {model_name} model...")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    models_list[model_name] = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=10)
    print(f"Finished training {model_name} model.\n")

# Evaluation Function
def test_model(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to evaluation mode and gradients are disabled while
    predicting. Batches are moved to the module-level ``device``.

    Args:
        model: Network to evaluate.
        dataloader: Iterable yielding ``(images, labels)`` batches.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # index of the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise divide by zero; report 0% for it.
    accuracy = 100 * correct / total if total else 0.0
    print(f'Accuracy of the network on the {total} test images: {accuracy:.2f}%')

# Evaluate each model
for model_name, model in models_list.items():
    print(f"Evaluating {model_name} model...")
    test_model(model, valloader)
    print(f"Finished evaluating {model_name} model.\n")

# Visualization
def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalised channels-first image tensor with matplotlib.

    Args:
        img: Image tensor with the channel axis first; it is transposed to
            channels-last for plotting.
        mean: Per-channel mean used when the image was normalised. Defaults to
            the values passed to ``transforms.Normalize`` in this notebook.
        std: Per-channel standard deviation used when the image was normalised.
    """
    npimg = np.transpose(img.detach().cpu().numpy(), (1, 2, 0))
    # Undo Normalize(mean, std): x * std + mean. The previous "img / 2 + 0.5"
    # only inverts a (0.5, 0.5) normalisation and produced wrong colours here.
    npimg = np.clip(npimg * np.asarray(std) + np.asarray(mean), 0.0, 1.0)
    plt.imshow(npimg)
    plt.show()

# Visualize predictions
def visualize_model(model, num_images=6):
    """Plot the first ``num_images`` validation images with predicted labels.

    Images come from the module-level ``valloader`` and are drawn in a
    two-column grid on a single figure. The model's training/eval mode is
    restored before returning, even if plotting or inference fails.

    Args:
        model: Network used to predict a class for every image.
        num_images: Number of images to draw; nothing is drawn when it is
            not positive.
    """
    if num_images <= 0:
        return

    # Statistics passed to transforms.Normalize for the validation data,
    # needed to map the tensors back to displayable [0, 1] pixel values.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    was_training = model.training
    model.eval()
    images_so_far = 0
    num_rows = (num_images + 1) // 2  # round up so an odd count still fits
    fig = plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in valloader:
                inputs = inputs.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = fig.add_subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    # Draw straight onto the subplot: calling plt.show() for
                    # every image would break the grid apart.
                    pixels = inputs[j].cpu().numpy().transpose((1, 2, 0))
                    ax.imshow(np.clip(pixels * std + mean, 0.0, 1.0))

                    if images_so_far == num_images:
                        break

                if images_so_far == num_images:
                    break
        plt.show()
    finally:
        model.train(mode=was_training)

# Visualize predictions for each model
for model_name, model in models_list.items():
    print(f"Visualizing predictions for {model_name} model...")
    visualize_model(model)
    print(f"Finished visualizing {model_name} model.\n")


## Saving and Loading Models in PyTorch

### Introduction
PyTorch provides multiple ways to save and load models. Choosing the right method depends on your specific needs.

### Saving Methods

#### 1. Saving the Entire Model
Saves the whole model, including its architecture and parameters.
```python
torch.save(model, 'model.pth')
