In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import zipfile
import os
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import Dataset

In [13]:
# Root directory holding the checkpoint and metrics files. Set the
# IMAGE_PROCESSING_DATA_DIR environment variable to relocate it on another
# machine; the default keeps the location this notebook was written against.
DATA_DIR = os.environ.get("IMAGE_PROCESSING_DATA_DIR", "/home/vmchura/data/009.ImageProcessing")

# Checkpoint file (model + optimizer state) used to save and resume training.
checkpoint_path = os.path.join(DATA_DIR, "model_checkpoint_scratch_06.pth")
# JSON file that accumulates the per-epoch training/validation metrics.
metrics_path = os.path.join(DATA_DIR, "training_metrics_scratch_06.json")


In [6]:
# Per-channel mean/std used to standardize inputs in both pipelines
# (the original notebook labels these as the CIFAR-10 normalization values).
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)
# Spatial size every image is resized to before any other step.
INPUT_SIZE = (32, 32)

# Training pipeline: resize, random augmentation, tensor conversion, normalization.
train_augmentation_steps = [
    transforms.Resize(INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    # Increased jitter
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    # Added scaling
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
train_transform = transforms.Compose(train_augmentation_steps)

# Evaluation pipeline: no augmentation, only resize + tensor conversion + normalization.
eval_steps = [
    transforms.Resize(INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
]
test_transform = transforms.Compose(eval_steps)

# Folder-per-class datasets; each split gets its own transform pipeline.
train_dataset = datasets.ImageFolder(
    root="/home/vmchura/data/009.ImageProcessing/dataset_CIFAR10/train",
    transform=train_transform,
)
test_dataset = datasets.ImageFolder(
    root="/home/vmchura/data/009.ImageProcessing/dataset_CIFAR10/validation",
    transform=test_transform,
)


In [7]:
# Mini-batch size shared by the training and validation loaders.
BATCH_SIZE = 32

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


class ImageClassifier(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Three ``conv -> ReLU -> batch-norm -> 2x2 max-pool`` stages (64, 128 and
    256 channels) halve the spatial size each time (32 -> 16 -> 8 -> 4). The
    resulting ``256 * 4 * 4`` features feed a 512-unit fully connected layer
    with dropout, followed by a linear layer producing ``num_classes`` logits.

    Attribute names are part of the ``state_dict`` keys, so they must stay
    stable for previously saved checkpoints to keep loading.

    Args:
        num_classes: Number of logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stage 1: 3 -> 64 channels.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 64 -> 128 channels.
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 3: 128 -> 256 channels.
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(256)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head; 256 * 4 * 4 is the flattened size for a 32x32 input.
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.relu4 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw (unnormalized) class logits, one row per input sample.

        Args:
            x: Image batch of shape ``(batch, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the spatial size is not 32x32, because the
                flattened feature count no longer matches ``fc1``.
        """
        x = self.pool1(self.bn1(self.relu1(self.conv1(x))))
        x = self.pool2(self.bn2(self.relu2(self.conv2(x))))
        x = self.pool3(self.bn3(self.relu3(self.conv3(x))))
        # Flatten per sample and keep the batch dimension fixed. The previous
        # ``view(-1, 256 * 4 * 4)`` silently folded surplus spatial elements
        # into the batch dimension for wrongly sized inputs, producing more
        # output rows than input samples instead of failing.
        x = torch.flatten(x, 1)
        x = self.dropout1(self.relu4(self.fc1(x)))
        x = self.fc2(x)
        return x

    def predict(self, x):
        """Return class probabilities (softmax over the class dimension).

        This neither switches the module to evaluation mode nor disables
        gradient tracking; call ``model.eval()`` (and use ``torch.no_grad()``)
        beforehand when deterministic inference is required.
        """
        return torch.softmax(self.forward(x), dim=1)

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Show the class names exposed by the training dataset.
print(train_dataset.classes)

# Loss function shared by the training and evaluation helpers.
criterion = nn.CrossEntropyLoss()

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [8]:
def train_iteration(dataloader, model, loss_fn, optimizer, device=None):
    """Train ``model`` for one full pass over ``dataloader``.

    Args:
        dataloader: Yields ``(images, labels)`` batches; must support ``len()``
            and expose a sized ``.dataset``.
        model: Module to optimize; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device each batch is moved to. When omitted, the device of the
            model's first parameter is used, so the function does not rely on
            a notebook-level global. A model without parameters leaves the
            batches where they are.

    Returns:
        dict with ``'average_loss'`` (mean of the per-batch loss values) and
        ``'average_accuracy'`` (correct predictions / number of samples).

    Raises:
        ValueError: If the dataloader or its dataset is empty.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    if size == 0 or num_batches == 0:
        raise ValueError("train_iteration requires a non-empty dataloader")

    model.train()
    # Drop gradients left over from an interrupted earlier run (easy to hit
    # when a notebook cell is stopped between backward() and zero_grad());
    # otherwise they would be added to the first batch's gradients.
    optimizer.zero_grad()

    average_loss = 0
    average_accuracy = 0
    for images, labels in dataloader:
        if device is not None:
            images, labels = images.to(device), labels.to(device)
        predictions = model(images)
        loss = loss_fn(predictions, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        average_loss += loss.item()
        # Count of correct top-1 predictions in this batch.
        average_accuracy += (predictions.argmax(1) == labels).type(torch.float).sum().item()

    # Loss is averaged over batches (a smaller final batch weighs as much as a
    # full one); accuracy is averaged over samples.
    average_loss /= num_batches
    average_accuracy /= size
    return {'average_loss': average_loss, 'average_accuracy': average_accuracy}

def test_iteration(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    average_loss = 0
    average_accuracy = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            predictions = model(images)
            average_loss += loss_fn(predictions, labels).item()
            average_accuracy += (predictions.argmax(1) == labels).type(torch.float).sum().item()
    average_loss /= num_batches
    average_accuracy /= size
    return {'average_loss': average_loss, 'average_accuracy': average_accuracy}

In [18]:
# Build a fresh model/optimizer pair; their state is overwritten below when a
# checkpoint from an earlier run exists.
model = ImageClassifier(num_classes=len(train_dataset.classes)).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

try:
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint written on a GPU can be resumed on a CPU-only machine (and
    # the other way round) instead of failing while deserializing.
    # NOTE(review): torch.load unpickles the file; only load checkpoints that
    # were produced by this notebook or another trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # 'epoch' holds the number of completed epochs, i.e. the 0-based index of
    # the next epoch to run.
    start_epoch = checkpoint['epoch']
    print(f"Resuming training from epoch {start_epoch}")
except FileNotFoundError:
    print("No checkpoint found. Starting training from scratch.")
    start_epoch = 0

Resuming training from epoch 5


In [19]:
import json

def save_metrics(epoch, train_results, test_results, filename):
    """Append one epoch's metrics to the JSON history stored in ``filename``.

    The file holds a JSON list with one record per call. It is rewritten
    atomically: the new content goes to a sibling temporary file that then
    replaces the target, so an interruption or a serialization error can no
    longer leave a truncated history that breaks the next call.

    Args:
        epoch: Zero-based epoch index; stored as ``epoch + 1``.
        train_results: Mapping with ``'average_loss'`` and
            ``'average_accuracy'`` for the training split.
        test_results: Mapping with the same keys for the evaluation split.
        filename: Path of the JSON history file; created when missing.

    Raises:
        TypeError: If a metric value is not JSON serializable (the existing
            file is left untouched).
        json.JSONDecodeError: If the existing file is not valid JSON.
    """
    try:
        with open(filename, 'r') as f:
            metrics = json.load(f)
    except FileNotFoundError:
        metrics = []

    metrics.append({
        'epoch': epoch + 1,
        'train_loss': train_results['average_loss'],
        'train_accuracy': train_results['average_accuracy'],
        'test_loss': test_results['average_loss'],
        'test_accuracy': test_results['average_accuracy']
    })

    # Serialize before touching the disk so bad values fail early.
    payload = json.dumps(metrics, indent=4)

    tmp_filename = os.fspath(filename) + ".tmp"
    try:
        with open(tmp_filename, 'w') as f:
            f.write(payload)
        # Swap the finished file into place in a single step.
        os.replace(tmp_filename, filename)
    except BaseException:
        # Do not leave a partial temporary file behind; keep the original error.
        try:
            os.remove(tmp_filename)
        except OSError:
            pass
        raise

In [20]:
# Total number of epochs for the whole run; a resumed run continues from
# start_epoch and stops at the same total.
num_epochs = 60
for epoch in range(start_epoch, num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_results = train_iteration(train_loader, model, criterion, optimizer)
    test_results = test_iteration(test_loader, model, criterion)
    save_metrics(epoch, train_results, test_results, metrics_path)
    # Report the path actually written instead of a hard-coded file name, so
    # the message cannot drift when metrics_path changes.
    print(f"Metrics saved to {metrics_path}")
    print(f"Train Loss: {train_results['average_loss']}, Train Accuracy: {train_results['average_accuracy']}")
    print(f"Test Loss: {test_results['average_loss']}, Test Accuracy: {test_results['average_accuracy']}")
    # Save model state after each epoch. 'epoch' stores the number of
    # completed epochs, which is the index a resumed run starts from.
    checkpoint = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_results['average_loss'],
        'train_accuracy': train_results['average_accuracy'],
        'test_loss': test_results['average_loss'],
        'test_accuracy': test_results['average_accuracy']
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at {checkpoint_path}")

print("Finished Training")

Epoch 6
-------------------------------
Metrics saved to training_metrics_scratch_06.json
Train Loss: 1.1019086255788804, Train Accuracy: 0.621475
Test Loss: 0.802418873618586, Test Accuracy: 0.7264
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_06.pth
Epoch 7
-------------------------------
Metrics saved to training_metrics_scratch_06.json
Train Loss: 1.04795581138134, Train Accuracy: 0.64035
Test Loss: 0.7530800975597324, Test Accuracy: 0.7427
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_06.pth
Epoch 8
-------------------------------
Metrics saved to training_metrics_scratch_06.json
Train Loss: 1.0065943698883058, Train Accuracy: 0.6522
Test Loss: 0.7397101495664912, Test Accuracy: 0.7494
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_06.pth
Epoch 9
-------------------------------
Metrics saved to training_metrics_scratch_06.json
Train Loss: 0.9880403145074844, Train Accur