In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import zipfile
import os
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import Dataset

In [1]:
# Directory that holds every artifact written by this run.
ARTIFACT_DIR = "/home/vmchura/data/009.ImageProcessing"

# Resumable training state (epoch counter, model weights, optimizer state).
checkpoint_path = f"{ARTIFACT_DIR}/model_checkpoint_scratch_05_complex.pth"
# Per-epoch loss/accuracy history, stored as a JSON list.
metrics_path = f"{ARTIFACT_DIR}/training_metrics_scratch_05_complex.json"


In [3]:
# Preprocessing constants shared by the training and evaluation pipelines.
# NOTE(review): the mean/std triplets are the widely used ImageNet channel
# statistics — confirm they are the intended normalisation for this dataset.
_INPUT_SIZE = (64, 64)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Random augmentations applied to training images only, in this order.
_train_augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
]

# Training pipeline: resize -> augment -> tensor -> normalise.
train_transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),
        *_train_augmentations,
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# Evaluation pipeline: same resize and normalisation, no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)




# Training split: images are read from disk and passed through the augmenting
# pipeline.
train_dataset = datasets.ImageFolder(
    root="/home/vmchura/data/009.ImageProcessing/dataset_CIFAR10/train",
    transform=train_transform,
)
# Held-out split used for per-epoch evaluation: deterministic preprocessing only.
test_dataset = datasets.ImageFolder(
    root="/home/vmchura/data/009.ImageProcessing/dataset_CIFAR10/validation",
    transform=test_transform,
)


In [4]:

# Mini-batch loaders. The training split is reshuffled on every pass; the
# evaluation split is always iterated in the same order.
_BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=_BATCH_SIZE, shuffle=False)


class ImageClassifier(nn.Module):
    """Four conv/BN/ReLU/max-pool stages followed by a three-layer MLP head.

    Every pooling stage halves the spatial size and ``fc1`` expects
    ``512 * 4 * 4`` features per sample, so the network is sized for
    3-channel inputs such as 64x64 images (64 -> 32 -> 16 -> 8 -> 4).
    Inputs that do not reduce to ``512 * 4 * 4`` features per sample raise
    a ``RuntimeError`` in ``fc1``.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable
        # to stay compatible with previously saved checkpoints.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 8192 -> 1024 -> 512 -> num_classes, with dropout
        # after each hidden activation.
        self.fc1 = nn.Linear(512 * 4 * 4, 1024)
        self.dropout1 = nn.Dropout(0.5)
        self.relu5 = nn.ReLU()
        self.fc2 = nn.Linear(1024, 512)
        self.dropout2 = nn.Dropout(0.5)
        self.relu6 = nn.ReLU()
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits, one row per input sample.

        Args:
            x: Batch of images laid out as (batch, 3, height, width).

        Raises:
            RuntimeError: If the per-sample feature count after the last
                pooling stage is not ``512 * 4 * 4``.
        """
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.pool4(self.relu4(self.bn4(self.conv4(x))))

        # Flatten per sample and keep the batch dimension fixed. A
        # ``view(-1, 512 * 4 * 4)`` would instead fold surplus spatial
        # positions of a wrongly sized input into extra batch rows and return
        # the wrong number of predictions without any error.
        x = torch.flatten(x, start_dim=1)

        x = self.dropout1(self.relu5(self.fc1(x)))
        x = self.dropout2(self.relu6(self.fc2(x)))
        x = self.fc3(x)
        return x

    def predict(self, x):
        """Return class probabilities: softmax of ``forward(x)`` over dim 1.

        This does not switch the module to eval mode or disable gradient
        tracking; callers wanting deterministic inference should call
        ``model.eval()`` (and typically ``torch.no_grad()``) themselves.
        """
        return torch.softmax(self.forward(x), dim=1)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Show the class names discovered for the training split.
print(train_dataset.classes)
# Cross-entropy on raw logits; passed to both the training and evaluation passes.
criterion = nn.CrossEntropyLoss()

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [5]:
def train_iteration(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and summarise it.

    The model is put in training mode and updated once per batch. Batches are
    moved to the module-level ``device`` before the forward pass.

    Args:
        dataloader: Iterable of ``(images, labels)`` batches; must expose
            ``.dataset`` and support ``len()``.
        model: Network being trained.
        loss_fn: Callable mapping ``(predictions, labels)`` to a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        dict with ``'average_loss'`` (sum of per-batch losses divided by the
        number of batches) and ``'average_accuracy'`` (correct predictions
        divided by ``len(dataloader.dataset)``).
    """
    sample_count = len(dataloader.dataset)
    model.train()
    loss_total = 0
    correct_total = 0
    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = loss_fn(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Metrics use the logits computed before this batch's weight update.
        loss_total += batch_loss.item()
        hits = logits.argmax(1) == batch_labels
        correct_total += hits.type(torch.float).sum().item()

    mean_loss = loss_total / len(dataloader)
    accuracy = correct_total / sample_count
    return {'average_loss': mean_loss, 'average_accuracy': accuracy}

def test_iteration(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    average_loss = 0
    average_accuracy = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            predictions = model(images)
            average_loss += loss_fn(predictions, labels).item()
            average_accuracy += (predictions.argmax(1) == labels).type(torch.float).sum().item()
    average_loss /= num_batches
    average_accuracy /= size
    return {'average_loss': average_loss, 'average_accuracy': average_accuracy}

In [6]:
# Build a fresh network/optimizer pair; both are overwritten with the saved
# state below when a checkpoint file exists.
model = ImageClassifier(num_classes=len(train_dataset.classes)).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

try:
    # map_location remaps the saved tensors onto the current device, so a
    # checkpoint written on a GPU machine can be resumed on a CPU-only one.
    # NOTE(review): torch.load unpickles the file — only load checkpoints you
    # wrote yourself (or pass weights_only=True on PyTorch versions that
    # support it).
    checkpoint = torch.load(checkpoint_path, map_location=device)
except FileNotFoundError:
    print("No checkpoint found. Starting training from scratch.")
    start_epoch = 0
else:
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # The stored value is the number of completed epochs, which is also the
    # zero-based index of the next epoch to run.
    start_epoch = checkpoint['epoch']
    print(f"Resuming training from epoch {start_epoch}")

No checkpoint found. Starting training from scratch.


In [7]:
import json

def save_metrics(epoch, train_results, test_results, filename):
    """Append one epoch's train/test metrics to a JSON history file.

    The file holds a JSON list with one record per call. It is created when
    missing. The update is written to a temporary sibling file and then
    swapped in, so a failure or interruption never leaves a truncated
    history behind.

    Args:
        epoch: Zero-based epoch index; stored as ``epoch + 1``.
        train_results: Mapping with ``'average_loss'`` and
            ``'average_accuracy'`` for the training pass.
        test_results: Mapping with the same two keys for the evaluation pass.
        filename: Path of the JSON history file.

    Raises:
        json.JSONDecodeError: If an existing history file is not valid JSON.
        TypeError: If a metric value cannot be encoded as JSON (the existing
            file is left untouched in that case).
    """
    try:
        with open(filename, 'r') as f:
            metrics = json.load(f)
    except FileNotFoundError:
        metrics = []

    metrics.append({
        'epoch': epoch + 1,
        'train_loss': train_results['average_loss'],
        'train_accuracy': train_results['average_accuracy'],
        'test_loss': test_results['average_loss'],
        'test_accuracy': test_results['average_accuracy']
    })

    # Serialise before touching the disk: an encoding error surfaces here,
    # while the previous history is still intact.
    payload = json.dumps(metrics, indent=4)

    # Write next to the target, then rename over it; os.replace swaps the
    # file in a single step instead of truncating the live history first.
    tmp_filename = f"{os.fspath(filename)}.tmp"
    with open(tmp_filename, 'w') as f:
        f.write(payload)
    os.replace(tmp_filename, filename)

In [8]:
# Train up to num_epochs in total, continuing from start_epoch when a
# checkpoint was loaded. Each epoch: train, evaluate, record metrics, then
# checkpoint.
num_epochs = 60
for epoch in range(start_epoch, num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_results = train_iteration(train_loader, model, criterion, optimizer)
    test_results = test_iteration(test_loader, model, criterion)
    save_metrics(epoch, train_results, test_results, metrics_path)
    # The message names the file by its basename; the actual target is metrics_path.
    print("Metrics saved to training_metrics_scratch_05_complex.json")
    print(f"Train Loss: {train_results['average_loss']}, Train Accuracy: {train_results['average_accuracy']}")
    print(f"Test Loss: {test_results['average_loss']}, Test Accuracy: {test_results['average_accuracy']}")
    # 'epoch' stores the number of completed epochs, i.e. the index to resume from.
    checkpoint = {
        'epoch': epoch + 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }
    # Write to a temporary sibling and rename it over the real checkpoint. An
    # interrupt during torch.save then cannot truncate the only checkpoint,
    # which the resume cell would otherwise fail to load.
    tmp_checkpoint_path = f"{checkpoint_path}.tmp"
    torch.save(checkpoint, tmp_checkpoint_path)
    os.replace(tmp_checkpoint_path, checkpoint_path)
    print(f"Checkpoint saved at {checkpoint_path}")

print("Finished Training")

Epoch 1
-------------------------------
Metrics saved to training_metrics_scratch_05_complex.json
Train Loss: 1.9408661337852477, Train Accuracy: 0.279875
Test Loss: 1.631200848676907, Test Accuracy: 0.4193
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_05_complex.pth
Epoch 2
-------------------------------
Metrics saved to training_metrics_scratch_05_complex.json
Train Loss: 1.6210318612098693, Train Accuracy: 0.408075
Test Loss: 1.3326427110086996, Test Accuracy: 0.5309
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_05_complex.pth
Epoch 3
-------------------------------
Metrics saved to training_metrics_scratch_05_complex.json
Train Loss: 1.4434625956535339, Train Accuracy: 0.482125
Test Loss: 1.2659303879204649, Test Accuracy: 0.5504
Checkpoint saved at /home/vmchura/data/009.ImageProcessing/model_checkpoint_scratch_05_complex.pth
Epoch 4
-------------------------------
Metrics saved to training_metrics_scratc