In [1]:
import os
import cv2
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

In [2]:
# Pick the compute backend in order of preference: CUDA GPU, Apple MPS, CPU.
# `torch.backends.mps` is looked up defensively because PyTorch builds that
# predate the MPS backend do not expose that attribute at all, and an
# AttributeError here would prevent the CPU fallback from ever being reached.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device = 'cuda'
elif _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Root folder of the image dataset (relative to the notebook's working directory).
dataset_path = 'APS360_Project_Dataset/dataset4_preprocessed_augmented'

def load_images_for_dataset4(base_path):
    """Load every readable image under ``base_path`` together with its label.

    ``base_path`` is expected to contain one sub-folder per class, where the
    folder name is the integer label (e.g. ``0`` ... ``9``). Every file inside
    a label folder is read with ``cv2.imread``.

    Sub-folders whose name is not an integer (for example Jupyter's
    ``.ipynb_checkpoints``) are skipped with a warning instead of aborting the
    whole load. Files that OpenCV cannot decode are skipped with a warning.

    Args:
        base_path: Path of the dataset root folder.

    Returns:
        ``(images, labels)``: two parallel lists holding the arrays returned
        by ``cv2.imread`` and the ``int`` label of each image.
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the sample order (and therefore any seeded split performed
    # on the returned lists) identical from one machine/run to the next.
    for label in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, label)
        if not os.path.isdir(folder_path):
            continue

        try:
            label_id = int(label)  # Folder name is the label (0-9)
        except ValueError:
            print(f"Warning: Skipping folder {folder_path} (name is not an integer label)")
            continue

        for img_file in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_file)
            image = cv2.imread(img_path)

            # cv2.imread signals an unreadable file by returning None.
            if image is None:
                print(f"Warning: Failed to load image {img_path}")
                continue

            images.append(image)
            labels.append(label_id)

    return images, labels

def split_data(images, labels, test_size=0.1, val_size=0.2, random_state=42):
    """Partition samples into train, validation and test subsets.

    The split happens in two passes with ``train_test_split``: the test subset
    is taken from the full data first, then the validation subset is taken
    from what is left. ``val_size`` is therefore a fraction of the data that
    remains after the test subset has been removed, not of the full dataset.

    Args:
        images: Sequence of samples.
        labels: Sequence of labels, parallel to ``images``.
        test_size: Passed as ``test_size`` to the first split.
        val_size: Passed as ``test_size`` to the second split.
        random_state: Seed forwarded to both splits.

    Returns:
        ``(train_images, train_labels, val_images, val_labels, test_images,
        test_labels)``.
    """
    # Pass 1: hold out the test subset.
    remaining_x, held_out_x, remaining_y, held_out_y = train_test_split(
        images,
        labels,
        test_size=test_size,
        random_state=random_state,
    )

    # Pass 2: divide the remainder into training and validation subsets.
    fit_x, val_x, fit_y, val_y = train_test_split(
        remaining_x,
        remaining_y,
        test_size=val_size,
        random_state=random_state,
    )

    return fit_x, fit_y, val_x, val_y, held_out_x, held_out_y


In [3]:
class DigitDataset(Dataset):
    """Map-style dataset serving in-memory digit images as grayscale tensors.

    Args:
        images: Sequence of NumPy image arrays, either 2-D ``(H, W)`` grayscale
            or ``(H, W, 3)`` colour in OpenCV's BGR channel order.
        labels: Sequence of integer class labels, parallel to ``images``.
    """

    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is a tensor with a leading channel axis, ``(1, H, W)``;
        ``label`` is a scalar ``torch.long`` tensor.
        """
        image = self.images[idx]
        label = self.labels[idx]

        # Anything that is not already float32 is cast and scaled by 1/255
        # (8-bit pixel range -> [0, 1]). float32 input is passed through
        # unchanged, i.e. it is trusted to be normalised already.
        if image.dtype != np.float32:
            image = image.astype(np.float32) / 255.0

        # Collapse a 3-channel BGR image to a single grayscale channel.
        # Requiring ndim == 3 keeps a 2-D grayscale image that merely happens
        # to be 3 pixels wide from being mistaken for a colour image.
        if image.ndim == 3 and image.shape[-1] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Convert to tensor and add the channel dimension: (H, W) -> (1, H, W)
        image = torch.tensor(image).unsqueeze(0)
        label = torch.tensor(label, dtype=torch.long)
        return image, label
    
def create_dataloaders(batch_size, train_images, train_labels, val_images, val_labels, test_images, test_labels):
    """Wrap the three data subsets in ``DigitDataset`` objects and ``DataLoader``s.

    Only the training loader shuffles; the validation and test loaders iterate
    in the order of the lists they were given.

    Args:
        batch_size: Batch size used by all three loaders.
        train_images, train_labels: Training samples and labels.
        val_images, val_labels: Validation samples and labels.
        test_images, test_labels: Test samples and labels.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    # One (images, labels, shuffle) entry per subset, in return order.
    subset_specs = (
        (train_images, train_labels, True),
        (val_images, val_labels, False),
        (test_images, test_labels, False),
    )

    train_loader, val_loader, test_loader = [
        DataLoader(
            DigitDataset(subset_images, subset_labels),
            batch_size=batch_size,
            shuffle=do_shuffle,
        )
        for subset_images, subset_labels, do_shuffle in subset_specs
    ]

    return train_loader, val_loader, test_loader

In [4]:
# Load the whole dataset into memory, then split it into train/val/test subsets.
images, labels = load_images_for_dataset4(dataset_path)

train_images, train_labels, val_images, val_labels, test_images, test_labels = split_data(images, labels)

batch_size = 8
train_loader, val_loader, test_loader = create_dataloaders(batch_size, train_images, train_labels, val_images, val_labels, test_images, test_labels)

# Print dataset sizes for verification
print(f"Train set: {len(train_loader.dataset)} images")
print(f"Validation set: {len(val_loader.dataset)} images")
print(f"Test set: {len(test_loader.dataset)} images")

# Peek at a single batch to confirm the tensor layout. The loop variables have
# their own names so that `images` / `labels` (the full lists loaded at the top
# of this cell) are not silently rebound to one batch of tensors.
for batch_images, _batch_labels in train_loader:
    print(batch_images.shape)  # Should print torch.Size([batch_size, channels, height, width])
    break

Train set: 2052 images
Validation set: 513 images
Test set: 285 images
torch.Size([8, 1, 224, 224])


In [5]:
import torch.nn as nn
import torch.nn.functional as F

class DigitCNN(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Each stage is a 3x3 convolution with padding 1 (spatial size preserved),
    a ReLU, and a 2x2 max-pool that halves height and width. After the three
    stages the feature map has 128 channels at 1/8 of the input resolution; it
    is flattened per sample and fed through two fully connected layers.

    Args:
        num_classes: Number of output logits. Defaults to 10 (digits 0-9).
        input_size: Spatial size of the images the network will receive,
            either a single int for square images or a ``(height, width)``
            pair. Defaults to 224, which gives ``128 * 28 * 28`` flattened
            features.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, num_classes=10, input_size=224):
        super(DigitCNN, self).__init__()

        try:
            height, width = input_size
        except TypeError:
            # A scalar was given: treat the input as square.
            height = width = input_size

        # Three 2x2 max-pools (which round down) shrink each side by a factor of 8.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 8 pixels"
            )

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * feat_h * feat_w, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of shape ``(N, 1, H, W)`` to logits of shape ``(N, num_classes)``."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch axis. Unlike view(-1, C*H*W),
        # this can never fold surplus spatial positions into the batch
        # dimension: an input of the wrong size fails loudly at fc1 instead
        # of silently producing predictions for a different number of samples.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [6]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    """Train ``model`` and report validation accuracy after every epoch.

    For each epoch the function runs one optimisation pass over
    ``train_loader``, prints the average training loss, then evaluates on
    ``val_loader`` with gradients disabled and prints the accuracy.

    Args:
        model: Network to train; updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A dict with two lists holding one value per epoch: ``"train_loss"``
        (average training loss) and ``"val_acc"`` (validation accuracy in
        percent). A value is ``nan`` when the corresponding loader produced
        no samples.
    """
    # Move batches to wherever the model's weights live rather than relying on
    # a notebook-level global, so inputs and weights can never end up on
    # different devices.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    history = {"train_loss": [], "val_acc": []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        seen = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size to get a per-sample average
            # (assumes the criterion returns a batch mean - TODO confirm for
            # criteria other than the default CrossEntropyLoss).
            running_loss += loss.item() * images.size(0)
            seen += images.size(0)

        # Average over the samples actually processed; this also avoids a
        # ZeroDivisionError when the loader is empty.
        epoch_loss = running_loss / seen if seen else float("nan")
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

        # Validation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(target_device), labels.to(target_device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_acc = 100 * correct / total if total else float("nan")
        print(f"Validation Accuracy: {val_acc:.2f}%")

        history["train_loss"].append(epoch_loss)
        history["val_acc"].append(val_acc)

    return history

def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc = 100 * correct / total
    print(f"Test Accuracy: {test_acc:.2f}%")

In [7]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the training loader's shuffle order are the same every
# time this cell is re-run. (Some GPU kernels may still be non-deterministic.)
torch.manual_seed(42)

model = DigitCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=15)

# Test the model
test_model(model, test_loader)


Epoch 1/15, Loss: 2.3036
Validation Accuracy: 12.48%
Epoch 2/15, Loss: 2.2328
Validation Accuracy: 28.07%
Epoch 3/15, Loss: 1.3438
Validation Accuracy: 64.72%
Epoch 4/15, Loss: 0.5064
Validation Accuracy: 83.24%
Epoch 5/15, Loss: 0.1778
Validation Accuracy: 89.28%
Epoch 6/15, Loss: 0.0603
Validation Accuracy: 90.25%
Epoch 7/15, Loss: 0.0359
Validation Accuracy: 91.03%
Epoch 8/15, Loss: 0.0068
Validation Accuracy: 90.64%
Epoch 9/15, Loss: 0.1023
Validation Accuracy: 86.16%
Epoch 10/15, Loss: 0.1265
Validation Accuracy: 86.94%
Epoch 11/15, Loss: 0.0696
Validation Accuracy: 88.11%
Epoch 12/15, Loss: 0.0073
Validation Accuracy: 87.13%
Epoch 13/15, Loss: 0.0040
Validation Accuracy: 88.69%
Epoch 14/15, Loss: 0.0008
Validation Accuracy: 89.08%
Epoch 15/15, Loss: 0.0005
Validation Accuracy: 88.89%
Test Accuracy: 84.56%
