In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
import itertools
import time
import random
import os
from PIL import Image

In [None]:
class CNN(nn.Module):
    """Small image classifier: one conv/pool stage followed by three dense layers.

    Pipeline: Conv2d(3 -> num_filters, padding=1) -> ReLU -> MaxPool2d(2, 2)
    -> flatten -> fc1 -> ReLU -> dropout -> fc2 -> ReLU -> fc3 (raw logits).

    Args:
        num_filters: Number of output channels produced by the convolution.
        kernel_size: Side length of the square convolution kernel.
        dropout_rate: Dropout probability applied after the first dense layer.
        num_units1: Width of the first fully connected layer.
        num_units2: Width of the second fully connected layer.
        num_classes: Number of output logits. Defaults to 5
            (airfield, bus stand, canyon, market, temple).
        input_size: Side length of the square RGB input images. Defaults to 64.

    Raises:
        ValueError: If the kernel is too large for the input, so that nothing
            would be left after the conv + pool stage.
    """

    def __init__(self, num_filters, kernel_size, dropout_rate, num_units1, num_units2,
                 num_classes=5, input_size=64):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, num_filters, kernel_size=kernel_size, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Inputs are expected to be (3, input_size, input_size).
        self.input_size = input_size
        # Spatial side length after conv + pool; 2 is the pooling window/stride.
        self.conv_output_size = self._get_conv_output_size(self.input_size, kernel_size, 2)
        if self.conv_output_size < 1:
            raise ValueError(
                f"kernel_size={kernel_size} is too large for input_size={input_size}"
            )

        flat_features = num_filters * self.conv_output_size * self.conv_output_size
        self.fc1 = nn.Linear(flat_features, num_units1)
        self.fc2 = nn.Linear(num_units1, num_units2)
        self.fc3 = nn.Linear(num_units2, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return unnormalised class logits of shape (batch, num_classes)."""
        x = self.pool(torch.relu(self.conv1(x)))
        # Collapse (channels, height, width) into one feature axis per sample.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)  # dropout is only applied after the first dense layer
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all non-batch dims)."""
        num_features = 1
        for dim in x.size()[1:]:  # skip the batch dimension
            num_features *= dim
        return num_features

    def _get_conv_output_size(self, input_size, kernel_size, pool_size):
        """Return the spatial side length after one conv (stride 1, padding 1) + pool.

        Args:
            input_size: Side length of the square input.
            kernel_size: Side length of the square convolution kernel.
            pool_size: Pooling window size, also used as the pooling stride.
        """
        padding = 1  # must match the padding passed to conv1
        # Stride-1 convolution: out = in + 2 * padding - kernel + 1
        after_conv = input_size + 2 * padding - kernel_size + 1
        # Non-overlapping pooling with floor rounding: out = (in - pool) // pool + 1
        return (after_conv - pool_size) // pool_size + 1


In [None]:
def load_data(root_dir, batch_size):
    """Build train/test/validation loaders from image folders under ``root_dir``.

    The three splits are read from the ``train``, ``test`` and ``validation``
    sub-directories of ``root_dir`` and share one preprocessing pipeline:
    resize to 64x64, convert to a tensor, normalise with mean 0.5 / std 0.5.

    Args:
        root_dir: Directory containing the three split sub-directories.
        batch_size: Batch size used by all three loaders.

    Returns:
        Tuple ``(train_loader, test_loader, validation_loader, classes)`` where
        ``classes`` is the class-name list reported by the training dataset.
        Only the training loader shuffles its samples.
    """
    preprocessing = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    def _image_folder(split_name):
        # One ImageFolder per split, all sharing the same preprocessing.
        split_root = os.path.join(root_dir, split_name)
        return datasets.ImageFolder(root=split_root, transform=preprocessing)

    train_dataset = _image_folder('train')
    test_dataset = _image_folder('test')
    validation_dataset = _image_folder('validation')

    # Shuffle only the training data; evaluation order stays fixed.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

    class_names = train_dataset.classes
    return train_loader, test_loader, validation_loader, class_names

In [None]:
# Define the training function
def train_model(model, train_loader, criterion, optimizer, device, num_epochs):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the model's parameters.
        device: Device that every batch is moved to before the forward pass.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object that was passed in.
    """
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        running_loss = 0.0
        batches_seen = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # Clear the gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update the weights
            running_loss += loss.item()
            batches_seen += 1
            # Show the mean over the batches processed so far; dividing by the
            # total batch count would under-report the loss until the last batch.
            progress_bar.set_postfix(loss=running_loss / batches_seen)
        # Counting batches avoids needing len(train_loader) and keeps an empty
        # loader from raising ZeroDivisionError.
        epoch_loss = running_loss / batches_seen if batches_seen else float('nan')
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    return model

In [None]:
def evaluate_model(model, data_loader, device):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and is left in that mode.

    Args:
        model: Network producing per-class scores of shape (batch, classes).
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device that every batch is moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1];
        ``0.0`` when ``data_loader`` yields no samples.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # Disable gradient computation
        for inputs, labels in tqdm(data_loader, desc='Evaluating', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)  # Forward pass
            # Index of the highest score per sample; `.data` is unnecessary
            # because gradients are already disabled here.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Nothing was evaluated: report 0.0 instead of dividing by zero.
        return 0.0
    return correct / total

In [None]:
# Define the function to predict a single image
def predict_single_image(model, image_path, transform, device, class_names):
    """Classify one image file and return the predicted class name.

    Args:
        model: Trained network; it is switched to evaluation mode.
        image_path: Path of the image file to classify.
        transform: Preprocessing callable mapping a PIL image to a tensor.
        device: Device on which inference runs.
        class_names: Sequence mapping a class index to its name.

    Returns:
        The entry of ``class_names`` at the index of the highest score.
    """
    model.eval()  # Set the model to evaluation mode
    # Force three channels: the network's first convolution expects RGB input,
    # so grayscale, palette or RGBA files would otherwise fail with a channel
    # mismatch. The context manager also closes the file handle.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    image_tensor = transform(image).unsqueeze(0).to(device)  # Preprocess and add batch dimension
    with torch.no_grad():  # Disable gradient computation
        output = model(image_tensor)  # Forward pass
        _, predicted = torch.max(output, 1)  # Get the predicted class index
    return class_names[predicted.item()]  # Return the predicted class name

In [None]:
# Hyperparameter search space: two candidate values for each setting.
param_grid = dict(
    num_filters=[32, 64],
    kernel_size=[3, 5],
    dropout_rate=[0.0, 0.2],
    num_units1=[32, 64],
    num_units2=[32, 64],
    learning_rate=[1e-4, 1e-2],
    batch_size=[32, 64],
)

# Field order of every combination tuple; code that unpacks a combination
# relies on exactly this order.
param_order = (
    'num_filters',
    'kernel_size',
    'dropout_rate',
    'num_units1',
    'num_units2',
    'learning_rate',
    'batch_size',
)

# Full Cartesian product of the grid, one tuple per configuration.
param_combinations = list(
    itertools.product(*(param_grid[name] for name in param_order))
)

In [None]:
# Random search: evaluate a fixed-size random subset of the full grid.
NUM_RANDOM_TRIALS = 10
SEARCH_SEED = 42

# A dedicated seeded generator makes the sampled subset identical on every
# fresh run without touching the global `random` state. The sample size is
# capped so a grid smaller than NUM_RANDOM_TRIALS does not raise ValueError.
search_rng = random.Random(SEARCH_SEED)
random_indexes = search_rng.sample(
    range(len(param_combinations)),
    min(NUM_RANDOM_TRIALS, len(param_combinations)),
)
random_param_combinations = [param_combinations[i] for i in random_indexes]

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Random hyperparameter search: train one model per sampled combination and
# keep the weights of the one with the highest validation accuracy.
best_accuracy = 0.0
best_params = None
best_model = None

# Settings that do not change between combinations.
# NOTE(review): absolute local path; point this at your own dataset directory.
root_dir = "D:/Study/COMP 6721/COMP6721-project/Dataset"
num_epochs = 10

print("Testing " + str(len(random_param_combinations)) + " combination of hyperparameters.")
for combination_num, params in enumerate(random_param_combinations, start=1):
    print("00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")
    print("Combination - " + str(combination_num))
    print(params)
    num_filters, kernel_size, dropout_rate, num_units1, num_units2, learning_rate, batch_size = params

    model = CNN(num_filters, kernel_size, dropout_rate, num_units1, num_units2).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # The loaders are rebuilt every iteration because batch_size is one of
    # the searched hyperparameters.
    train_loader, test_loader, validation_loader, class_names = load_data(root_dir, batch_size)

    model = train_model(model, train_loader, criterion, optimizer, device, num_epochs)

    # Evaluate on validation set
    validation_accuracy = evaluate_model(model, validation_loader, device)
    print(f"Params: {params}, Validation Accuracy: {validation_accuracy:.4f}")

    # Always record the first evaluated model so a best model exists even
    # when every combination scores 0.0.
    if best_model is None or validation_accuracy > best_accuracy:
        best_accuracy = validation_accuracy
        best_params = params
        # Keep detached CPU copies: the saved checkpoint then loads on
        # CPU-only machines without map_location, and the GPU memory of
        # discarded models can be released.
        best_model = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

if best_model is None:
    raise RuntimeError("No hyperparameter combination was evaluated; nothing to save.")

# Save the best model's state dictionary to a file
timestamp = time.strftime('%Y%m%d_%H%M%S')
torch.save(best_model, f'best_model_{timestamp}.pth')
print(best_params)

In [None]:
# Rebuild the winning architecture, restore its weights and score it on the
# held-out test set. The first five entries of best_params are the CNN
# constructor arguments; test_loader is the one created in the last search
# iteration.
best_model_loaded = CNN(*best_params[:5]).to(device)
best_model_loaded.load_state_dict(best_model)
test_accuracy = evaluate_model(
    model=best_model_loaded,
    data_loader=test_loader,
    device=device,
)
print(f"Best Parameters: {best_params}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
from torchviz import make_dot


def visualize_model(model, input_size=(3, 64, 64)):
    """Render the autograd graph of one forward pass as a PNG named ``cnn_torchviz``.

    Args:
        model: Network to trace; a random single-sample batch is pushed
            through it on the device of its first parameter.
        input_size: Shape of one input sample, without the batch dimension.
    """
    dummy_input = torch.randn(1, *input_size)
    dummy_input = dummy_input.to(next(model.parameters()).device)
    output = model(dummy_input)

    # Label the graph nodes with the parameter names, plus 'x' for the input.
    labelled_tensors = dict(model.named_parameters())
    labelled_tensors['x'] = dummy_input

    graph = make_dot(output, params=labelled_tensors)
    graph.render("cnn_torchviz", format="png")

In [None]:
# Render the computation graph of the reloaded best model via visualize_model.
visualize_model(best_model_loaded)

In [None]:
# Example: classify a single image from disk with the reloaded best model.
image_path_to_predict = "D:/Study/COMP 6721/COMP6721-project/Phase2-Notebooks/test_images/00000070.jpg"  # Replace with your image path

# Same preprocessing steps as the pipeline inside load_data:
# resize to 64x64, convert to a tensor, normalise with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

predicted_class = predict_single_image(
    model=best_model_loaded,
    image_path=image_path_to_predict,
    transform=transform,
    device=device,
    class_names=class_names,
)
print(f'Predicted class for {image_path_to_predict}: {predicted_class}')