In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm
import itertools
import random
import time
import os
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
class CNN(nn.Module):
    """Small image classifier: one conv + max-pool stage followed by three linear layers.

    Args:
        num_filters: Number of output channels of the convolution.
        kernel_size: Side length of the square convolution kernel.
        dropout_rate: Dropout probability applied after the first linear layer.
        num_units1: Width of the first fully connected layer.
        num_units2: Width of the second fully connected layer.
        input_size: Side length of the (square) input images. Defaults to 64,
            the value that was previously hard-coded.
        num_classes: Number of output logits. Defaults to 5
            (airfield, bus stand, canyon, market, temple).

    Raises:
        ValueError: If the kernel/input combination leaves no spatial
            positions after the conv + pool stage.

    Sub-module names (conv1, pool, fc1, fc2, fc3, dropout) are kept as-is so
    state dicts saved from earlier versions of this class still load.
    """

    # Padding of conv1; shared with _get_conv_output_size so the two cannot drift apart.
    _CONV_PADDING = 1
    # Window size and stride of the max-pool layer.
    _POOL_SIZE = 2

    def __init__(self, num_filters, kernel_size, dropout_rate, num_units1, num_units2,
                 input_size=64, num_classes=5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, num_filters, kernel_size=kernel_size, padding=self._CONV_PADDING)
        self.pool = nn.MaxPool2d(self._POOL_SIZE, self._POOL_SIZE)

        # Inputs are expected as (3, input_size, input_size).
        self.input_size = input_size
        self.conv_output_size = self._get_conv_output_size(
            self.input_size, kernel_size, self._POOL_SIZE, self._CONV_PADDING
        )
        if self.conv_output_size < 1:
            raise ValueError(
                f"kernel_size={kernel_size} is too large for input_size={input_size}: "
                "no spatial positions remain after the conv + pool stage"
            )

        self.fc1 = nn.Linear(num_filters * self.conv_output_size * self.conv_output_size, num_units1)
        self.fc2 = nn.Linear(num_units1, num_units2)
        self.fc3 = nn.Linear(num_units2, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        x = self.pool(torch.relu(self.conv1(x)))
        # Collapse every non-batch dimension into one feature vector per sample.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all non-batch dims).

        No longer used by forward(); kept so existing callers keep working.
        """
        num_features = 1
        for s in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= s
        return num_features

    def _get_conv_output_size(self, input_size, kernel_size, pool_size, padding=1):
        """Side length of the feature map after one stride-1 conv and one pool layer.

        The pool is assumed to use stride == pool_size with floor rounding,
        which is how self.pool is constructed.
        """
        # Stride-1 convolution: out = in + 2 * padding - kernel_size + 1
        conv_output_size = input_size + 2 * padding - kernel_size + 1
        # Non-overlapping pooling: out = floor((in - pool_size) / pool_size) + 1
        conv_output_size = (conv_output_size - pool_size) // pool_size + 1
        return conv_output_size


In [3]:
def load_data(root_dir, batch_size):
    """Build data loaders for the train / test / validation image folders.

    Args:
        root_dir: Directory containing the `train`, `test` and `validation`
            sub-folders, each readable by `datasets.ImageFolder`.
        batch_size: Batch size used for all three loaders.

    Returns:
        A 4-tuple `(train_loader, test_loader, validation_loader, class_names)`.
        Only the training loader shuffles; `class_names` is the `classes`
        attribute of the training dataset.
    """
    # One preprocessing pipeline shared by every split: resize to 64x64,
    # convert to a tensor, then normalise with mean 0.5 and std 0.5.
    preprocessing = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    def make_loader(dataset, shuffle):
        # Thin wrapper so the three loaders differ only in their shuffle flag.
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    train_set = datasets.ImageFolder(root=f'{root_dir}/train', transform=preprocessing)
    test_set = datasets.ImageFolder(root=f'{root_dir}/test', transform=preprocessing)
    validation_set = datasets.ImageFolder(root=f'{root_dir}/validation', transform=preprocessing)

    return (
        make_loader(train_set, True),
        make_loader(test_set, False),
        make_loader(validation_set, False),
        train_set.classes,
    )


In [4]:
def evaluate_model(model, test_loader, device):
    """Compute classification accuracy of `model` over `test_loader`.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable yielding `(inputs, labels)` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1].
        Returns 0.0 when the loader yields no samples, instead of raising
        ZeroDivisionError.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Evaluating', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            # Index of the highest score per sample; `.data` is unnecessary under no_grad.
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Empty loader: nothing to score, avoid dividing by zero.
        return 0.0
    return correct / total

In [5]:
def get_predictions(model, data_loader, device):
    """Collect predicted and true class indices for every sample in a loader.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        data_loader: Iterable yielding `(inputs, labels)` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        `(all_preds, all_labels)`: two flat lists of NumPy integer scalars,
        in the order the loader produced the samples.
    """
    predicted_classes, true_classes = [], []
    model.eval()
    with torch.no_grad():
        for batch_inputs, batch_labels in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_inputs)
            # Position of the largest score along the class dimension.
            batch_preds = scores.max(dim=1).indices
            predicted_classes += list(batch_preds.cpu().numpy())
            true_classes += list(batch_labels.cpu().numpy())
    return predicted_classes, true_classes

In [6]:
def plot_confusion_matrix(cm, class_names, save_path):
    """Render a confusion matrix as an annotated heatmap and write it to disk.

    Args:
        cm: Confusion matrix of integer counts (cells are formatted with 'd').
        class_names: Tick labels used on both axes.
        save_path: Destination passed to `savefig`.

    The figure is closed after saving, so nothing is displayed inline.
    """
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix')
    fig.savefig(save_path)
    plt.close(fig)

In [7]:
# Load the saved model.
# The layer-shaping hyperparameters below (filters, kernel size, unit counts)
# must match the ones the checkpoint was trained with, otherwise the parameter
# shapes in the state dict will not match the freshly built network.
saved_model_path = 'best_model_20240615_101908.pth'
num_filters = 32  # or the value used in your best model
kernel_size = 5  # or the value used in your best model
dropout_rate = 0.0  # or the value used in your best model
num_units1 = 64  # or the value used in your best model
num_units2 = 32  # or the value used in your best model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(num_filters, kernel_size, dropout_rate, num_units1, num_units2).to(device)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load(saved_model_path, map_location=device))

# Load data.
# Raw string: the Windows path contains backslashes such as "\S" and "\C",
# which are invalid escape sequences in a normal string literal.
root_dir = r"D:\Study\COMP 6721\COMP6721-project\Dataset"
batch_size = 32  # or the value used in your best model
train_loader, test_loader, validation_loader, class_names = load_data(root_dir, batch_size)

# Evaluate on test set
test_accuracy = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Get predictions and plot confusion matrix
test_preds, test_labels = get_predictions(model, test_loader, device)
cm = confusion_matrix(test_labels, test_preds)
plot_confusion_matrix(cm, class_names, 'confusion_matrix2.png')

                                                                                                                       

Test Accuracy: 0.7650


In [8]:
# Show the layer-by-layer structure of the loaded network.
print(model)

CNN(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=30752, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
)
