In [1]:
import os  # For file manipulation
import torch  # Machine learning library
import numpy as np  # For math library and matrices
import random  # For random operations

from PIL import Image  # For image processing
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils import shuffle
from sklearn.manifold import TSNE
from sklearn.svm import SVC

from torchvision import transforms, models  # For image transformations and pre-trained models
from torch.utils.data import Dataset, DataLoader  # For creating custom datasets and managing batches
import torch.nn as nn  # For building neural networks
import matplotlib.pyplot as plt  # For graphing
from google.colab import drive  # For accessing Google Drive

To enable a CUDA GPU in Colab, go to Runtime > Change runtime type, set "Hardware accelerator" to GPU, and click Save.

In [None]:
# Report whether PyTorch can see a CUDA GPU in this runtime
gpu_visible = torch.cuda.is_available()
print(gpu_visible)

In [2]:
drive.mount('/content/drive') # Mounting Google Drive
# Copy dataset from Google Drive to Colab's local storage (if not too large)
!cp -r /content/drive/MyDrive/TestDataset /content/dataset/

drive_path = '/content/dataset/'

Mounted at /content/drive


In [3]:
# Load in our trained model from Google Drive
my_model = models.resnet18(weights=None)  # architecture only; the weights come from the checkpoint below
model_path = '/content/drive/MyDrive/COMP432Project/resnet18_model_V1.pth'

# Deserialize the checkpoint onto the CPU so this cell also runs on runtimes without a GPU.
# load_state_dict() copies the values into the model's existing (CPU) parameters, so the
# model ends up on the same device as before.
# weights_only=True restricts unpickling to tensors and plain containers, which is all a
# state dict needs, and avoids executing arbitrary pickled code from the file.
state_dict = torch.load(model_path, map_location=torch.device('cpu'), weights_only=True)
my_model.load_state_dict(state_dict)

# Switch to evaluation mode; as the last expression this also displays the architecture
my_model.eval()

  my_model.load_state_dict(torch.load(model_path, map_location=torch.device('cuda')))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
def create_image_path_array(image_directory, extensions=(".tif",)):
    """Collect the paths of all matching image files below a directory.

    Args:
        image_directory: Root directory to search. Sub-directories are searched
            recursively.
        extensions: File-name suffix, or tuple of suffixes, to keep. Matching is
            case-sensitive. Defaults to ``(".tif",)``.

    Returns:
        A sorted list of full file paths. The list is empty when nothing matches
        (``os.walk`` yields nothing for a directory that does not exist).
    """
    # Accept a single suffix string as well as any iterable of suffixes
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    image_paths = [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(image_directory)
        for filename in files
        if filename.endswith(suffixes)
    ]

    # os.walk returns entries in filesystem-dependent order; sorting makes the result
    # deterministic so that a seeded shuffle of these paths is reproducible.
    image_paths.sort()
    return image_paths

In [18]:
  # Gather the image paths of each tissue class; they are merged and shuffled further down
  MUS_paths = create_image_path_array('/content/drive/MyDrive/TestDataset/colorectal_cancer/MUS')
  NORM_paths = create_image_path_array('/content/drive/MyDrive/TestDataset/colorectal_cancer/NORM')
  STR_paths = create_image_path_array('/content/drive/MyDrive/TestDataset/colorectal_cancer/STR')

  # One integer class id per image: MUS -> 0, NORM -> 1, STR -> 2
  MUS_labels = [0 for _ in MUS_paths]
  NORM_labels = [1 for _ in NORM_paths]
  STR_labels = [2 for _ in STR_paths]

  # Concatenate the per-class lists in the same order so paths and labels stay aligned
  all_paths = [*MUS_paths, *NORM_paths, *STR_paths]
  all_labels = [*MUS_labels, *NORM_labels, *STR_labels]

  # Shuffle paths and labels together with a fixed seed, then report the sample count
  all_paths, all_labels = shuffle(all_paths, all_labels, random_state=42)
  print(len(all_paths))

100


In [19]:
class ImageDataset(Dataset):
    """Dataset that loads images from disk on demand and pairs each with its label.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned index-by-index with ``image_paths``.
        transform: Optional callable applied to the loaded RGB PIL image
            (e.g. resizing, normalization).

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform=None):
        # A mismatch would otherwise surface later as an IndexError mid-iteration,
        # or go unnoticed when there are more labels than images.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length "
                f"(got {len(image_paths)} and {len(labels)})"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, with ``transform`` applied if set."""
        # The context manager closes the file handle as soon as the pixels are read;
        # convert() returns a separate in-memory image that stays valid afterwards.
        with Image.open(self.image_paths[index]) as source:
            image = source.convert('RGB')

        # Apply our 'preprocess' transformations (i.e. resizing, normalization)
        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[index]

# ImageNet channel statistics used to normalize the ResNet input
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Steps applied, in order, to every image before it reaches the network
resnet_steps = [
    transforms.Resize(256),        # shorter side -> 256 pixels
    transforms.CenterCrop(224),    # central 224x224 crop
    transforms.ToTensor(),         # PIL image -> PyTorch tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
preprocess = transforms.Compose(resnet_steps)

# Wrap the shuffled paths/labels in a dataset and batch it without reshuffling
test_dataset = ImageDataset(image_paths=all_paths, labels=all_labels, transform=preprocess)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [20]:
# Utility to calculate accuracy
def calculate_accuracy(predictions, labels):
    """Return the fraction of predictions that equal their labels.

    Args:
        predictions: Predicted class ids (NumPy array, tensor, list or tuple).
        labels: True class ids, same length and order as ``predictions``.

    Returns:
        A float in ``[0, 1]``: number of matches divided by ``len(labels)``.

    Raises:
        ValueError: If ``labels`` is empty or the two inputs differ in length.
    """
    # `==` on plain Python sequences yields a single bool, so promote them to arrays
    # to get an element-wise comparison.
    if isinstance(predictions, (list, tuple)):
        predictions = np.asarray(predictions)
    if isinstance(labels, (list, tuple)):
        labels = np.asarray(labels)

    n_samples = len(labels)
    if n_samples == 0:
        raise ValueError("calculate_accuracy() needs at least one label")
    # Without this check a length-1 input would silently broadcast against the other one
    if len(predictions) != n_samples:
        raise ValueError(
            f"predictions and labels must have the same length "
            f"(got {len(predictions)} and {n_samples})"
        )

    return (predictions == labels).sum().item() / n_samples

In [23]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
my_model.to(device)
my_model.eval()  # Set the model to evaluation mode

# Predictions and true labels collected over the entire dataset.
# NOTE: this rebinds the name `all_labels` used when the dataset was built; the dataset
# keeps its own reference to that earlier list, so it is not affected.
all_predictions = []
all_labels = []

# Gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        # Only the images take part in the forward pass, so the labels are not sent to
        # the device just to be brought straight back.
        images = images.to(device)

        # Forward pass
        outputs = my_model(images)

        # Predicted class = index of the highest score along the class dimension
        preds = outputs.argmax(dim=1)

        # Collect as NumPy values on the CPU
        all_predictions.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [24]:
# Overall accuracy across every evaluated image
accuracy = calculate_accuracy(np.array(all_predictions), np.array(all_labels))
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1.
# `labels` pins class ids 0, 1, 2 to the names below (the ids assigned when the labels
# were created). Without it, classification_report infers the classes from the data and
# raises when one of them is absent from both the labels and the predictions.
print("Classification Report:")
report = classification_report(
    all_labels,
    all_predictions,
    labels=[0, 1, 2],
    target_names=["MUS", "NORM", "STR"],
)
print(report)

Test Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

         MUS       1.00      1.00      1.00        34
        NORM       1.00      1.00      1.00        33
         STR       1.00      1.00      1.00        33

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100

