In [119]:
# -*- coding: utf-8 -*-

'''
This script was written as a test for the combined pnemunoina models to observe how well it can do on an unseen set of data. 
Since there aren't many other reliable pneumonia datasets available online, I chose to use a COVID-19 dataset that I found on Kaggle.
Even if there are generally quite a few differences between the two diseases in terms of how they appear in X-ray imagery, the symptoms are very similar. 
Hence why I wanted to see how well the model could perform on a different dataset without actual training.

Reference:
Khoong, W.H. (2020) COVID-19 X-ray Dataset: Train-Test Sets. Available at: https://www.kaggle.com/datasets/khoongweihao/covid19-xray-dataset-train-test-sets (Accessed: 9 January 2025).
'''

import sys
import zipfile
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import requests
import pathlib

sys.path.append("../")
import Utils.TorchUtils as Utils

device = Utils.get_device()
print(f"Found Device: {device}")

Found Device: mps


In [95]:
DATASET_LOCATION = pathlib.Path('covid19_xray')

DATASET_LOCATION.mkdir(parents=True, exist_ok=True)

url = "https://www.kaggle.com/api/v1/datasets/download/khoongweihao/covid19-xray-dataset-train-test-sets"
zip_path = pathlib.Path('~/Downloads/chest-xray-pneumonia.zip').expanduser()

zip_path.parent.mkdir(parents=True, exist_ok=True)

response = requests.get(url, stream=True)
with open(zip_path, 'wb') as file:
    for chunk in response.iter_content(chunk_size=128):
        file.write(chunk)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(DATASET_LOCATION)

print(f"Dataset location: {DATASET_LOCATION}")

Dataset location: covid19_xray


In [120]:
MODEL_PATH = 'combined_pneumonia_models.pth'
NEW_DATA_PATH = 'covid19_xray'
BATCH_SIZE = 32

In [121]:
# The test matches the architecture used for the saved model 
def build_model(weights='IMAGENET1K_V1'):
    model = models.resnet34(weights=weights)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)
    return model

resnet34 = build_model()

PRETRAINED_MODEL_PATH = "combined_pneumonia_models.pth"
device = device
state_dict = torch.load(PRETRAINED_MODEL_PATH, map_location=device, weights_only=True)

class CombinedModel(nn.Module):
    def __init__(self, pretrained_model, state_dict):
        super(CombinedModel, self).__init__()
        self.pretrained_model = pretrained_model
        
        self.feature_extractor = nn.Sequential(*list(self.pretrained_model.children())[:-2])
    
        self.pretrained_model.load_state_dict(state_dict, strict=False)

        self.additional_layers = nn.Sequential(
            nn.Conv2d(512, 8, kernel_size=3, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.3),
            nn.AdaptiveAvgPool2d((28, 28)),
            nn.Flatten(),
            nn.Linear(8 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(512),
            nn.Dropout(p=0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(256),
            nn.Dropout(p=0.5),
            nn.Linear(256, 2)
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.additional_layers(x)
        return x

model = CombinedModel(pretrained_model=resnet34, state_dict=state_dict).to(device)
model.eval() 
print(f"Final model loaded and moved to {device}")

Final model loaded and moved to mps


In [122]:
# I used a simpler set of transforms, but still similar to the ones used in the training process the model is built on
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = datasets.ImageFolder("covid19_xray/xray_dataset_covid19", transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Test dataset contains {len(test_dataset)} samples across {len(test_dataset.classes)} classes")

Test dataset contains 188 samples across 2 classes


In [123]:
# Here we pass new data through the model to obtian predictions on the new dataset
all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
      
        outputs = model(inputs)
        _, predictions = torch.max(outputs, 1)
        
        
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print("Inference completed")

Inference completed


In [124]:
class_names = ["NORMAL", "COVID"]

accuracy = accuracy_score(all_labels, all_predictions)
print(f"Test Accuracy: {accuracy:.2f}")

report = classification_report(all_labels, all_predictions, target_names=class_names, zero_division=0, output_dict=True)

for class_name in class_names:
    print(f"Class: {class_name}")
    print(f"Precision: {report[class_name]['precision']:.2f}")
    print(f"Recall: {report[class_name]['recall']:.2f}")
    print(f"F1-Score: {report[class_name]['f1-score']:.2f}")
    print(f"Support: {report[class_name]['support']}")

Test Accuracy: 0.70
Class: NORMAL
Precision: 0.17
Recall: 0.10
F1-Score: 0.12
Support: 40.0
Class: COVID
Precision: 0.78
Recall: 0.86
F1-Score: 0.82
Support: 148.0
