In [2]:
%pip install torch torchvision
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models  # Import models to use VGG16
from torch.utils.data import DataLoader, Dataset, ConcatDataset
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
from PIL import Image, UnidentifiedImageError  # Import UnidentifiedImageError for handling specific exceptions


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.2
[notice] To update, run: C:\Users\patel\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [47]:
# Ask PyTorch's CUDA caching allocator to release cached memory it is not using.
# NOTE(review): this cell carries execution count 47 while the cells after it
# are 3 and 8, so it was run out of order; nothing below depends on it.
torch.cuda.empty_cache()


In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class VGG16Model(nn.Module):
    """VGG16 backbone whose final classifier layer is replaced by a new linear head.

    Args:
        num_classes: Number of output logits of the replacement head
            (default 2, the value the notebook has always used).
        pretrained: When True (default) the backbone is initialised with
            torchvision's ImageNet weights; when False it is randomly
            initialised and nothing is downloaded.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        # torchvision >= 0.13 deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
        # Older releases have no VGG16_Weights enum, so fall back to the flag.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is not None:
            self.vgg16 = models.vgg16(
                weights=weights_enum.IMAGENET1K_V1 if pretrained else None
            )
        else:
            self.vgg16 = models.vgg16(pretrained=pretrained)

        # Read the input width from the layer being replaced instead of
        # hard-coding 4096.
        head_in_features = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(head_in_features, num_classes)

    def forward(self, x):
        """Return the logits produced by the wrapped VGG16 network for ``x``."""
        return self.vgg16(x)

class CustomMRIImageDataset(Dataset):
    """Dataset of the ``.png`` files found directly inside one directory.

    Every sample gets the same ``label``. Files that cannot be opened are
    replaced by a black 224x224 RGB image (and reported on stdout) so that a
    single corrupt file does not abort iteration.

    Args:
        root_dir: Directory that is listed (non-recursively) for ``.png`` files.
        label: Label returned with every image of this dataset.
        transform: Optional callable applied to the PIL image before it is
            returned. When omitted, the PIL image itself is returned.
    """

    def __init__(self, root_dir, label, transform=None):
        self.root_dir = root_dir
        self.label = label
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and machines.
        self.image_paths = sorted(
            os.path.join(root_dir, image_file)
            for image_file in os.listdir(root_dir)
            if image_file.endswith('.png')  # Ensure only PNG images are considered
        )

    def __len__(self):
        """Return the number of PNG files discovered in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is converted to RGB. If the file cannot be read, a black
        placeholder image is used instead and the failure is printed.
        """
        img_path = self.image_paths[idx]
        try:
            # The context manager closes the underlying file handle;
            # convert() returns an independent in-memory copy.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")  # Convert to RGB
        except (IOError, UnidentifiedImageError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Best-effort fallback: substitute a black image for the unreadable file.
            image = Image.new("RGB", (224, 224), (0, 0, 0))

        # Applied on both paths, and skipped on both when no transform was
        # given (the fallback previously called a None transform).
        if self.transform is not None:
            image = self.transform(image)
        return image, self.label
# Create new datasets with the selected indices
class SubsetDataset(Dataset):
    """View of ``dataset`` limited to the positions listed in ``indices``.

    Item ``i`` of this view is item ``indices[i]`` of the wrapped dataset.
    """

    def __init__(self, dataset, indices):
        self.dataset, self.indices = dataset, indices

    def __len__(self):
        """Return how many positions were selected."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Look up the wrapped dataset at the position stored for ``idx``."""
        source_position = self.indices[idx]
        return self.dataset[source_position]
# Step 5: Define transforms
# Preprocessing applied to every image, in order: resize to 224x224, collapse
# to grayscale replicated over 3 channels, convert to a tensor, then normalise
# each channel with the mean/std values below.
_preprocessing_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocessing_steps)

# Root folder holding the two class sub-folders. Set the MRI_DATA_ROOT
# environment variable to run the notebook on another machine; the default is
# the location originally used by this notebook.
MRI_DATA_ROOT = os.environ.get(
    "MRI_DATA_ROOT",
    "C:\\Users\\patel\\Downloads\\MRI_data-20241026T213243Z-001\\MRI_data",
)
data_dir_pd = os.path.join(MRI_DATA_ROOT, "PD Patient MRIs")
data_dir_non_pd = os.path.join(MRI_DATA_ROOT, "Non PD Patient MRIs")

# Step 7: Create datasets for both PD and Non-PD Patients
# Label 1 = PD patient images, label 0 = non-PD patient images.
pd_dataset = CustomMRIImageDataset(data_dir_pd, label=1, transform=transform)
non_pd_dataset = CustomMRIImageDataset(data_dir_non_pd, label=0, transform=transform)

def select_random_samples(dataset, num_samples, rng=None):
    """Draw ``num_samples`` distinct positions of ``dataset`` at random.

    Args:
        dataset: Any object supporting ``len()``.
        num_samples: How many distinct indices to draw.
        rng: Optional ``random.Random`` instance to draw from, for a
            reproducible selection. When omitted, the module-level ``random``
            generator is used, exactly as before.

    Returns:
        A list of ``num_samples`` unique indices in ``range(len(dataset))``.

    Raises:
        ValueError: If the dataset holds fewer than ``num_samples`` items.
    """
    population_size = len(dataset)
    if population_size < num_samples:
        raise ValueError(f"Dataset contains only {population_size} images, cannot select {num_samples}")
    sampler = random if rng is None else rng
    return sampler.sample(range(population_size), num_samples)

# Fixed seed so the train/test partition and the training batch order are the
# same after every Restart & Run All.
SPLIT_SEED = 42
TRAIN_SAMPLES_PER_CLASS = 1000  # images drawn from each class for training
BATCH_SIZE = 32

random.seed(SPLIT_SEED)
pd_indices = select_random_samples(pd_dataset, TRAIN_SAMPLES_PER_CLASS)
non_pd_indices = select_random_samples(non_pd_dataset, TRAIN_SAMPLES_PER_CLASS)

# Create subsets for training
pd_subset = SubsetDataset(pd_dataset, pd_indices)
non_pd_subset = SubsetDataset(non_pd_dataset, non_pd_indices)
train_combined = ConcatDataset([pd_subset, non_pd_subset])
pd_all_indices = list(range(len(pd_dataset)))
non_pd_all_indices = list(range(len(non_pd_dataset)))
pd_used_indices = pd_indices
non_pd_used_indices = non_pd_indices

# Create testing indices: every image that was not drawn for training.
# sorted() gives a defined order; iteration order of a set is unspecified.
# NOTE(review): the split is per image. If several images come from the same
# scan or patient, train and test may share subjects and the reported accuracy
# would be optimistic -- confirm against the data source.
pd_test_indices = sorted(set(pd_all_indices) - set(pd_used_indices))
non_pd_test_indices = sorted(set(non_pd_all_indices) - set(non_pd_used_indices))

# Train and test must partition each class exactly.
assert len(pd_indices) + len(pd_test_indices) == len(pd_dataset)
assert len(non_pd_indices) + len(non_pd_test_indices) == len(non_pd_dataset)

# Create SubsetDatasets for testing
pd_test_subset = SubsetDataset(pd_dataset, pd_test_indices)
non_pd_test_subset = SubsetDataset(non_pd_dataset, non_pd_test_indices)

# Combine the test datasets
test_combined = ConcatDataset([pd_test_subset, non_pd_test_subset])
print(f"Total number of samples in testing dataset: {len(test_combined)}")

# Create DataLoaders for training and testing datasets
# A dedicated, seeded generator makes the shuffle order reproducible without
# touching PyTorch's global RNG.
train_loader = DataLoader(
    train_combined,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_loader = DataLoader(test_combined, batch_size=BATCH_SIZE, shuffle=False)
# Step 9: Initialize model, loss function, and optimizer
# Build the VGG16-based classifier and move it to the selected device.
vgg_model = VGG16Model()
vgg_model = vgg_model.to(device)
# Cross-entropy over the model's output logits.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters.
optimizer = optim.SGD(params=vgg_model.parameters(), lr=0.01)

# Step 10: Training loop
num_epochs = 10
for epoch in range(num_epochs):
    vgg_model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    samples_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradients
        outputs = vgg_model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / max(samples_seen, 1):.4f}')

# Switch to inference mode before exporting: the scripted module stores the
# training flag, so exporting in train mode would leave dropout active for any
# consumer that does not call .eval() itself.
vgg_model.eval()
scripted_vgg_model = torch.jit.script(vgg_model)
torch.jit.save(scripted_vgg_model, 'parkinsons_vgg_model_scripted.pth')


Total number of samples in testing dataset: 1621




Epoch [1/10], Loss: 0.5735
Epoch [2/10], Loss: 0.3280
Epoch [3/10], Loss: 0.1337
Epoch [4/10], Loss: 0.0366
Epoch [5/10], Loss: 0.3953
Epoch [6/10], Loss: 0.1042
Epoch [7/10], Loss: 0.0127
Epoch [8/10], Loss: 0.0040
Epoch [9/10], Loss: 0.0022
Epoch [10/10], Loss: 0.0011


In [8]:
def evaluate_cnn_model(model_path, test_loader, device):
    """Load a TorchScript classifier and report its accuracy on ``test_loader``.

    Args:
        model_path: Path of a file written with ``torch.jit.save``.
        test_loader: Iterable yielding ``(images, labels)`` tensor batches.
        device: ``torch.device`` on which inference runs.

    Returns:
        Tuple ``(all_predictions, all_labels)`` of 1-D NumPy arrays holding the
        predicted class index and the true label of every evaluated sample.
        Both arrays are empty when ``test_loader`` yields no batches.

    Side effects:
        Prints the accuracy as a percentage (``nan`` when nothing was evaluated).
    """
    # map_location places the weights directly on `device`; without it a model
    # saved from a GPU cannot be loaded on a machine that has no CUDA.
    model = torch.jit.load(model_path, map_location=device)
    model.eval()

    prediction_batches = []
    label_batches = []

    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in test_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)

            prediction_batches.append(predicted.cpu())
            # Labels are only compared on the host, so they never need to
            # visit the device.
            label_batches.append(labels.cpu())

    if prediction_batches:
        all_predictions = torch.cat(prediction_batches).numpy()
        all_labels = torch.cat(label_batches).numpy()
        accuracy = float(np.mean(all_predictions == all_labels))
    else:
        # Nothing to evaluate: avoid the 0/0 division and report nan explicitly.
        all_predictions = np.array([], dtype=np.int64)
        all_labels = np.array([], dtype=np.int64)
        accuracy = float("nan")

    print(f'Accuracy: {accuracy * 100:.2f}%')
    return all_predictions, all_labels

# Call the function with the model path and test loader.
# Load the same relative filename the training cell writes with torch.jit.save,
# rather than a user-specific absolute path, so evaluation always reads the
# model that was just trained and the notebook runs on other machines.
evaluate_cnn_model('parkinsons_vgg_model_scripted.pth', test_loader, device)


Error loading image C:\Users\patel\Downloads\MRI_data-20241026T213243Z-001\MRI_data\PD Patient MRIs\SAG+C_006.png: cannot identify image file 'C:\\Users\\patel\\Downloads\\MRI_data-20241026T213243Z-001\\MRI_data\\PD Patient MRIs\\SAG+C_006.png'
Error loading image C:\Users\patel\Downloads\MRI_data-20241026T213243Z-001\MRI_data\Non PD Patient MRIs\ciss3d_SAG_017.png: cannot identify image file 'C:\\Users\\patel\\Downloads\\MRI_data-20241026T213243Z-001\\MRI_data\\Non PD Patient MRIs\\ciss3d_SAG_017.png'
Accuracy: 99.51%


(array([1, 1, 1, ..., 0, 0, 0], dtype=int64),
 array([1, 1, 1, ..., 0, 0, 0], dtype=int64))