<a href="https://colab.research.google.com/github/tswiger34/AlzheimersPrediction/blob/main/ImageClassifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

>[Load Dependencies](#scrollTo=Mma4gQLDIfCq)

>[Image Loading and Preprocessing](#scrollTo=AwVgzqCBVI8k)

>[Model Training](#scrollTo=kQPo1bPZVPn7)

>>[Create and Load Models](#scrollTo=3Gim6jahVYYi)

>>[Train Models](#scrollTo=W2Lm7kQZVbVL)

>[Model Testing](#scrollTo=2UYtnVA3VTS9)



# Load Dependencies

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Data Cleaning/Manipulation
import pandas as pd
import numpy as np
import os
import nibabel as nib
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from torchvision import transforms, utils
from torchvision.models import resnet18
from torch.nn.functional import softmax

# Load train/test info data frames
train_info = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/TrainSets/Images/ImageTrainDataInfo.csv')
train_ptids = train_info['Subject'].unique()
img_metadata = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/FullData/CSVFiles/ImageMetadata.csv')
img_train_df = img_metadata[img_metadata['Subject'].isin(train_ptids)]

root_dir = "/content/drive/MyDrive/PBHLT7120_Project/Data/FullData/Images/1.5T_Images"
folders = os.listdir(root_dir)
new_df = pd.DataFrame(columns=['Subject', 'Folder_Num'])

for folder in folders:
   path = os.path.join(root_dir, folder)
   new_df


In [None]:
# Initialize a list to store PTID/ImageID pairs
data = []

## Traverse Folders
for folder in os.listdir(root_dir):
    print(folder)
    folder_path = os.path.join(root_dir, folder)
    if os.path.isdir(folder_path) and folder.startswith('ADNI1_Complete'):
        adni_path = os.path.join(folder_path, 'ADNI')
        if os.path.isdir(adni_path):
            for ptid in os.listdir(adni_path):
                ptid_path = os.path.join(adni_path, ptid)
                if os.path.isdir(ptid_path):
                    # Collect ImageID information
                    for image_name in os.listdir(ptid_path):
                        if os.path.isdir(os.path.join(ptid_path, image_name)):
                            for date_info in os.listdir(os.path.join(ptid_path, image_name)):
                                date_path = os.path.join(ptid_path, image_name, date_info)
                                if os.path.isdir(date_path):
                                    for img_id in os.listdir(date_path):
                                        img_path = os.path.join(date_path, img_id)
                                        if os.path.isdir(img_path):
                                          img_name = os.listdir(img_path)[0]
                                          full_path = os.path.join(img_path, img_name)
                                          data.append({'PatientID': ptid, 'ImageID': img_id, 'FolderName':folder, 'FullPath':full_path})

# Save DataFrame to a CSV file
df = pd.DataFrame(data)
csv_path = os.path.join(root_dir, 'PTID_ImageIDs.csv')
df.to_csv(csv_path, index=False)
print(f"DataFrame created with {len(df)} entries")
print(f"CSV saved at: {csv_path}")

ADNI1_Complete 3Yr 1.5T2
ADNI1_Complete 3Yr 1.5T3
ADNI1_Complete 3Yr 1.5T4
ADNI1_Complete 3Yr 1.5T1
ADNI1_Complete 3Yr 1.5T5
ADNI1_Complete 3Yr 1.5T6
ADNI1_Complete 3Yr 1.5T7
ADNI1_Complete 3Yr 1.5T8
ADNI1_Complete 3Yr 1.5T9
Master
ADNI1_Complete 3Yr 1.5T10
ADNI1_Complete 3Yr 1.5T11
PTID_ImageIDs.csv
DataFrame created with 2170 entries
CSV saved at: /content/drive/MyDrive/PBHLT7120_Project/Data/FullData/Images/1.5T_Images/PTID_ImageIDs.csv


In [None]:
df = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/FullData/Images/1.5T_Images/PTID_ImageIDs.csv')
print(len(df))
print(df['FullPath'])

2170
0       /content/drive/MyDrive/PBHLT7120_Project/Data/...
1       /content/drive/MyDrive/PBHLT7120_Project/Data/...
2       /content/drive/MyDrive/PBHLT7120_Project/Data/...
3       /content/drive/MyDrive/PBHLT7120_Project/Data/...
4       /content/drive/MyDrive/PBHLT7120_Project/Data/...
                              ...                        
2165    /content/drive/MyDrive/PBHLT7120_Project/Data/...
2166    /content/drive/MyDrive/PBHLT7120_Project/Data/...
2167    /content/drive/MyDrive/PBHLT7120_Project/Data/...
2168    /content/drive/MyDrive/PBHLT7120_Project/Data/...
2169    /content/drive/MyDrive/PBHLT7120_Project/Data/...
Name: FullPath, Length: 2170, dtype: object


In [None]:
img_metadata = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/FullData/CSVFiles/ImageMetadata.csv')
full_df = pd.merge(df, img_metadata, left_on=('ImageID', 'PatientID'), right_on=('Image Data ID', 'Subject'), how='outer')
full_df.to_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/FullData/CSVFiles/PTID_ImageIDs.csv', index=False)

In [None]:
## Final Image Train Data Frame
train_ptids = train_info['Subject'].unique()
img_ptids, lr_ptids = train_test_split(train_ptids, test_size=0.6, random_state=42)
full_train = full_df[full_df['Subject'].isin(img_ptids)]
full_train = full_train.loc[full_train['Visit'].isin(['sc', 'bl', 'm06', 'm12'])]
full_train = pd.merge(full_train, train_info[['Subject','DIAGNOSIS_GROUP']], left_on=('Subject'), right_on=('Subject'), how='right')
full_train = full_train.dropna(subset=['FolderName'])
full_train.reset_index(drop=True, inplace=True)
print(full_train.shape)

## Final Test Data Frame
test_info = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/Data/TestSets/Images/ImageTestDataInfo.csv')
test_ptids = test_info['Subject'].unique()
full_test = full_df[full_df['Subject'].isin(test_ptids)]
full_test = full_test.loc[full_test['Visit'].isin(['sc', 'bl', 'm06', 'm12'])]
full_test = pd.merge(full_test, test_info[['Subject','DIAGNOSIS_GROUP']], left_on=('Subject'), right_on=('Subject'), how='left')
full_test = full_test.dropna(subset=['FolderName'])
full_test.reset_index(drop=True, inplace=True)
print(full_test.shape)

## LR Train Data Frame
lr_traininfo = full_df[full_df['Subject'].isin(lr_ptids)]
lr_traininfo = lr_traininfo.loc[lr_traininfo['Visit'].isin(['sc', 'bl', 'm06'])]
lr_traininfo = pd.merge(lr_traininfo, train_info[['Subject','DIAGNOSIS_GROUP']], left_on=('Subject'), right_on=('Subject'), how='right')
lr_traininfo = lr_traininfo.dropna(subset=['FolderName'])
lr_traininfo.reset_index(drop=True, inplace=True)
print(lr_traininfo.shape)

lr_testinfo = full_df[full_df['Subject'].isin(test_ptids)]
lr_testinfo = lr_testinfo.loc[lr_testinfo['Visit'].isin(['sc', 'bl', 'm06'])]
lr_testinfo = pd.merge(lr_testinfo, test_info[['Subject','DIAGNOSIS_GROUP']], left_on=('Subject'), right_on=('Subject'), how='right')
lr_testinfo = lr_testinfo.dropna(subset=['FolderName'])
lr_testinfo.reset_index(drop=True, inplace=True)
print(lr_testinfo.shape)



(391, 17)
(254, 17)
(384, 17)
(171, 17)


In [None]:
## Define Dataset Class
class MRIDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame containing 'PatientID', 'Description', 'ImageID', and 'Group'.
            root_dir (str): Root directory of the dataset.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        if isinstance(idx, int):
            row = self.dataframe.iloc[idx]
        else:
            row = self.dataframe.loc[idx]
        # Construct image path
        new_image_path = os.path.join(
            self.root_dir,
            f"{row['FolderName']}",
            "ADNI",
            row['Subject'],
            row['Description'].replace(' ', '_').replace(';', '_')
        )
        if not os.path.exists(new_image_path):
          raise FileNotFoundError(f"Directory not found: {new_image_path}")
        if os.path.isdir(new_image_path):
            date_folder = os.listdir(new_image_path)[0]
            date_path = os.path.join(new_image_path, date_folder)
            if os.path.isdir(date_path):
              image_id = os.listdir(date_path)[0]
              image_folder = os.path.join(date_path, image_id)
              image_name = os.listdir(image_folder)[0]
              image_path = os.path.join(image_folder, image_name)

        # Load image
        try:
            image = nib.load(image_path).get_fdata()
        except Exception as e:
            raise RuntimeError(f"Error loading image: {image_path}. Details: {e}")
        # Normalize to 0,1
        image = (image - image.min()) / (image.max() - image.min())
        # Scale Pixel Values
        image = (image * 255).astype(np.uint8)
        # Convert to gray scale
        if image.ndim == 3:
            image = image[:, :, image.shape[2] // 2]

        # Convert NumPy array to PIL image and transform
        image = Image.fromarray(image)
        if self.transform:
          image = self.transform(image)

        # Get Label
        if (row['DIAGNOSIS_GROUP'] == 'MCI_to_AD')|(row['DIAGNOSIS_GROUP'] == 'Normal_to_AD')|(row['DIAGNOSIS_GROUP'] == 'Only_AD'):
          label = 1
        else:
          label = 0

        return image, label

# Define image transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((224, 224)),  # Resize to a standard size (e.g., for a CNN)
    transforms.ToTensor(),          # Convert image to PyTorch tensor
    transforms.Normalize([0.5], [0.5])  # Normalize with mean and std
])

# Create dataset and dataloader
dataset = MRIDataset(dataframe=full_train, root_dir=root_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Example of iterating over the dataloader
for images, labels in dataloader:
    print(images.shape, labels)


torch.Size([64, 1, 224, 224]) tensor([1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
        1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1])
torch.Size([64, 1, 224, 224]) tensor([0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
        1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0])
torch.Size([64, 1, 224, 224]) tensor([0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
        0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([64, 1, 224, 224]) tensor([1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
        0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
        0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0])
torch.Size([

In [None]:
test_set = MRIDataset(full_test, root_dir=root_dir, transform=transform)
test_loader = DataLoader(test_set, batch_size=64, shuffle=True)
for images, labels in test_loader:
    print(images.shape, labels)

torch.Size([64, 1, 224, 224]) tensor([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1])
torch.Size([64, 1, 224, 224]) tensor([1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1])
torch.Size([64, 1, 224, 224]) tensor([1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0])
torch.Size([62, 1, 224, 224]) tensor([0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0])


# Image Differencing

In [None]:
# 1. Sort the DataFrame by PTID and Visit
# Assuming your dataframe contains columns ['PTID', 'Visit', 'Image Path', 'Label']
def preprocess_and_sort(df):
    # Sort the dataframe by PTID and Visit (assuming visits are in 'm06', 'm12', etc.)
    df = df.sort_values(by=['Subject', 'Visit'])
    return df

# 2. Image differencing function
def image_differencing(image1_path, image2_path):
    # Open the two images
    img1 = nib.load(image1_path).get_fdata()
    img2 = nib.load(image2_path).get_fdata()
    # Get middle slcie
    img1_mid = img1[img1.shape[0] // 2, :, :]
    img2_mid = img2[img2.shape[0] // 2, :, :]
    # Normalize to 0,1
    img1_mid = (img1_mid - img1_mid.min()) / (img1_mid.max() - img1_mid.min())
    img2_mid = (img2_mid - img2_mid.min()) / (img2_mid.max() - img2_mid.min())
    # Scale Pixel Values
    image1 = (img1_mid * 255).astype(np.uint8)
    image2 = (img2_mid * 255).astype(np.uint8)

    # Resize to ensure matching shapes (e.g., 256x256)
    target_size = (224, 224)  # Example size
    img1_resized = Image.fromarray(image1).resize(target_size, Image.BILINEAR)
    img2_resized = Image.fromarray(image2).resize(target_size, Image.BILINEAR)

    # Perform image differencing (pixel-wise subtraction)
    img_diff = np.abs(np.array(img1_resized) - np.array(img2_resized))

    # Convert the difference array back to an image (in case you need to save or visualize it)
    img_diff = Image.fromarray(img_diff)

    return img_diff

# 3. Dataset class for differenced images
class ImageDifferenceDataset(Dataset):
    def __init__(self, df, transform=None):
        """
        Args:
            df (DataFrame): DataFrame containing columns ['Subject', 'Visit', 'FullPath', 'DIAGNOSIS_GROUP']
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.df = df
        self.transform = transform
        self.data = self.create_image_diff_data()

    def create_image_diff_data(self):
        """
        Create the image differences for each PTID.
        """
        data = []
        ptids = self.df['Subject'].unique()
        i=0
        for ptid in ptids:
            ptid_data = self.df[self.df['Subject'] == ptid]
            ptid_data = ptid_data.drop_duplicates(subset=['Visit'])
            if len(ptid_data) >= 3:
              # For m06-bl difference (Baseline vs. 6 months)
              m06_bl_img1 = ptid_data[ptid_data['Visit'] == 'sc']['FullPath'].values[0]
              m06_bl_img2 = ptid_data[ptid_data['Visit'] == 'm06']['FullPath'].values[0]
              m06_bl_diff = image_differencing(m06_bl_img1, m06_bl_img2)

              # For m12-m06 difference (12 months vs. 6 months)
              m12_m06_img1 = ptid_data[ptid_data['Visit'] == 'm06']['FullPath'].values[0]
              m12_m06_img2 = ptid_data[ptid_data['Visit'] == 'm12']['FullPath'].values[0]
              m12_m06_diff = image_differencing(m12_m06_img1, m12_m06_img2)
              diagnosis_group = ptid_data['DIAGNOSIS_GROUP'].values[0]
              if diagnosis_group in ['MCI_to_AD', 'Normal_to_AD', 'Only_AD']:
                  label = 1
              else:
                  label = 0

              # Append the differenced images and corresponding labels to the data list
              data.append((m06_bl_diff, label))
              data.append((m12_m06_diff, label))
            else:
              print(f'Not found for {ptid}')

        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label = self.data[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# 4. DataLoader with transformations
diff_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
## Prep Test Image Diffing
test_sorted = preprocess_and_sort(full_test)

diff_testset = ImageDifferenceDataset(test_sorted, transform=diff_transform)

diff_testloader = DataLoader(diff_testset, batch_size=64, shuffle=True)

Not found for 002_S_1070
Not found for 005_S_0324
Not found for 014_S_0520
Not found for 021_S_0276
Not found for 027_S_0120
Not found for 027_S_0179
Not found for 027_S_0307
Not found for 027_S_0408
Not found for 035_S_0156
Not found for 041_S_0898
Not found for 057_S_0474
Not found for 057_S_0839
Not found for 099_S_0551
Not found for 114_S_0601
Not found for 126_S_0784
Not found for 126_S_0865
Not found for 126_S_0891
Not found for 127_S_0844
Not found for 137_S_0443
Not found for 137_S_0722
Not found for 137_S_0973
Not found for 137_S_1041


In [None]:
for images, labels in diff_testloader:
    print(images.shape, labels)

In [None]:
## Prep training Image Diffing
train_sorted = preprocess_and_sort(full_train)

# Step 2: Create Dataset
diff_trainset = ImageDifferenceDataset(train_sorted, transform=diff_transform)

# Step 3: Create DataLoader
diff_trainloader = DataLoader(diff_trainset, batch_size=64, shuffle=True)

Not found for 005_S_0223
Not found for 007_S_0293
Not found for 011_S_0005
Not found for 011_S_0010
Not found for 011_S_0023
Not found for 011_S_0183
Not found for 011_S_0241
Not found for 011_S_0861
Not found for 011_S_1080
Not found for 014_S_0328
Not found for 014_S_0658
Not found for 016_S_0538
Not found for 016_S_0702
Not found for 021_S_0141
Not found for 021_S_0424
Not found for 021_S_0753
Not found for 027_S_0256
Not found for 033_S_0516
Not found for 033_S_0567
Not found for 033_S_0906
Not found for 033_S_0920
Not found for 033_S_0922
Not found for 035_S_0033
Not found for 035_S_0292
Not found for 036_S_0577
Not found for 036_S_0656
Not found for 036_S_0760
Not found for 036_S_0813
Not found for 041_S_0314
Not found for 041_S_0679
Not found for 057_S_0464
Not found for 057_S_0934
Not found for 099_S_0040
Not found for 099_S_0291
Not found for 099_S_0372
Not found for 099_S_0534
Not found for 126_S_0680
Not found for 127_S_0259
Not found for 127_S_0394
Not found for 127_S_0431


In [None]:
for images, labels in diff_trainloader:
    print(images.shape, labels)

# Model Training

## Create and Load Models

In [None]:
## Import Pretrained Models
from torchvision import models
def modify_resnet_for_grayscale(model):
    model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    return model
def modify_vgg_for_grayscale(model):
    model.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    return model
models_dict = {
    'resnet18': models.resnet18(pretrained=True),
    'vgg11': models.vgg11(pretrained=True),
    'densenet121': models.densenet121(pretrained=True)
}

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 176MB/s]
Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /root/.cache/torch/hub/checkpoints/vgg11-8a719046.pth
100%|██████████| 507M/507M [00:02<00:00, 220MB/s]
Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 154MB/s]


In [None]:
## Simple CNN
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)  # Adjust based on image size
        self.fc2 = nn.Linear(128, 2)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Train Models

In [None]:

def train_model(model, og_model_name, train_loader, val_loader, num_epochs, criterion, optimizer, device, metrics_df, model_save_dir, metrics_save_dir):
    """
    Train and validate a model, save the best models and log the metrics into a dataframe.

    Args:
        model: PyTorch model (pretrained or custom).
        train_loader: DataLoader for training data.
        val_loader: DataLoader for validation data.
        num_epochs: Number of training epochs.
        criterion: Loss function (e.g., CrossEntropyLoss).
        optimizer: Optimizer (e.g., Adam, SGD).
        device: Device to run training on ('cuda' or 'cpu').
        model_save_dir: Directory to save the best models.
        metrics_df: DataFrame to store the metrics of the models.
        metrics_save_dir: Directory to save the metrics dataframe.
    """
    # Track the top 5 models by validation F1 score
    top_models = []
    os.makedirs(model_save_dir, exist_ok=True)

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        all_preds, all_labels = [], []

        # Training loop
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Back pass and optimization
            loss.backward()
            optimizer.step()

            # Accumulate metrics
            running_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        # Calculate training metrics
        train_acc = accuracy_score(all_labels, all_preds)
        train_f1 = f1_score(all_labels, all_preds, average='weighted')

        print(f"Epoch {epoch+1}/{num_epochs} - Training Loss: {running_loss:.4f} - Accuracy: {train_acc:.4f} - F1 Score: {train_f1:.4f}")

        # Validation loop
        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for val_inputs, val_labels_batch in val_loader:
                val_inputs, val_labels_batch = val_inputs.to(device), val_labels_batch.to(device)
                val_outputs = model(val_inputs)
                val_preds_batch = torch.argmax(val_outputs, dim=1)
                val_preds.extend(val_preds_batch.cpu().numpy())
                val_labels.extend(val_labels_batch.cpu().numpy())

        # Calculate validation metrics
        val_acc = accuracy_score(val_labels, val_preds)
        val_f1 = f1_score(val_labels, val_preds, average='weighted')

        print(f"Validation Accuracy: {val_acc:.4f} - F1 Score: {val_f1:.4f}")

        # Save model if it is in top 5 by validation F1 score
        top_models.append((val_f1, f"{og_model_name}_{epoch+1}", train_acc, train_f1, val_acc, val_f1))
        top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:5]  # Keep top 5 models

        for idx, (f1, model_name, _, _, _, _) in enumerate(top_models):
            print(f"Top {idx+1}: F1 Score = {f1:.4f}, Model = {model_name}")

        # Save the current model
        model_save_path = os.path.join(model_save_dir, f"{model_name}.pkl")
        torch.save(model.state_dict(), model_save_path)

        # Record metrics of the top models
        for f1, model_name, train_acc, train_f1, val_acc, val_f1 in top_models:
            metrics_df = pd.concat([metrics_df, pd.DataFrame({
                'Model': [model_name],
                'Epoch': [epoch+1],
                'Train Accuracy': [train_acc],
                'Train F1': [train_f1],
                'Test Accuracy': [val_acc],
                'Test F1': [val_f1]
            })], ignore_index=True)
    print("Top 5 models saved:")
    for f1, model_name, _, _, _, _ in top_models:
        print(f"F1 Score: {f1:.4f} - Model: {model_name}")

    # Save metrics dataframe
    metrics_df.to_csv(os.path.join(metrics_save_dir, 'model_metrics.csv'), index=False)
    return metrics_df
metrics_df = pd.DataFrame(columns=['Model', 'Epoch', 'Train Accuracy', 'Train F1', 'Test Accuracy', 'Test F1'])

In [None]:
torch.cuda.empty_cache()

In [None]:
for model_name, model in models_dict.items():
  torch.cuda.empty_cache()
  print(f"Training {model_name}...")
  if model_name == 'resnet18':
    model = modify_resnet_for_grayscale(model)
    model.fc = nn.Linear(model.fc.in_features, 2)
  elif model_name == 'vgg11':
    model = modify_vgg_for_grayscale(model)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
  elif model_name == 'densenet121':
    model = modify_vgg_for_grayscale(model)
    model.classifier = nn.Linear(model.classifier.in_features, 2)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(device)
  model = model.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.00001)
  num_epochs = 30
  metrics_df = train_model(model,
                          model_name,
                          diff_trainloader,
                          diff_testloader,
                          num_epochs,
                          criterion,
                          optimizer,
                          device,
                          metrics_df,
                          "/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ClassifiersOne/",
                          "/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/")



Training resnet18...
cuda
Epoch 1/30 - Training Loss: 2.6898 - Accuracy: 0.3684 - F1 Score: 0.2926
Validation Accuracy: 0.5455 - F1 Score: 0.4006
Top 1: F1 Score = 0.4006, Model = resnet18_1


  metrics_df = pd.concat([metrics_df, pd.DataFrame({


Epoch 2/30 - Training Loss: 2.3150 - Accuracy: 0.4605 - F1 Score: 0.3623
Validation Accuracy: 0.5364 - F1 Score: 0.4722
Top 1: F1 Score = 0.4722, Model = resnet18_2
Top 2: F1 Score = 0.4006, Model = resnet18_1
Epoch 3/30 - Training Loss: 2.0296 - Accuracy: 0.5461 - F1 Score: 0.4727
Validation Accuracy: 0.5000 - F1 Score: 0.4927
Top 1: F1 Score = 0.4927, Model = resnet18_3
Top 2: F1 Score = 0.4722, Model = resnet18_2
Top 3: F1 Score = 0.4006, Model = resnet18_1
Epoch 4/30 - Training Loss: 1.7241 - Accuracy: 0.6645 - F1 Score: 0.6294
Validation Accuracy: 0.4727 - F1 Score: 0.4578
Top 1: F1 Score = 0.4927, Model = resnet18_3
Top 2: F1 Score = 0.4722, Model = resnet18_2
Top 3: F1 Score = 0.4578, Model = resnet18_4
Top 4: F1 Score = 0.4006, Model = resnet18_1
Epoch 5/30 - Training Loss: 1.7024 - Accuracy: 0.7237 - F1 Score: 0.7054
Validation Accuracy: 0.4636 - F1 Score: 0.4024
Top 1: F1 Score = 0.4927, Model = resnet18_3
Top 2: F1 Score = 0.4722, Model = resnet18_2
Top 3: F1 Score = 0.4578,

In [None]:
# Train Models for Image Differencing
for model_name, model in models_dict.items():
  print(f"Training {model_name}...")
  if model_name == 'resnet18':
    model = modify_resnet_for_grayscale(model)
    model.fc = nn.Linear(model.fc.in_features, 2)
  elif model_name == 'vgg11':
    model = modify_vgg_for_grayscale(model)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
  elif model_name == 'densenet121':
    model = modify_vgg_for_grayscale(model)
    model.classifier = nn.Linear(model.classifier.in_features, 2)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(device)
  model = model.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.00001)
  num_epochs = 30
  metrics_df = train_model(model,
                           model_name,
                          diff_trainloader,
                          diff_testloader,
                          num_epochs,
                          criterion,
                          optimizer,
                          device,
                          metrics_df,
                          "/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ImageDiffing/",
                          "/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ImageDiffing/")

Training resnet18...
cuda
Epoch 1/30 - Training Loss: 2.5502 - Accuracy: 0.4934 - F1 Score: 0.3391
Validation Accuracy: 0.5455 - F1 Score: 0.3850
Top 1: F1 Score = 0.3850, Model = resnet18_1
Epoch 2/30 - Training Loss: 2.1349 - Accuracy: 0.5197 - F1 Score: 0.3626
Validation Accuracy: 0.5455 - F1 Score: 0.3850
Top 1: F1 Score = 0.3850, Model = resnet18_1
Top 2: F1 Score = 0.3850, Model = resnet18_2
Epoch 3/30 - Training Loss: 1.8892 - Accuracy: 0.5329 - F1 Score: 0.3906
Validation Accuracy: 0.5455 - F1 Score: 0.4006
Top 1: F1 Score = 0.4006, Model = resnet18_3
Top 2: F1 Score = 0.3850, Model = resnet18_1
Top 3: F1 Score = 0.3850, Model = resnet18_2
Epoch 4/30 - Training Loss: 1.6953 - Accuracy: 0.5592 - F1 Score: 0.4431
Validation Accuracy: 0.5273 - F1 Score: 0.3914
Top 1: F1 Score = 0.4006, Model = resnet18_3
Top 2: F1 Score = 0.3914, Model = resnet18_4
Top 3: F1 Score = 0.3850, Model = resnet18_1
Top 4: F1 Score = 0.3850, Model = resnet18_2
Epoch 5/30 - Training Loss: 1.4970 - Accurac

# Logistic Regression

In [None]:
## Define Dataset Class
class LRMRIDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        """
        Args:
            dataframe (pd.DataFrame): DataFrame containing 'PatientID', 'Description', 'ImageID', and 'Group'.
            root_dir (str): Root directory of the dataset.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        if isinstance(idx, int):
            row = self.dataframe.iloc[idx]
        else:
            row = self.dataframe.loc[idx]
        # Construct image path
        new_image_path = os.path.join(
            self.root_dir,
            f"{row['FolderName']}",
            "ADNI",
            row['Subject'],
            row['Description'].replace(' ', '_').replace(';', '_')
        )
        if not os.path.exists(new_image_path):
          raise FileNotFoundError(f"Directory not found: {new_image_path}")
        if os.path.isdir(new_image_path):
            date_folder = os.listdir(new_image_path)[0]
            date_path = os.path.join(new_image_path, date_folder)
            if os.path.isdir(date_path):
              image_id = os.listdir(date_path)[0]
              image_folder = os.path.join(date_path, image_id)
              image_name = os.listdir(image_folder)[0]
              image_path = os.path.join(image_folder, image_name)

        # Load image
        try:
            image = nib.load(image_path).get_fdata()
        except Exception as e:
            raise RuntimeError(f"Error loading image: {image_path}. Details: {e}")
        # Normalize to 0,1
        image = (image - image.min()) / (image.max() - image.min())
        # Scale Pixel Values
        image = (image * 255).astype(np.uint8)
        # Convert to gray scale
        if image.ndim == 3:
            image = image[:, :, image.shape[2] // 2]

        # Convert NumPy array to PIL image and transform
        image = Image.fromarray(image)
        if self.transform:
          image = self.transform(image)

        # Get Label
        if (row['DIAGNOSIS_GROUP'] == 'MCI_to_AD')|(row['DIAGNOSIS_GROUP'] == 'Normal_to_AD')|(row['DIAGNOSIS_GROUP'] == 'Only_AD'):
          label = 1
        else:
          label = 0
        subject_id = row['Subject']

        return image, label, subject_id

# Define image transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((224, 224)),  # Resize to a standard size (e.g., for a CNN)
    transforms.ToTensor(),          # Convert image to PyTorch tensor
    transforms.Normalize([0.5], [0.5])  # Normalize with mean and std
])


In [None]:
# 1. Sort the DataFrame by PTID and Visit
# Assuming your dataframe contains columns ['PTID', 'Visit', 'Image Path', 'Label']
def preprocess_and_sort(df):
    # Sort the dataframe by PTID and Visit (assuming visits are in 'm06', 'm12', etc.)
    df = df.sort_values(by=['Subject', 'Visit'])
    return df

# 2. Image differencing function
def image_differencing(image1_path, image2_path):
    # Open the two images
    img1 = nib.load(image1_path).get_fdata()
    img2 = nib.load(image2_path).get_fdata()
    # Get middle slcie
    img1_mid = img1[img1.shape[0] // 2, :, :]
    img2_mid = img2[img2.shape[0] // 2, :, :]
    # Normalize to 0,1
    img1_mid = (img1_mid - img1_mid.min()) / (img1_mid.max() - img1_mid.min())
    img2_mid = (img2_mid - img2_mid.min()) / (img2_mid.max() - img2_mid.min())
    # Scale Pixel Values
    image1 = (img1_mid * 255).astype(np.uint8)
    image2 = (img2_mid * 255).astype(np.uint8)

    # Resize to ensure matching shapes (e.g., 256x256)
    target_size = (224, 224)  # Example size
    img1_resized = Image.fromarray(image1).resize(target_size, Image.BILINEAR)
    img2_resized = Image.fromarray(image2).resize(target_size, Image.BILINEAR)

    # Perform image differencing (pixel-wise subtraction)
    img_diff = np.abs(np.array(img1_resized) - np.array(img2_resized))

    # Convert the difference array back to an image (in case you need to save or visualize it)
    img_diff = Image.fromarray(img_diff)

    return img_diff

# 3. Dataset class for differenced images
class LRImageDifferenceDataset(Dataset):
    def __init__(self, df, transform=None):
        """
        Args:
            df (DataFrame): DataFrame containing columns ['Subject', 'Visit', 'FullPath', 'DIAGNOSIS_GROUP']
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.df = df
        self.transform = transform
        self.data = self.create_image_diff_data()

    def create_image_diff_data(self):
        """
        Create the image differences for each PTID.
        """
        data = []
        ptids = self.df['Subject'].unique()
        for ptid in ptids:
            ptid_data = self.df[self.df['Subject'] == ptid]
            ptid_data = ptid_data.drop_duplicates(subset=['Visit'])
            if len(ptid_data) >= 2:
              # For m06-bl difference (Baseline vs. 6 months)
              m06_bl_img1 = ptid_data[ptid_data['Visit'] == 'sc']['FullPath'].values[0]
              m06_bl_img2 = ptid_data[ptid_data['Visit'] == 'm06']['FullPath'].values[0]
              m06_bl_diff = image_differencing(m06_bl_img1, m06_bl_img2)

              diagnosis_group = ptid_data['DIAGNOSIS_GROUP'].values[0]
              if diagnosis_group in ['MCI_to_AD', 'Normal_to_AD', 'Only_AD']:
                  label = 1
              else:
                  label = 0
              subject_id = ptid_data['Subject'].values[0]
              # Append the differenced images and corresponding labels to the data list
              data.append((m06_bl_diff, label, subject_id))
            else:
              print(f'Not found for {ptid}')

        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label, subject_id = self.data[idx]

        if self.transform:
            image = self.transform(image)

        return image, label, subject_id

# 4. DataLoader with transformations
diff_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [None]:
# Create different train/test sets for easier inference
lr_6mo = lr_traininfo.loc[lr_traininfo['Visit'].isin(['m06'])]
lr_sc = lr_traininfo.loc[lr_traininfo['Visit'].isin(['sc'])]
lr_6mo_test = lr_testinfo.loc[lr_testinfo['Visit'].isin(['m06'])]
lr_sc_test = lr_testinfo.loc[lr_testinfo['Visit'].isin(['sc'])]

# Train Set Loaders
lr_6motrainset = LRMRIDataset(lr_6mo, root_dir=root_dir, transform=transform)
lr_6motrainloader = DataLoader(lr_6motrainset, batch_size=64, shuffle=False)

lr_sctrainset = LRMRIDataset(lr_sc, root_dir=root_dir, transform=transform)
lr_sctrainloader = DataLoader(lr_sctrainset, batch_size=64, shuffle=False)

lr_diftrain = LRImageDifferenceDataset(lr_traininfo, transform=diff_transform)
lr_diftrainloader = DataLoader(lr_diftrain, batch_size=64, shuffle=False)

# Test Set Loaders
lr_6motestset = LRMRIDataset(lr_6mo_test, root_dir=root_dir, transform=transform)
lr_6motestloader = DataLoader(lr_6motestset, batch_size=64, shuffle=False)

lr_sctestset = LRMRIDataset(lr_sc_test, root_dir=root_dir, transform=transform)
lr_sctestloader = DataLoader(lr_sctestset, batch_size=64, shuffle=False)

lr_dif_test = LRImageDifferenceDataset(lr_testinfo, transform=diff_transform)
lr_dif_testloader = DataLoader(lr_dif_test, batch_size=64, shuffle=False)


Not found for 033_S_0923
Not found for 027_S_0118
Not found for 137_S_0800
Not found for 033_S_1016
Not found for 005_S_0610
Not found for 036_S_0945
Not found for 035_S_0048
Not found for 114_S_0166
Not found for 099_S_0470
Not found for 057_S_0643
Not found for 021_S_0984
Not found for 014_S_0563
Not found for 014_S_0557
Not found for 137_S_0668
Not found for 005_S_0553
Not found for 036_S_0673
Not found for 027_S_0403
Not found for 005_S_0221
Not found for 002_S_1018
Not found for 094_S_0711
Not found for 021_S_0343
Not found for 114_S_0979
Not found for 036_S_0759
Not found for 114_S_0416
Not found for 036_S_0672
Not found for 137_S_0631
Not found for 014_S_0558
Not found for 099_S_0533
Not found for 127_S_0260
Not found for 027_S_0835
Not found for 099_S_0090
Not found for 099_S_0352
Not found for 007_S_0316
Not found for 005_S_0546
Not found for 027_S_0074
Not found for 002_S_1155
Not found for 033_S_1098
Not found for 035_S_0555
Not found for 027_S_0644
Not found for 137_S_0481


In [None]:
# Function to generate predictions
def generate_predictions(model, dataloader, device='cuda'):
    predictions_list = []
    labels_list = []
    visits_list = []
    subjects_list = []

    with torch.no_grad():
        for inputs, labels, subject_id in dataloader:
            outputs = model(inputs)
            probs = softmax(outputs, dim=1)[:, 1].cpu().numpy()  # Probability of class 1
            predictions_list.extend(probs)
            labels_list.extend(labels.numpy())
            subjects_list.extend(subject_id)

    return pd.DataFrame({
        'Subject': subjects_list,
        'Label': labels_list,
        'Probability': predictions_list
    })

In [None]:
class_models_df = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/model_metrics.csv')
class_models_df.sort_values(by='Test F1', ascending=False, inplace=True)
class_models_df.reset_index(drop=True, inplace=True)
dif_models_df = pd.read_csv('/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ImageDiffing/model_metrics.csv')
dif_models_df.sort_values(by='Test F1', ascending=False, inplace=True)
dif_models_df.reset_index(drop=True, inplace=True)

# Add F1 Rank as a column (1-based ranking for readability)
class_models_df['F1 Rank'] = class_models_df.index + 1
dif_models_df['F1 Rank'] = dif_models_df.index + 1



all_models = pd.merge(class_models_df, dif_models_df, on='F1 Rank', suffixes=('_class', '_dif'))
all_models.drop_duplicates(subset='Test F1_class', inplace=True)
all_models.drop_duplicates(subset='Test F1_dif', inplace=True)

all_models.reset_index(drop=True, inplace=True)
all_models.head(15)



Unnamed: 0,Model_class,Epoch_class,Train Accuracy_class,Train F1_class,Test Accuracy_class,Test F1_class,F1 Rank,Model_dif,Epoch_dif,Train Accuracy_dif,Train F1_dif,Test Accuracy_dif,Test F1_dif
0,vgg11_23,25,0.953947,0.953857,0.536364,0.537324,1,resnet18_29,29,1.0,1.0,0.6,0.578571
1,densenet121_16,21,1.0,1.0,0.554545,0.537234,9,resnet18_28,29,1.0,1.0,0.590909,0.570889
2,resnet18_29,30,1.0,1.0,0.536364,0.537172,40,resnet18_10,24,0.993421,0.99342,0.618182,0.568534
3,densenet121_22,23,1.0,1.0,0.527273,0.528212,72,resnet18_12,27,0.993421,0.99342,0.6,0.554286
4,densenet121_18,25,1.0,1.0,0.536364,0.527423,98,resnet18_7,15,0.868421,0.865618,0.6,0.547988
5,resnet18_26,28,1.0,1.0,0.527273,0.526335,125,resnet18_10,12,1.0,1.0,0.545455,0.546359
6,resnet18_22,28,1.0,1.0,0.527273,0.525082,137,resnet18_6,13,1.0,1.0,0.545455,0.545455
7,densenet121_17,19,1.0,1.0,0.536364,0.521722,145,vgg11_23,27,0.953947,0.953857,0.536364,0.537324
8,densenet121_13,16,1.0,1.0,0.536364,0.510476,152,densenet121_16,27,1.0,1.0,0.554545,0.537234
9,vgg11_13,20,0.868421,0.868353,0.5,0.500372,189,vgg11_21,29,0.940789,0.940797,0.536364,0.537169


In [None]:
# Generate predictions for all datasets
class_model_root = '/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ClassifiersOne/'
dif_model_root = '/content/drive/MyDrive/PBHLT7120_Project/ProjectMaterials/Models/ImageDiffing/'
from torchvision.models import resnet18, vgg11, densenet121
from torch.nn import functional as F
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
class CustomResNet(nn.Module):
    def __init__(self, num_classes=2):
        super(CustomResNet, self).__init__()
        self.model = resnet18(pretrained=False)
        modify_resnet_for_grayscale(self.model)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)  # Custom classification head

    def forward(self, x):
        return self.model(x)

class CustomVGG(nn.Module):
    def __init__(self, num_classes=2):
        super(CustomVGG, self).__init__()
        self.model = vgg11(pretrained=False)
        modify_vgg_for_grayscale(self.model)
        self.model.classifier[6] = nn.Linear(self.model.classifier[6].in_features, num_classes)  # Custom classification head

    def forward(self, x):
        return self.model(x)
class CustomDenseNet(nn.Module):
    def __init__(self, num_classes=2):
        super(CustomDenseNet, self).__init__()
        self.model = densenet121(pretrained=False)
        modify_vgg_for_grayscale(self.model)
        self.model.classifier = nn.Linear(self.model.classifier.in_features, num_classes)  # Custom classification head

    def forward(self, x):
        return self.model(x)

for row_id, _ in all_models.iterrows():
  torch.cuda.empty_cache()
  # Load Models
  try:
    row = all_models.iloc[row_id]
    class_name = row['Model_class']
    dif_name = row['Model_dif']
    if class_name == 'resnet18%':
      class_model = CustomResNet(num_classes=2)
    elif class_name == 'vgg11%':
      class_model = CustomVGG(num_classes=2)
    elif class_name == 'densenet121%':
      class_model = CustomDenseNet(num_classes=2)

    if dif_name == 'resnet18%':
      dif_model = CustomResNet(num_classes=2)
    elif dif_name == 'vgg11%':
      dif_model = CustomVGG(num_classes=2)
    elif dif_name == 'densenet121%':
      dif_model = CustomDenseNet(num_classes=2)

    class_path = os.path.join(class_model_root, f'{class_name}.pkl')
    dif_path = os.path.join(dif_model_root, f'{dif_name}.pkl')
    state_dict = torch.load(class_path, map_location=torch.device('cuda'))
    state_dict = {f"model.{k}": v for k, v in state_dict.items()}
    class_model.load_state_dict(state_dict)
    state_dict = torch.load(dif_path, map_location=torch.device('cuda'))
    state_dict = {f"model.{k}": v for k, v in state_dict.items()}
    dif_model.load_state_dict(state_dict)

    # Create Training Image Predictions
    df_6mo_class = generate_predictions(class_model, lr_6motrainloader)
    df_sc_class = generate_predictions(class_model, lr_sctrainloader)
    df_diff = generate_predictions(dif_model, lr_diftrainloader)

    # Create image predictions for test data
    df_6mo_class_test = generate_predictions(class_model, lr_6motestloader)
    df_sc_class_test = generate_predictions(class_model, lr_sctestloader)
    df_diff_test = generate_predictions(dif_model, lr_dif_testloader)

    # Combine Training Predictions Into a Single DataFrame
    df_6mo_class.rename(columns={'Probability': 'Probability_6mo'}, inplace=True)
    df_sc_class.rename(columns={'Probability': 'Probability_sc'}, inplace=True)
    df_diff.rename(columns={'Probability': 'Probability_diff'}, inplace=True)
    df_diff = df_diff.drop(columns=['Label'])
    df_sc_class = df_sc_class.drop(columns=['Label'])
    df_combined = pd.merge(df_6mo_class, df_sc_class, on='Subject', how='inner')
    df_combined = pd.merge(df_combined, df_diff, on='Subject', how='inner')
    df_combined = df_combined.reset_index(drop=True)

    # Combine Testing Predictions Into a Single DataFrame
    df_6mo_class_test.rename(columns={'Probability': 'Probability_6mo'}, inplace=True)
    df_sc_class_test.rename(columns={'Probability': 'Probability_sc'}, inplace=True)
    df_diff_test.rename(columns={'Probability': 'Probability_diff'}, inplace=True)
    df_6mo_class_test = df_6mo_class_test.reset_index(drop=True)
    df_sc_class_test = df_sc_class_test.reset_index(drop=True)
    df_diff_test = df_diff_test.reset_index(drop=True)
    df_6mo_class_test.drop(columns=['Label'], inplace=True)
    df_diff_test.drop(columns=['Label'], inplace=True)
    combined_test = pd.merge(df_6mo_class_test, df_sc_class_test, on='Subject', how='inner')
    combined_test = pd.merge(combined_test, df_diff_test, on='Subject', how='inner')

    # Fit logistic regression and calculate training statistics
    lr = LogisticRegression()
    lr.fit(df_combined[['Probability_6mo', 'Probability_sc', 'Probability_diff']], df_combined['Label'])
    predictions = lr.predict(df_combined[['Probability_6mo', 'Probability_sc', 'Probability_diff']])
    accuracy = accuracy_score(df_combined['Label'], predictions)
    print(f"Train Accuracy: {accuracy}")
    f1 = f1_score(df_combined['Label'], predictions)
    print(f"Train F1 Score: {f1}")

    # Evaluate on Test Data
    predictions_test = lr.predict(combined_test[['Probability_6mo', 'Probability_sc', 'Probability_diff']])
    accuracy_test = accuracy_score(combined_test['Label'], predictions_test)
    f1_test = f1_score(combined_test['Label'], predictions_test)
    print(f"Test Accuracy: {accuracy_test}")
    print(f"Test F1 Score: {f1_test}")
  except:
    print('Error')

  state_dict = torch.load(class_path, map_location=torch.device('cuda'))


Error
Error
Error
Error
Error


  state_dict = torch.load(dif_path, map_location=torch.device('cuda'))


Train Accuracy: 0.5646551724137931
Train F1 Score: 0.34838709677419355
Test Accuracy: 0.6504854368932039
Test F1 Score: 0.5263157894736842


  state_dict = torch.load(class_path, map_location=torch.device('cuda'))
  state_dict = torch.load(dif_path, map_location=torch.device('cuda'))


Train Accuracy: 0.5646551724137931
Train F1 Score: 0.36477987421383645
Test Accuracy: 0.6504854368932039
Test F1 Score: 0.5263157894736842
Error


  state_dict = torch.load(class_path, map_location=torch.device('cuda'))


Error
Error
Error


  state_dict = torch.load(dif_path, map_location=torch.device('cuda'))


Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
