In [1]:
import os
import math
import torch
import random
import shutil
import pickle
import pprint
import warnings
import numpy as np
import pandas as pd
import torch.nn as nn
from PIL import Image
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import models, transforms
from transformers import ViTFeatureExtractor, ViTModel, ViTImageProcessor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_frame = pd.read_csv('oasis_cross-sectional.csv')
data_frame.drop('Delay', axis=1, inplace=True)
data_frame = data_frame.dropna()

with open('patients_codes_set.pkl', 'rb') as f:
    patients_codes_set = pickle.load(f)

print("Set loaded from patients_codes_set.pkl:", patients_codes_set)

Set loaded from patients_codes_set.pkl: {'0233', '0164', '0205', '0022', '0158', '0272', '0440', '0380', '0094', '0307', '0286', '0247', '0210', '0352', '0390', '0315', '0308', '0042', '0304', '0351', '0143', '0226', '0374', '0432', '0120', '0003', '0238', '0441', '0329', '0115', '0267', '0453', '0288', '0451', '0273', '0300', '0411', '0066', '0142', '0243', '0039', '0179', '0290', '0335', '0082', '0447', '0161', '0400', '0263', '0123', '0021', '0402', '0454', '0016', '0287', '0418', '0298', '0023'}


In [3]:
regression_dataframe = pd.DataFrame(columns=['Type', 'model', 'MMSE MAE', 'MMSE MSE', 'MMSE R2', 'MMSE RMSE'])

lr = 1e-6
epochs = 501
num = 400
train_folder = './train_data'
# test_folder = './test_data'
test_folder = './train_data'

if not os.path.exists('./comparison'+'/models'):
    os.makedirs('./comparison'+'/models')

if not os.path.exists('./comparison'+'/results'):
    os.makedirs('./comparison'+'/results')

if not os.path.exists('./comparison'+'/test'):
    os.makedirs('./comparison'+'/test')

if not os.path.exists('./models'):
    os.makedirs('./models')

In [4]:
folders = os.listdir(train_folder)
folders = [folder for folder in folders if os.path.isdir(train_folder + '/'+folder)]
print("Folders:", folders)

index = []
for i in range(len(folders)):
    folder = folders[i]
    if i == 0:
        y = 1
    elif i == 1:
        y = 3
    elif i == 2:
        y = 2
    else:
        y = 0
    for file in os.listdir(train_folder + '/' + folder):
        index.append([train_folder + '/' + folder + '/' + file, y])

Folders: ['VeryMildDemented', 'ModerateDemented', 'MildDemented', 'NonDemented']


In [5]:
def calculate_results(results, mode):
    # adding MR1_1 results to the dataframe
    mae_MMSE = mean_absolute_error(results['True MMSE'], results['Predicted MMSE'])
    # print(f'MMSE MAE: {mae_MMSE}')
    # mae_CDR = mean_absolute_error(results['True CDR'], results['Predicted CDR'])
    # # print(f'CDR MAE: {mae_CDR}')
    # mae_SES = mean_absolute_error(results['True SES'], results['Predicted SES'])
    # print(f'SES MAE: {mae_SES}')

    mse_MMSE = mean_squared_error(results['True MMSE'], results['Predicted MMSE'])
    # print(f'MMSE MSE: {mse_MMSE}')
    # mse_CDR = mean_squared_error(results['True CDR'], results['Predicted CDR'])
    # # print(f'CDR MSE: {mse_CDR}')
    # mse_SES = mean_squared_error(results['True SES'], results['Predicted SES'])
    # print(f'SES MSE: {mse_SES}')

    r2_MMSE = r2_score(results['True MMSE'], results['Predicted MMSE'])
    # print(f'MMSE R2: {r2_MMSE}')
    # r2_CDR = r2_score(results['True CDR'], results['Predicted CDR'])
    # # print(f'CDR R2: {r2_CDR}')
    # r2_SES = r2_score(results['True SES'], results['Predicted SES'])
    # print(f'SES R2: {r2_SES}')

    rmse_MMSE = root_mean_squared_error(results['True MMSE'], results['Predicted MMSE'])
    # print(f'MMSE RMSE: {rmse_MMSE}')
    # rmse_CDR = root_mean_squared_error(results['True CDR'], results['Predicted CDR'])
    # # print(f'CDR RMSE: {rmse_CDR}')
    # rmse_SES = root_mean_squared_error(results['True SES'], results['Predicted SES'])
    # print(f'SES RMSE: {rmse_SES}')

    # new_data = pd.DataFrame([{'Type': 'Regression', 'model': mode, 'MMSE MAE': mae_MMSE, 'CDR MAE': mae_CDR, 'SES MAE': mae_SES, 'MMSE MSE': mse_MMSE, 'CDR MSE': mse_CDR, 'SES MSE': mse_SES, 'MMSE R2': r2_MMSE, 'CDR R2': r2_CDR, 'SES R2': r2_SES, 'MMSE RMSE': rmse_MMSE, 'CDR RMSE': rmse_CDR, 'SES RMSE': rmse_SES}])
    
    new_data = pd.DataFrame([{'Type': 'Regression', 'model': mode, 'MMSE MAE': mae_MMSE, 'MMSE MSE': mse_MMSE, 'MMSE R2': r2_MMSE, 'MMSE RMSE': rmse_MMSE}])

    return new_data

In [6]:
# regression model
# Initialize the feature extractor and the model
feature_extractor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
vit_model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')

# Freeze the ViT model parameters
for param in vit_model.parameters():
    param.requires_grad = False

class CombinedModel(nn.Module):
    def __init__(self, vit_model):
        super(CombinedModel, self).__init__()
        self.vit_model = vit_model
        self.linear_layer = nn.Linear(vit_model.config.hidden_size, 3)
        self.linear_layer.weight.data.normal_(mean=0.0, std=0.02)
        self.relu = nn.ReLU()
        self.out_1 = nn.Linear(8, 5)
        self.out_1.weight.data.normal_(mean=0.0, std=0.02)
        self.out_2 = nn.Linear(5, 1)
        self.out_2.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self, inputs, x):
        # Preprocess the input tensor
        inputs = inputs.mean(dim=0, keepdim=True).repeat(3, 1, 1)
        inputs = feature_extractor(images=inputs, return_tensors="pt")
        outputs = self.vit_model(**inputs)
        last_hidden_state = outputs.last_hidden_state
        pooled_output = last_hidden_state.mean(dim=1)
        final_output = self.linear_layer(pooled_output)
        # print(final_output.shape, x.shape)
        final_output = torch.cat((final_output, x), 1)
        # print(final_output.shape)
        final_output = self.relu(final_output)
        final_output = self.out_1(final_output)
        final_output = self.relu(final_output)
        final_output = self.out_2(final_output)
        return final_output

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)
model = CombinedModel(vit_model).to(device)
# Define the loss function and optimizer
criterion = nn.MSELoss()

Device: cpu


In [7]:
mode_list = ['MR1_1', 'MR1_2', 'MR1_3', 'MR1_4']

In [8]:
# regression models

for i in range(len(mode_list)):
    model = CombinedModel(vit_model).to(device)
    model_load_path = 'models/MR1_1_reg.pth'
    # model.load_state_dict(torch.load(model_load_path))

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    model = CombinedModel(vit_model).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    mode = mode_list[i]
    print(f"Training {mode} model")

    for epoch in range(epochs):
        running_loss = 0.0
        results = [['code', 'Predicted MMSE', 'True MMSE']]
        for i in range(num):
            random_index = random.randint(0, len(index)-1)
            filename = index[random_index][0]
            if mode in filename:
                data = np.load(index[random_index][0])
                data = data/255
                label = index[random_index][1]
                code = index[random_index][0][-14:-10]
                x = data_frame[data_frame['ID'].str.contains(code)][['Age', 'Educ', 'eTIV', 'nWBV', 'ASF']].to_numpy()
                x[:, 0] = (x[:, 0] - 18) / (96 - 18)
                x[:, 1] = (x[:, 1] - 5) / (5 - 1)
                x[:, 2] = (x[:, 2] - 1123) / (1992 - 1123)
                x[:, 3] = (x[:, 3] - 0.644) / (0.893 - 0.644)
                x[:, 4] = (x[:, 4] - 0.881) / (1.563 - 0.881)
                if code not in patients_codes_set:
                    x = np.zeros((1, 5))
                x = torch.tensor(x).float()
                y = data_frame[data_frame['ID'].str.contains(code)]['MMSE'].to_numpy()
                if len(y) == 0:
                    y = np.mean(data_frame['MMSE'])
                    y = np.array([y])
                # print(y)
                model.train()
                optimizer.zero_grad()
                input_tensor = torch.clamp(torch.tensor(data), 0, 1)
                outputs = model(input_tensor, x)
                loss = criterion(outputs, torch.tensor(y).float())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                results.append([code, outputs[0][0].detach().numpy(), y[0]])
        if epoch % 100 ==0:
            print(f'Epoch {epoch} loss: {running_loss}')

    # convert the results to a numpy array
    results = pd.DataFrame(results[1:], columns=results[0])
    results.to_csv('results/'+mode+'_'+'results_reg.csv', index=False)

    model_save_path = 'models/'+mode+'_'+'reg.pth'
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

    new_data = calculate_results(results, mode)
    regression_dataframe = pd.concat([regression_dataframe, new_data], ignore_index=True)

It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.


Training MR1_1 model
Epoch 0 loss: 78701.53210449219
Epoch 100 loss: 74757.78858947754
Epoch 200 loss: 69160.09353637695
Epoch 300 loss: 70660.8083190918
Epoch 400 loss: 74302.4275970459
Epoch 500 loss: 76706.56958007812
Model saved to models/MR1_1_reg.pth
Training MR1_2 model
Epoch 0 loss: 90190.17953491211
Epoch 100 loss: 76625.70092773438
Epoch 200 loss: 72214.13122558594
Epoch 300 loss: 83002.19458007812
Epoch 400 loss: 73933.3599243164
Epoch 500 loss: 77370.93539428711
Model saved to models/MR1_2_reg.pth
Training MR1_3 model
Epoch 0 loss: 68644.83992004395
Epoch 100 loss: 74019.81463623047
Epoch 200 loss: 73348.26153564453
Epoch 300 loss: 74228.5511932373
Epoch 400 loss: 86622.53573608398
Epoch 500 loss: 73536.5950012207
Model saved to models/MR1_3_reg.pth
Training MR1_4 model
Epoch 0 loss: 68882.3466796875
Epoch 100 loss: 70684.30493164062
Epoch 200 loss: 86320.25775146484
Epoch 300 loss: 73352.9733581543
Epoch 400 loss: 63945.5680847168
Epoch 500 loss: 72364.41641235352
Model sa

In [9]:
regression_dataframe.to_csv('results/regression_results.csv', index=False)