In [21]:
import pandas as pd
import numpy as np
import glob
import os
import time
from tqdm import tqdm
from pathlib import Path
import math

from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torchsummary import summary
from torch.optim.lr_scheduler import _LRScheduler
from torch import nn
from torch.utils.data import Dataset
from torchvision import transforms as T
import nibabel as nib
from skimage.transform import resize

import random

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(0)

In [38]:
def nll_loss(mean, log_var, targets):
    precision = torch.exp(-log_var)
    return torch.mean(precision * (targets - mean) ** 2 + log_var)

class CNN_Trainer():
    def __init__(self, model, results_folder, dataloader_train, dataloader_valid, dataloader_test, epochs, optimizer, scheduler):
        super(CNN_Trainer, self).__init__()

        self.models = [model]
        self.dataloader_train = dataloader_train
        self.dataloader_valid = dataloader_valid
        self.dataloader_test = dataloader_test
        self.epochs = epochs
        self.optimizers = [optimizer]
        self.schedulers = [scheduler]
        self.results_folder = Path(results_folder)
        self.results_folder.mkdir(exist_ok=True)
        self.valid_loss_min = np.Inf  # 검증 손실의 최소값 초기화

        # wandb.watch(model, log="all")  # WandB에 모델 로깅 시작

    def evaluate_uncertainty(self):
        """앙상블의 모든 모델에 대해 평균과 분산 계산 및 로깅"""
        ensemble_means = []
        ensemble_variances = []
        with torch.no_grad():
            for model in self.models:
                model.eval()
                means = []
                variances = []
                for inputs, _ in self.dataloader_valid:
                    inputs = inputs.cuda()
                    outputs = model(inputs)
                    mean = outputs[:, 0]
                    log_var = outputs[:, 1]
                    variance = torch.exp(log_var)
                    
                    means.append(mean)
                    variances.append(variance)
                
                ensemble_means.append(torch.cat(means, dim=0))
                ensemble_variances.append(torch.cat(variances, dim=0))

        # 앙상블 평균과 분산 계산
        final_mean = torch.mean(torch.stack(ensemble_means), dim=0)
        final_variance = torch.mean(torch.stack(ensemble_variances), dim=0) + torch.var(torch.stack(ensemble_means), dim=0)
        
        # WandB에 로깅
        # wandb.log({
        #     "Ensemble Mean Uncertainty": final_mean.mean().item(),
        #     "Ensemble Variance Uncertainty": final_variance.mean().item()
        # })


    def add_model(self, new_model, optimizer, scheduler):
        """새로운 모델을 앙상블에 추가하고 불확실성 평가"""
        self.models.append(new_model)
        self.optimizers.append(optimizer)
        self.schedulers.append(scheduler)
        # 앙상블에 모델 추가 후 불확실성 평가
        self.evaluate_uncertainty()


    def validate(self, model):
        model.eval()
        val_loss_sum = 0.0
        mae_sum = 0.0
        mean_sum = 0.0  # 평균의 합계
        variance_sum = 0.0  # 분산의 합계
        count = 0
        with torch.no_grad():
            for inputs, targets in self.dataloader_valid:
                inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                mean = outputs[:, 0]
                log_var = outputs[:, 1]
                variance = torch.exp(log_var)  # 로그 분산을 분산으로 변환

                loss = nll_loss(mean, log_var, targets)
                val_loss_sum += loss.item() * inputs.size(0)
                mae = torch.abs(mean - targets).mean()
                mae_sum += mae.item() * inputs.size(0)

                mean_sum += mean.sum().item()
                variance_sum += variance.sum().item()
                count += inputs.size(0)

        val_loss_avg = val_loss_sum / count
        mae_avg = mae_sum / count
        mean_avg = mean_sum / count  # 평균의 평균
        variance_avg = variance_sum / count  # 분산의 평균

        return val_loss_avg, mae_avg, mean_avg, variance_avg



    def train_single_model(self, model_idx):
        model = self.models[model_idx]
        optimizer = self.optimizers[model_idx]
        scheduler = self.schedulers[model_idx]

        for epoch in tqdm(range(self.epochs)):
            model.train()
            train_loss_sum = 0.0
            train_mean_sum = 0.0  # 훈련 데이터 평균의 합계
            train_variance_sum = 0.0  # 훈련 데이터 분산의 합계
            count = 0

            for inputs, targets in self.dataloader_train:
                inputs, targets = inputs.cuda(), targets.cuda()
                optimizer.zero_grad()
                outputs = model(inputs)
                mean = outputs[:, 0]
                log_var = outputs[:, 1]
                variance = torch.exp(log_var)  # 로그 분산을 분산으로 변환

                loss = nll_loss(mean, log_var, targets)
                loss.backward()
                optimizer.step()

                train_loss_sum += loss.item() * inputs.size(0)
                train_mean_sum += mean.sum().item()
                train_variance_sum += variance.sum().item()
                count += inputs.size(0)

            train_loss_avg = train_loss_sum / count
            train_mean_avg = train_mean_sum / count  # 훈련 데이터 평균의 평균
            train_variance_avg = train_variance_sum / count  # 훈련 데이터 분산의 평균

            val_loss_avg, val_mae_avg, val_mean_avg, val_variance_avg = self.validate(model)
            scheduler.step(val_loss_avg)  # Scheduler 업데이트

            self.evaluate_uncertainty()

            # # WandB 로깅
            # wandb.log({
            #     f"Epoch": epoch,
            #     f"Train Loss Model {model_idx+1}": train_loss_avg,
            #     f"Validation Loss Model {model_idx+1}": val_loss_avg,
            #     f"Validation MAE Model {model_idx+1}": val_mae_avg,
            #     f"Train Mean Model {model_idx+1}": train_mean_avg,
            #     f"Train Variance Model {model_idx+1}": train_variance_avg,
            #     f"Validation Mean Model {model_idx+1}": val_mean_avg,
            #     f"Validation Variance Model {model_idx+1}": val_variance_avg,
            # })

            # print(f"Epoch [{epoch+1}/{self.epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Val MAE: {val_mae_avg:.4f}, Train Mean: {train_mean_avg:.4f}, Train Variance: {train_variance_avg:.4f}")


    def train_ensemble(self):
        for model_idx in range(len(self.models)):
            self.train_single_model(model_idx)
            self.save(model_idx)
        self.evaluate_uncertainty()


    def test_ensemble(self):
        """앙상블 모델로 테스트 데이터에 대한 예측을 수행하고 평균, 분산, MAE를 계산"""
        ensemble_predictions = []
        ensemble_variances = []
        mae_list = []
        with torch.no_grad():
            for model_idx, model in enumerate(self.models):
                model.eval()  # 모델을 평가 모드로 설정
                predictions = []
                variances = []
                total_mae = 0
                count = 0
                for inputs, targets in self.dataloader_test:
                    inputs, targets = inputs.cuda(), targets.cuda()
                    outputs = model(inputs)
                    mean = outputs[:, 0]
                    log_var = outputs[:, 1]
                    variance = torch.exp(log_var)  # 로그 분산을 분산으로 변환

                    mae = torch.abs(mean - targets).mean().item()
                    total_mae += mae * inputs.size(0)
                    count += inputs.size(0)
                    
                    predictions.append(mean)
                    variances.append(variance)

                model_mae = total_mae / count
                mae_list.append(model_mae)
                
                # 모델별 예측 수행 및 저장
                model_predictions = torch.cat(predictions, dim=0)
                model_variances = torch.cat(variances, dim=0)
                ensemble_predictions.append(model_predictions)
                ensemble_variances.append(model_variances)

            # 앙상블 예측을 위한 평균 및 분산 계산
            final_prediction = torch.mean(torch.stack(ensemble_predictions), dim=0)
            final_variance = torch.mean(torch.stack(ensemble_variances), dim=0) + torch.var(torch.stack(ensemble_predictions), dim=0)
            
            # 앙상블에 대한 MAE 계산
            ensemble_mae = sum(mae_list) / len(mae_list)

            # # WandB에 테스트 결과 로깅
            # wandb.log({
            #     "Test Mean": final_prediction.mean().item(),
            #     "Test Variance": final_variance.mean().item(),
            #     "Test MAE": ensemble_mae
            # })

            return final_prediction, final_variance, ensemble_mae



    def predict_with_features(self, model, features):
        predictions = []
        for feature in features:
            prediction = model.module.fc(feature.unsqueeze(0))  # feature.unsqueeze(0)는 배치 차원을 추가
            predictions.append(prediction)
        return torch.cat(predictions, dim=0)




    def save(self, model_idx):
        model_save_path = f"{self.results_folder}/model_{model_idx}.pth"
        torch.save({"epoch": model_idx+1, 
                    "state_dict": self.model.state_dict(), 
                    "optimizer" : self.optimizer.state_dict(),  
                    "train_mae_list": self.train_mae_list,
                    "valid_mae_list": self.valid_mae_list},  
                    model_save_path)
        print(f"Model {model_idx+1} saved to {model_save_path}")

    def load(self, checkpoint):
        self.model.load_state_dict(checkpoint["state_dict"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.epoch = checkpoint["epoch"]  # Get the epoch directly from the checkpoint
        self.train_mse_list = checkpoint.get("train_mse_list", [])
        self.train_mae_list = checkpoint.get("train_mae_list", [])
        self.valid_mse_list = checkpoint.get("valid_mse_list", [])
        self.valid_mae_list = checkpoint.get("valid_mae_list", [])

    
    def save_features(self, milestone):
        all_features = torch.cat(self.feature_list, dim=0)
        # features 디렉토리 경로 설정
        features_folder = Path(self.results_folder) / 'feat'
        features_folder.mkdir(parents=True, exist_ok=True)
        torch.save(all_features, f"{features_folder}/cv-{self.cv_num}-features-{milestone+1}.pt")



# Model define
def initialize_weights(model):
    for m in model.modules():
        if isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
            
# Define 3D_CNN model class

class CNN(nn.Module):
    def __init__(self, in_channels):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv3d(in_channels, 32, kernel_size=(3,3,3), padding=1),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.Conv3d(32, 32, kernel_size=(3,3,3), padding=1),
            nn.ReLU(),
            nn.Dropout3d(0.2),
            nn.MaxPool3d(kernel_size=(2,2,2))
        )

        self.conv2 = nn.Sequential(
            nn.Conv3d(32, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.Dropout3d(0.2),
            nn.MaxPool3d(kernel_size=(2,2,2))
        )

        self.conv3 = nn.Sequential(
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2)
        )

        self.conv4 = nn.Sequential(
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.Conv3d(64, 64, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2)
        )

        self.conv5 = nn.Sequential(
            nn.Conv3d(64, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.MaxPool3d(2)
        )

        self.conv6 = nn.Sequential(
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.MaxPool3d(2)
        )

        self.conv7 = nn.Sequential(
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96),
            nn.Conv3d(96, 96, kernel_size=(3,3,3), stride=1, padding="same"),
            nn.ReLU(),
            nn.BatchNorm3d(96)
        )
        
        self.flatten = nn.Flatten()

        # 평균과 로그 분산을 출력하기 위한 fully connected 레이어
        # 출력 차원을 2로 설정 (하나는 평균, 하나는 로그 분산)
        self.fc = nn.Sequential(
            nn.Linear(768, 96),
            nn.ReLU(),
            nn.Linear(96, 32),
            nn.ReLU(),
            nn.Linear(32, 2)  # 출력 차원을 2로 변경
        )

    def forward_features(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.flatten(x)
        return x
            
    def forward(self, x):
        x = self.forward_features(x)
        x = self.fc(x)
        # 출력이 평균과 로그 분산으로 구성
        return x

In [39]:
dataset_df = pd.read_csv('/media/leelabsg-storage1/yein/research/BAE/RegionBAE/data/adni_cn_region.csv')
dataset_df.head()

Unnamed: 0,subjectID,imgs,mask,age,gender,caudate,caudate_mask,cerebellum,cerebellum_mask,frontal_lobe,...,occipital_lobe,occipital_lobe_mask,parietal_lobe,parietal_lobe_mask,putamen,putamen_mask,temporal_lobe,temporal_lobe_mask,thalamus,thalamus_mask
0,sub-002S0295X20110602,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,90.0,M,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...
1,sub-002S0413X20060519,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,76.5,F,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...
2,sub-002S0559X20060627,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,79.5,M,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...
3,sub-002S0685X20110708,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,94.7,F,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...
4,sub-002S1261X20070227,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,71.2,F,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...,/media/leelabsg-storage1/yein/research/data/ad...


In [40]:
DATA_SIZE = 630
regions = {0: 'imgs', 1: 'caudate', 2: 'cerebellum', 3: 'frontal_lobe', 4: 'insula', 5: 'occipital_lobe', 6: 'parietal_lobe', 7: 'putamen', 8: 'temporal_lobe', 9: 'thalamus'}

In [41]:
ROI = regions[0]
print("=" * 20, ROI, "=" * 20)
dataset_indices = list(dataset_df.index)[:DATA_SIZE]
test_dataset = Region_Dataset(dataset_df, dataset_indices, ROI)
dataloader_test = DataLoader(test_dataset, 
                            batch_size=1, 
                            sampler=SequentialSampler(test_dataset),
                            collate_fn=test_dataset.collate_fn,
                            pin_memory=True,
                            num_workers=2)

# hypterparameters
BATCH_SIZE = 8
EPOCHS = 2
RESULTS_FOLDER = './test'
INPUT_SIZE = (1, 128, 128, 128)
LEARNING_RATE = 1e-6
N_WORKERS = 2

# Initialize your model (Make sure it's the same architecture as the one you trained)
model = CNN(in_channels=1).cuda()  
# Put your model on the GPU
model = torch.nn.DataParallel(model)

# Define your optimizer and scheduler
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
t_0 = int(DATA_SIZE * 0.75 // BATCH_SIZE // 6)
scheduler = CustomCosineAnnealingWarmUpRestarts(optimizer,T_0= t_0, T_up=10, T_mult=2, eta_max=1e-3, gamma=0.5)

# Loss function
mse_criterion = torch.nn.MSELoss()
mae_criterion = torch.nn.L1Loss()

# Initialize the trainer
trainer = CNN_Trainer(model, RESULTS_FOLDER, None, None, dataloader_test, EPOCHS, optimizer, scheduler)

model_root = '/media/leelabsg-storage1/yein/research/model/region_BAE/deep_ensemble'
all_model_paths = glob.glob(model_root + f'/{ROI}/*')
best_model_path = all_model_paths[0]
# max_epoch = 0
# for path in all_model_paths:
#     epoch = int(path.split('.')[0][-1])
#     if epoch > max_epoch:
#         max_epoch = epoch
#         best_model_path = path
print("Model: ", best_model_path)
checkpoint = torch.load(best_model_path, map_location='cuda')
trainer.load(checkpoint)
predictions, targets = trainer.test()

Model:  /media/leelabsg-storage1/yein/research/model/region_BAE/deep_ensemble/imgs/model_0.pth


AttributeError: 'CNN_Trainer' object has no attribute 'model'

In [5]:
for i in regions.keys():
    ROI = regions[i]
    print("=" * 20, ROI, "=" * 20)
    dataset_indices = list(dataset_df.index)[:DATA_SIZE]
    test_dataset = Region_Dataset(dataset_df, dataset_indices, ROI)
    dataloader_test = DataLoader(test_dataset, 
                                batch_size=1, 
                                sampler=SequentialSampler(test_dataset),
                                collate_fn=test_dataset.collate_fn,
                                pin_memory=True,
                                num_workers=2)
    
    # hypterparameters
    BATCH_SIZE = 8
    EPOCHS = 2
    RESULTS_FOLDER = './test'
    INPUT_SIZE = (1, 128, 128, 128)
    LEARNING_RATE = 1e-6
    N_WORKERS = 2
    
    # Initialize your model (Make sure it's the same architecture as the one you trained)
    model = CNN(in_channels=1).cuda()
    model.apply(initialize_weights)
    model = torch.nn.DataParallel(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = lr.CustomCosineAnnealingWarmUpRestarts(optimizer,T_0= t_0, T_up=10, T_mult=2, eta_max=1e-3, gamma=0.5)
    
    # 훈련 클래스 초기화

    trainer = CNN_Trainer(model, OUTPUT, dataloader_train, dataloader_valid, None, EPOCHS, optimizer, scheduler, model_load=MODEL_LOAD)
    

    model_root = '/media/leelabsg-storage1/yein/research/model/region_BAE/deep_ensemble'
    all_model_paths = glob.glob(model_root + f'/{ROI}/*')
    best_model_path = ''
    max_epoch = 0
    for path in all_model_paths:
        epoch = int(path.split('/')[-1].split('-')[2].split('.')[0])
        if epoch > max_epoch:
            max_epoch = epoch
            best_model_path = path
    print("Model: ", best_model_path)
    checkpoint = torch.load(best_model_path, map_location='cuda')
    trainer.load(checkpoint)
    predictions, targets = trainer.test()

Model:  /media/leelabsg-storage1/yein/research/model/region/imgs/cv-0-97.pth.tar


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 630/630 [00:52<00:00, 12.02it/s]


test mse loss = 90.611 / test mae loss = 8.145
Model:  /media/leelabsg-storage1/yein/research/model/region/caudate/cv-0-30.pth.tar


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 630/630 [00:53<00:00, 11.69it/s]


test mse loss = 134.677 / test mae loss = 9.636
Model:  /media/leelabsg-storage1/yein/research/model/region/cerebellum/cv-0-56.pth.tar


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 630/630 [01:22<00:00,  7.67it/s]


test mse loss = 102.942 / test mae loss = 8.416
Model:  /media/leelabsg-storage1/yein/research/model/region/frontal_lobe/cv-0-91.pth.tar


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 630/630 [00:47<00:00, 13.21it/s]


test mse loss = 136.601 / test mae loss = 9.897
Model:  /media/leelabsg-storage1/yein/research/model/region/insula/cv-0-34.pth.tar


 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                       | 411/630 [00:58<00:31,  7.04it/s]


KeyboardInterrupt: 

# 각 region 별 volume 값 계산하여 mean, variance 확인

In [None]:
import pandas as pd
import os
import ants
from helpers import *
import gc
import glob

In [None]:
region_wise_volumes = {
    'age': dataset_df['age'].to_list(),
    'gender': dataset_df['gender'].to_list()
}

regions = {0: 'imgs', 1: 'caudate', 2: 'cerebellum', 3: 'frontal_lobe', 4: 'insula', 5: 'occipital_lobe', 6: 'parietal_lobe', 7: 'putamen', 8: 'temporal_lobe', 9: 'thalamus'}
for region in regions.values():
    region_wise_volumes[region] = []

# 초기 'age'와 'gender' 리스트에 대한 길이 검증
assert len(region_wise_volumes['age']) == len(dataset_df), "Age list length mismatch"
assert len(region_wise_volumes['gender']) == len(dataset_df), "Gender list length mismatch"

for i in range(len(dataset_df)): # 대신 작은 숫자로 테스트
    for ROI in regions.values():
        try:
            img_path = dataset_df.iloc[i][ROI]  # 올바른 경로로 접근
            img = ants.image_read(img_path)  # 이미지 로드
            img_arr = img.numpy()  # ANTs 이미지 객체를 NumPy 배열로 변환
            nonzero_count = np.count_nonzero(img_arr)  # 0이 아닌 픽셀의 개수 계산
            region_wise_volumes[ROI].append(nonzero_count)  # 결과 저장
        except Exception as e:
            print(f"Error processing {ROI} for row {i}: {e}")
            region_wise_volumes[ROI].append(None)  # 오류 발생 시 None 추가



In [None]:
region_wise_volumes_df = pd.DataFrame(region_wise_volumes)
region_wise_volumes_df

In [None]:
# 필요한 라이브러리 임포트
import pandas as pd
import numpy as np

# region_wise_volumes_df는 이미 생성된 데이터프레임 변수라고 가정

# 각 영역 볼륨의 평균과 분산 계산
volume_stats = {}
regions = ['caudate', 'cerebellum', 'frontal_lobe', 'insula', 'occipital_lobe', 'parietal_lobe', 'putamen', 'temporal_lobe', 'thalamus']

for region in regions:
    volume_stats[region] = {
        'mean': region_wise_volumes_df[region].mean(),
        'variance': region_wise_volumes_df[region].var(),
        'mean_ratio': region_wise_volumes_df[region].mean() / region_wise_volumes_df['imgs'].mean()
    }

# 결과를 DataFrame으로 변환하여 표시
volume_stats_df = pd.DataFrame(volume_stats).T
volume_stats_df


In [None]:
import matplotlib.pyplot as plt

# regions 딕셔너리 정의가 이전과 동일하다고 가정합니다.
regions = {0: 'imgs', 1: 'caudate', 2: 'cerebellum', 3: 'frontal_lobe', 4: 'insula', 
           5: 'occipital_lobe', 6: 'parietal_lobe', 7: 'putamen', 
           8: 'temporal_lobe', 9: 'thalamus'}

# 각 영역에 대한 히스토그램을 그립니다.
plt.figure(figsize=(10, 8))  # 그래프의 크기를 설정합니다.

# 각 영역 별로 히스토그램을 그리는 반복문
for region in regions.values():
    region_wise_volumes_df[region].hist(alpha=0.5, label=region)  # alpha는 투명도를 조절합니다.

plt.legend()  # 범례를 표시합니다.
plt.xlabel('Volume')  # x축 라벨
plt.ylabel('Frequency')  # y축 라벨
plt.title('Histogram of Volumes by Region')  # 그래프 제목
plt.show()  # 그래프를 화면에 표시합니다.
