In [26]:
# Torch-related imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, Subset

# Scikit-learn-related imports
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, accuracy_score

# Nibabel and Scipy imports (for handling fMRI and image processing)
import nibabel as nib
import scipy.ndimage as ndimage  # For smoothing

# NumPy, Matplotlib, and Seaborn (for data manipulation and visualization)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# OS for file system operations
import os

In [27]:
# Explicitly set the CUDA_LAUNCH_BLOCKING environment variable to '0'.
os.environ['CUDA_LAUNCH_BLOCKING'] = '0'

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


# Preprocessing

In [28]:
# Optional Google Colab setup: mount Drive and point root_dir at the dataset there.
# from google.colab import drive
# drive.mount('/content/drive')

# root_dir = os.path.join('/content/drive', 'My Drive', 'UCR', '2-2024', 'InvCC', 'ADHD200', 'Datasets', 'preprocessed')

root_dir = os.path.join('preprocessed')

# Folders holding the TDC and ADHD samples
tdc_dir = os.path.join(root_dir, 'TDC')
adhd_dir = os.path.join(root_dir, 'ADHD')

# Where the autoencoder state is saved
save_path = os.path.join(root_dir, 'autoencoder.pt')


def _list_nifti_files(directory):
    """Return the paths of the '.nii.gz' files in `directory`, sorted by file name.

    os.listdir returns entries in arbitrary order. Sorting keeps the mapping
    from dataset index to file stable across runs and machines, which the
    seeded train/val/test splits rely on to be reproducible.
    """
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.endswith('.nii.gz')
    ]


# File paths for each class
tdc_file_paths = _list_nifti_files(tdc_dir)
adhd_file_paths = _list_nifti_files(adhd_dir)

# Matching labels: 0 for TDC, 1 for ADHD
tdc_labels = [0] * len(tdc_file_paths)
adhd_labels = [1] * len(adhd_file_paths)

# Combine file paths and labels (TDC first, then ADHD)
file_paths = tdc_file_paths + adhd_file_paths
labels = tdc_labels + adhd_labels

In [29]:
class FMRI_Dataset(Dataset):
    """Dataset that loads fMRI volumes from disk, normalizes and pads them.

    Each item is a `(data, label)` pair where `data` is a float32 tensor that
    has been (optionally) Gaussian-smoothed, z-score normalized and then
    padded with zeros towards `max_shape`, and `label` is a tensor built from
    the corresponding entry of `labels`.

    Args:
        file_paths: List of paths to the fMRI data files.
        labels: Labels, one per entry of `file_paths`.
        max_shape: Target shape used to compute the padding. It is matched
            against the sample's shape starting from the LAST dimension, so
            extra leading entries (e.g. a leading 1) are ignored.
        smoothing_sigma: Standard deviation passed to the Gaussian filter.
        apply_smoothing: When True, smooth each sample before normalizing.
            Defaults to False, which matches the previous behaviour where
            `smoothing_sigma` was stored but never applied.

    Raises:
        ValueError: If `file_paths` and `labels` have different lengths.
    """

    def __init__(self, file_paths, labels, max_shape, smoothing_sigma=1, apply_smoothing=False):
        if len(file_paths) != len(labels):
            raise ValueError(
                f"file_paths and labels must have the same length "
                f"(got {len(file_paths)} and {len(labels)})"
            )
        self.file_paths = file_paths  # List of paths to the fMRI data files
        self.labels = labels  # Corresponding labels
        self.max_shape = max_shape  # Shape to pad all inputs to
        self.smoothing_sigma = smoothing_sigma  # Standard deviation for Gaussian smoothing
        self.apply_smoothing = apply_smoothing  # Whether __getitem__ smooths the data

    def __len__(self):
        """Return the number of samples (one per file path)."""
        return len(self.file_paths)

    def _load_volume(self, idx):
        """Load sample `idx` with NiBabel and return its data as a NumPy array."""
        fmri_img = nib.load(self.file_paths[idx])
        return fmri_img.get_fdata()

    def __getitem__(self, idx):
        """Return the `(padded_data, label)` pair for sample `idx`."""
        data = self._load_volume(idx)

        # Optional smoothing, applied before normalization
        if self.apply_smoothing:
            data = self.smooth_data(data)

        # Normalize the data
        data = self.normalize_data(data)

        # Convert NumPy array to a PyTorch tensor
        data = torch.tensor(data, dtype=torch.float32)

        # Get the label
        label = torch.tensor(self.labels[idx])

        # Pad the tensor towards max_shape with zeros
        data_padded = F.pad(data, pad=self.calculate_padding(data.shape), mode='constant', value=0)

        return data_padded, label

    def calculate_padding(self, current_shape):
        """Compute the flat padding list that F.pad expects for `current_shape`.

        Dimensions are paired with `self.max_shape` from the last one backwards
        (F.pad takes the padding of the last dimension first); pairing stops at
        the shorter of the two shapes. The total padding of each dimension is
        split as evenly as possible, with the extra element going to the end.
        If a dimension is larger than its target, the resulting negative
        values are returned as-is.

        Returns:
            list[int]: `[before_last, after_last, before_second_last, ...]`.
        """
        padding = []
        for current_dim, max_dim in zip(reversed(current_shape), reversed(self.max_shape)):
            pad_total = max_dim - current_dim
            padding.append(pad_total // 2)  # pad_left or pad_top, etc.
            padding.append(pad_total - (pad_total // 2))  # pad_right or pad_bottom, etc.

        return padding

    def normalize_data(self, data):
        """Normalize the data to zero mean and unit variance.

        Statistics are computed over the whole array. Data with zero standard
        deviation is returned unchanged to avoid dividing by zero.
        """
        mean = data.mean()
        std = data.std()
        if std > 0:  # Avoid division by zero
            data = (data - mean) / std
        return data

    def smooth_data(self, data):
        """Apply Gaussian smoothing (sigma = `self.smoothing_sigma`) along every axis of the data."""
        return ndimage.gaussian_filter(data, sigma=self.smoothing_sigma)

In [30]:
# Target shape every sample is zero-padded towards. The value is hard-coded
# rather than computed from the files. The samples produced here are 4-D, so
# the leading 1 has no matching dimension and is ignored by the padding logic.
max_shape = [1, 53, 64, 46, 512]

# Build the dataset of normalized, padded tensors
dataset = FMRI_Dataset(file_paths=file_paths, labels=labels, max_shape=max_shape)

# Load the first sample as a quick check of the resulting shape and label
sample_data, sample_label = dataset[0]
print(sample_data.shape, sample_label)

torch.Size([53, 64, 46, 512]) tensor(0)


In [31]:
# Stratified hold-out: 20% of the samples become the test set.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# n_splits=1, so take the single (train+val, test) index pair directly
train_val_idx, test_idx = next(sss.split(range(len(dataset)), dataset.labels))
train_val_set = Subset(dataset, train_val_idx)
testset = Subset(dataset, test_idx)

# Second stratified split of the remaining samples into train and validation.
# 0.25 of the remaining 0.8 is 0.2 of the full dataset for validation.
sss_val = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)

# Labels of the train+val samples, in train_val_set order
train_val_labels = [dataset.labels[i] for i in train_val_idx]

# train_idx / val_idx index into train_val_set, not into dataset
train_idx, val_idx = next(sss_val.split(range(len(train_val_set)), train_val_labels))
trainset = Subset(train_val_set, train_idx)
valset = Subset(train_val_set, val_idx)

# Guard against leakage: map everything back to indices of `dataset` and check
# that the three splits are pairwise disjoint and together cover every sample.
train_ids = {int(i) for i in train_val_idx[train_idx]}
val_ids = {int(i) for i in train_val_idx[val_idx]}
test_ids = {int(i) for i in test_idx}
assert train_ids.isdisjoint(val_ids), "train and validation splits overlap"
assert train_ids.isdisjoint(test_ids), "train and test splits overlap"
assert val_ids.isdisjoint(test_ids), "validation and test splits overlap"
assert len(train_ids) + len(val_ids) + len(test_ids) == len(dataset), "splits do not cover the dataset"

# Create the dataloaders
batch_size = 1
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=0)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)

class_names = ["TDC", "ADHD"]

# Smoke test: pull one batch from the train and test loaders to confirm that
# samples can be loaded and collated. `images` ends up holding the test batch.
for loader in (trainloader, testloader):
    images, batch_labels = next(iter(loader))
    assert images.shape[0] == batch_labels.shape[0], "inputs and labels disagree on batch size"


In [32]:
# Report how many samples each loader draws from (validation, train, test).
n_val = len(valloader.dataset)
n_train = len(trainloader.dataset)
n_test = len(testloader.dataset)

print(f"Number of samples in valloader: {n_val}")
print(f"Number of samples in trainloader: {n_train}")
print(f"Number of samples in testloader: {n_test}")

Number of samples in valloader: 34
Number of samples in trainloader: 102
Number of samples in testloader: 34


# CNN-AE

In [33]:
class CNN_Autoencoder(nn.Module):
    """3-D convolutional autoencoder for single-channel volumes.

    The encoder is four stride-2 `Conv3d` layers (1 -> 16 -> 64 -> 128 -> 256
    channels), each followed by ReLU, with `Dropout3d(0.2)` after the first
    three. The decoder mirrors it with four stride-2 `ConvTranspose3d` layers
    whose per-axis `padding` / `output_padding` are chosen so that a
    53 x 64 x 46 volume is reconstructed at exactly its input size. The last
    layer is followed by a Sigmoid, so outputs lie in (0, 1).

    NOTE(review): FMRI_Dataset.normalize_data z-scores the inputs, so the
    reconstruction targets can be negative while the Sigmoid output cannot;
    confirm this output activation is intended.
    """

    def __init__(self):
        super().__init__()

        # Encoder: Conv3d + ReLU per stage, Dropout3d on all but the last stage
        enc_channels = (1, 16, 64, 128, 256)
        final_stage = len(enc_channels) - 2
        enc_layers = []
        for stage, (c_in, c_out) in enumerate(zip(enc_channels[:-1], enc_channels[1:])):
            enc_layers.append(nn.Conv3d(c_in, c_out, kernel_size=3, stride=2, padding=1))
            enc_layers.append(nn.ReLU())
            if stage < final_stage:
                enc_layers.append(nn.Dropout3d(0.2))
        self.encoder = nn.Sequential(*enc_layers)

        # Decoder: (in_channels, out_channels, padding, output_padding) per stage
        dec_specs = (
            (256, 128, (1, 1, 1), (0, 1, 0)),
            (128, 64, (1, 1, 1), (1, 0, 1)),
            (64, 16, (1, 0, 0), (0, 0, 1)),
            (16, 1, (1, 0, 0), (0, 1, 1)),
        )
        dec_layers = []
        for c_in, c_out, pad, out_pad in dec_specs:
            dec_layers.append(
                nn.ConvTranspose3d(c_in, c_out, kernel_size=3, stride=2, padding=pad, output_padding=out_pad)
            )
            dec_layers.append(nn.ReLU())
        # The final activation is a Sigmoid instead of a ReLU
        dec_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode `x` and return its reconstruction from the decoder."""
        return self.decoder(self.encoder(x))

# Instantiate the model on the GPU when available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
autoencoder = CNN_Autoencoder().to(device)

# Shape smoke test with a random volume. A 4-D tensor of shape (1, 53, 64, 46)
# is interpreted by Conv3d as an unbatched (channels=1, 53, 64, 46) input.
inputs = torch.rand((1, 53, 64, 46)).to(device)  # Example input
with torch.no_grad():  # no gradients are needed for a shape check
    output = autoencoder(inputs)
print(output.shape)  # should match the input shape [1, 53, 64, 46]
assert output.shape == inputs.shape, (
    f"reconstruction shape {tuple(output.shape)} does not match input shape {tuple(inputs.shape)}"
)

# Loss and optimizer
criterion = nn.MSELoss()  # Mean squared error between reconstruction and input
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001, weight_decay=1e-5)


torch.Size([1, 53, 64, 46])


In [34]:
# Number of epochs
epochs = 100

# Track the best validation loss
best_val_loss = float('inf')

# The best-validation checkpoint lives at save_path. The last-epoch weights go
# to a sibling file so that the final save does not overwrite the best model.
_ckpt_root, _ckpt_ext = os.path.splitext(save_path)
last_save_path = f"{_ckpt_root}_last{_ckpt_ext}"


def to_model_input(batch):
    """Turn a loader batch into the autoencoder input on `device`.

    Averages over the last axis and inserts an explicit channel axis, i.e.
    (B, D, H, W, T) -> (B, 1, D, H, W). The explicit channel axis makes the
    loop correct for any batch size; without it a (B, D, H, W) tensor is only
    accepted by Conv3d when B == 1 (it is then read as an unbatched input).
    """
    return torch.mean(batch, dim=-1).unsqueeze(1).to(device)


for epoch in range(epochs):
    # Training phase
    autoencoder.train()  # Set the model to training mode
    total_loss = 0.0
    # Labels are unused here; discard them so the global `labels` list that
    # the dataset cells rely on is not overwritten by a batch tensor.
    for inputs, _batch_labels in trainloader:
        inputs = to_model_input(inputs)

        # Forward pass
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)  # Compare reconstruction with original input

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Calculate average training loss (mean of the per-batch losses)
    avg_train_loss = total_loss / len(trainloader)
    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {avg_train_loss}")

    # Validation phase
    autoencoder.eval()  # Set the model to evaluation mode
    total_val_loss = 0.0
    with torch.no_grad():  # Disable gradient calculation
        for val_inputs, _batch_labels in valloader:
            val_inputs = to_model_input(val_inputs)

            # Forward pass
            val_outputs = autoencoder(val_inputs)
            val_loss = criterion(val_outputs, val_inputs)  # Compare reconstruction with original input
            total_val_loss += val_loss.item()

    # Calculate average validation loss
    avg_val_loss = total_val_loss / len(valloader)
    print(f"Epoch [{epoch+1}/{epochs}], Validation Loss: {avg_val_loss}")

    # Save the model if the validation loss has decreased
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(autoencoder.state_dict(), save_path)
        print(f"Model saved at epoch {epoch+1} with validation loss: {avg_val_loss}")

# Final save after training: last-epoch weights, kept separate from the best checkpoint
torch.save(autoencoder.state_dict(), last_save_path)

Epoch [1/100], Training Loss: 0.21615308322304605
Epoch [1/100], Validation Loss: 0.1898005849079174
Model saved at epoch 1 with validation loss: 0.1898005849079174
Epoch [2/100], Training Loss: 0.18170130117705055
Epoch [2/100], Validation Loss: 0.18593355728422895
Model saved at epoch 2 with validation loss: 0.18593355728422895
Epoch [3/100], Training Loss: 0.17623058249991314
Epoch [3/100], Validation Loss: 0.17841029353439808
Model saved at epoch 3 with validation loss: 0.17841029353439808
Epoch [4/100], Training Loss: 0.15812896418513037
Epoch [4/100], Validation Loss: 0.12286891503369107
Model saved at epoch 4 with validation loss: 0.12286891503369107
Epoch [5/100], Training Loss: 0.10846456467155732
Epoch [5/100], Validation Loss: 0.09919735504423871
Model saved at epoch 5 with validation loss: 0.09919735504423871
Epoch [6/100], Training Loss: 0.0941785421112881
Epoch [6/100], Validation Loss: 0.09095219747327707
Model saved at epoch 6 with validation loss: 0.09095219747327707
E