In [1]:
import numpy as np
import pandas as pd
import sklearn
import torch

In [2]:
data = np.load(f"./data/abide.npy",allow_pickle=True).item()
print("Number of subjects: ", len(data['label']))
data.keys()

Number of subjects:  1009


dict_keys(['timeseires', 'label', 'corr', 'pcorr', 'site'])

In [3]:
conn = data["corr"]
print(f"Connectome (X) shape: {conn.shape}") # n_ROIs, n_ROIs, n_subjects

Connectome (X) shape: (1009, 116, 116)


In [4]:
scores = data["label"]
print(f"ASD scores (y) shape: {scores.shape}") # n_subjects

ASD scores (y) shape: (1009,)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [6]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(13456, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 12),
            nn.ReLU(),
            nn.Linear(12, 3)  # Compressed representation
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(),
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 13456),
            nn.Sigmoid()  # Output values between 0 and 1
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


In [7]:
# Train autoencoder using the corr matrices
# Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(conn, scores, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (807, 116, 116)
X_test shape: (202, 116, 116)
y_train shape: (807,)
y_test shape: (202,)


In [8]:
model = Autoencoder().float()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [9]:
batch_size = 8
# change the length of X_train to match batch_size
X_train = X_train[:len(X_train) - (len(X_train) % batch_size)]
print(f"X_train shape: {X_train.shape}")
train_loader = DataLoader(X_train, batch_size=1, shuffle=True)

X_train shape: (800, 116, 116)


In [10]:

num_epochs = 10
for epoch in range(num_epochs):
    for data in train_loader:
        matrix = data.float()
        # reshape
        matrix = matrix.view(matrix.size(0), -1)
        output = model(matrix)
        loss = criterion(output, matrix)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # Evaluate on test set
    test_matrix = torch.tensor(X_test).float()
    test_matrix = test_matrix.view(test_matrix.size(0), -1)
    test_output = model(test_matrix)
    test_loss = criterion(test_output, test_matrix)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Test loss: {test_loss.item():.4f}')



Epoch [1/10], Loss: 0.0366, Test loss: 0.0383
Epoch [2/10], Loss: 0.0385, Test loss: 0.0395
Epoch [3/10], Loss: 0.0438, Test loss: 0.0387
Epoch [4/10], Loss: 0.0381, Test loss: 0.0381
Epoch [5/10], Loss: 0.0916, Test loss: 0.0382
Epoch [6/10], Loss: 0.0417, Test loss: 0.0374
Epoch [7/10], Loss: 0.0408, Test loss: 0.0361
Epoch [8/10], Loss: 0.0311, Test loss: 0.0367
Epoch [9/10], Loss: 0.0575, Test loss: 0.0358
Epoch [10/10], Loss: 0.0507, Test loss: 0.0357


In [11]:
# Train on all data as it is an unsupervised task

# Initiliaze model
model = Autoencoder().float()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Determine batch size and adapt dataset
batch_size = 4
# change the length of X_train to match batch_size
conn = conn[:len(conn) - (len(conn) % batch_size)]
print(f"Initial X shape: {conn.shape}")
print(f"Adapted X shape: {conn.shape}")
X_loader = DataLoader(conn, batch_size=1, shuffle=True)

num_epochs = 10
for epoch in range(num_epochs):
    for data in X_loader:
        matrix = data.float()
        # reshape
        matrix = matrix.view(matrix.size(0), -1)
        output = model(matrix)
        loss = criterion(output, matrix)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')



X shape: (1008, 116, 116)
Epoch [1/10], Loss: 0.0254
Epoch [2/10], Loss: 0.0316
Epoch [3/10], Loss: 0.0221
Epoch [4/10], Loss: 0.0301
Epoch [5/10], Loss: 0.0276
Epoch [6/10], Loss: 0.0257
Epoch [7/10], Loss: 0.0450
Epoch [8/10], Loss: 0.0369
Epoch [9/10], Loss: 0.0277
Epoch [10/10], Loss: 0.0279


In [12]:
# Get predictions for all data (train and test)
all_data = torch.tensor(conn).float()
all_data = all_data.view(all_data.size(0), -1)
all_output = model(all_data)
all_loss = criterion(all_output, all_data)
print(f'Loss: {all_loss.item():.4f}')

Loss: 0.0343


In [13]:
# Get the compressed representation
compressed = model.encoder(all_data)
compressed = compressed.detach().numpy()
print(f"Compressed representation shape: {compressed.shape}")
compressed

Compressed representation shape: (1008, 3)


array([[ 1.7905575 ,  3.0867612 , -1.0521109 ],
       [ 1.7585279 ,  3.037877  , -1.0296388 ],
       [-0.01863605,  0.32553953,  0.21722454],
       ...,
       [ 4.1797085 ,  6.7331233 , -2.7283459 ],
       [ 1.5395811 ,  2.7037168 , -0.876025  ],
       [ 1.6772534 ,  2.9138343 , -0.97261626]], dtype=float32)

In [15]:
# Save compressed representation to .npy
PATH = './mri_features.npy'


(1008, 3)