In [2]:
import numpy as np
import pandas as pd
import sklearn
import torch

In [3]:
data = np.load(f"./data/abide.npy",allow_pickle=True).item()
print("Number of subjects: ", len(data['label']))
data.keys()

Number of subjects:  1009


dict_keys(['timeseires', 'label', 'corr', 'pcorr', 'site'])

In [4]:
conn = data["corr"]
print(f"Connectome (X) shape: {conn.shape}") # n_ROIs, n_ROIs, n_subjects

Connectome (X) shape: (1009, 116, 116)


In [5]:
scores = data["label"]
print(f"ASD scores (y) shape: {scores.shape}") # n_subjects

ASD scores (y) shape: (1009,)


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [38]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(13456, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 12),
            nn.ReLU(),
            nn.Linear(12, 3)  # Compressed representation
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(),
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 13456),
            nn.Sigmoid()  # Output values between 0 and 1
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


In [39]:
# Train autoencoder using the corr matrices
# Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(conn, scores, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (807, 116, 116)
X_test shape: (202, 116, 116)
y_train shape: (807,)
y_test shape: (202,)


In [40]:
model = Autoencoder().float()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


36.63636363636363

In [49]:
batch_size = 8
# change the length of X_train to match batch_size
X_train = X_train[:len(X_train) - (len(X_train) % batch_size)]
print(f"X_train shape: {X_train.shape}")
train_loader = DataLoader(X_train, batch_size=1, shuffle=True)

X_train shape: (800, 116, 116)


In [51]:

num_epochs = 10
for epoch in range(num_epochs):
    for data in train_loader:
        matrix = data.float()
        # reshape
        matrix = matrix.view(matrix.size(0), -1)
        output = model(matrix)
        loss = criterion(output, matrix)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # Evaluate on test set
    test_matrix = torch.tensor(X_test).float()
    test_matrix = test_matrix.view(test_matrix.size(0), -1)
    test_output = model(test_matrix)
    test_loss = criterion(test_output, test_matrix)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Test loss: {test_loss.item():.4f}')



Epoch [1/10], Loss: 0.0286, Test loss: 0.0351
Epoch [2/10], Loss: 0.0216, Test loss: 0.0349
Epoch [3/10], Loss: 0.0300, Test loss: 0.0349
Epoch [4/10], Loss: 0.0302, Test loss: 0.0349
Epoch [5/10], Loss: 0.0288, Test loss: 0.0350
Epoch [6/10], Loss: 0.0504, Test loss: 0.0349
Epoch [7/10], Loss: 0.0290, Test loss: 0.0350
Epoch [8/10], Loss: 0.0385, Test loss: 0.0348
Epoch [9/10], Loss: 0.0202, Test loss: 0.0340
Epoch [10/10], Loss: 0.0292, Test loss: 0.0338


In [52]:
# Get predictions for all data (train and test)
all_data = torch.tensor(conn).float()
all_data = all_data.view(all_data.size(0), -1)
all_output = model(all_data)
all_loss = criterion(all_output, all_data)
print(f'Loss: {all_loss.item():.4f}')

Loss: 0.0329


In [56]:
# Get the compressed representation
compressed = model.encoder(all_data)
compressed = compressed.detach().numpy()
print(f"Compressed representation shape: {compressed.shape}")
compressed

Compressed representation shape: (1009, 3)


array([[ 1.0716132 ,  1.151193  ,  0.2651278 ],
       [ 1.4225651 ,  0.9257535 ,  0.03692634],
       [ 0.03002885, -0.00735715,  0.19386314],
       ...,
       [ 1.1388929 ,  0.8494133 ,  0.1154813 ],
       [ 1.2313913 ,  1.0216595 ,  0.150218  ],
       [ 0.8533045 ,  1.249581  ,  0.38994104]], dtype=float32)