In [None]:
import pandas as pd
import numpy as np
import nibabel as nib
from nilearn import input_data, datasets
from statsmodels.tsa.stattools import grangercausalitytests
import networkx as nx
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
import torch
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Load the phenotypic CSV file
csv_file = pd.read_csv(r"/Users/vinoth/PycharmProjects/paper_implementation/Dataset/source/mri_images/ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv")

# Replace labels 1 and 2 with 0 and 1.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the column selection: that chained in-place call
# acts on a temporary object under pandas Copy-on-Write and would leave
# csv_file unchanged.
# NOTE(review): presumably DX_GROUP 1 is the autism group and 2 the control
# group in the ABIDE phenotypic legend, which would make class 0 = autism
# after this recoding - confirm before naming the classes in reports.
csv_file['DX_GROUP'] = csv_file['DX_GROUP'].replace({1: 0, 2: 1})

# Split data into training and test sets (fixed seed for reproducibility)
train_df, test_df = train_test_split(csv_file, test_size=0.2, random_state=42)

# Load the Harvard-Oxford atlas and build a masker from its label image
atlas = datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm')
masker = input_data.NiftiLabelsMasker(labels_img=atlas.maps, standardize=True)

# MRI file directory
mri_dir = r"/Users/vinoth/PycharmProjects/paper_implementation/Dataset/source/mri_images/ABIDE_pcp/cpac/nofilt_noglobal/"

# Placeholder for Graph Neural Network Data
graph_data_list = []

# Placeholders for the time series and labels of all subjects; the two lists
# are filled in lockstep so that equal indices refer to the same subject
time_series_all_subjects = []
labels_all_subjects = []

# Extract one time-series array per training subject.  Subjects whose
# functional image is missing are skipped, so the two output lists stay
# aligned with each other but not with the rows of train_df.
for idx, row in enumerate(train_df.itertuples()):
    print(idx)
    mri_filename = os.path.join(mri_dir, row.FILE_ID + "_func_preproc.nii.gz")
    try:
        mri_img = nib.load(mri_filename)

        # Calculate time series
        time_series = masker.fit_transform(mri_img)
        print(time_series)
        print("*******************************")

        # Store time series and corresponding label for all subjects
        time_series_all_subjects.append(time_series)
        labels_all_subjects.append(row.DX_GROUP)
    except FileNotFoundError:
        # Still best-effort, but report the skip so that a wrong directory
        # or file-name pattern does not silently produce an empty dataset.
        print(f"Skipping subject {row.FILE_ID}: file not found ({mri_filename})")

# Placeholder for adjacency matrices (one per subject, same order as
# time_series_all_subjects)
adjacency_matrices = []

# Calculate the Granger causality for all ordered pairs of ROIs.
# NOTE(review): an edge is set when the smallest p-value over all tested lags
# is below 0.05, with no correction for the number of lags tried; with many
# lags this makes edges very likely - confirm this is intended.
count = 1
for ts in time_series_all_subjects:
    num_regions = ts.shape[1]
    adjacency_matrix = np.zeros((num_regions, num_regions))

    # The maximum lag is derived from the number of samples only, so it is
    # identical for every pair of columns and computed once per subject.
    maxlag = ts.shape[0] // 3 - 1

    for i in range(num_regions):
        for j in range(num_regions):
            print(count)
            if i != j:
                # The test checks whether the second column (region j)
                # Granger-causes the first column (region i).
                result = grangercausalitytests(ts[:, [i, j]], maxlag=maxlag, verbose=False)
                # Results are keyed by lag, starting at 1.  The raw p-values
                # are compared against the threshold: rounding them first
                # would turn values just below 0.05 into exactly 0.05 and
                # wrongly drop the edge.
                p_values = [result[lag][0]['ssr_ftest'][1] for lag in range(1, maxlag + 1)]
                # If the p-value is less than 0.05, then we say that region j G-causes region i
                adjacency_matrix[i, j] = 1 if min(p_values) < 0.05 else 0
                print(adjacency_matrix)
                print(count)
                print("###############################")
            count = count + 1

    adjacency_matrices.append(adjacency_matrix)

# Turn every adjacency matrix into a PyTorch Geometric graph object.
for idx, adjacency_matrix in enumerate(adjacency_matrices):

    # adjacency_matrix[i, j] == 1 means "region j Granger-causes region i".
    # Transposing before building a directed graph makes every edge (u, v)
    # run from the causing region u to the affected region v.  A directed
    # graph is required: an undirected one merges both directions and lists
    # each edge only once, so messages would flow one way in an arbitrary
    # orientation.  from_numpy_array replaces from_numpy_matrix, which is no
    # longer available in NetworkX 3.
    G = nx.from_numpy_array(adjacency_matrix.T, create_using=nx.DiGraph)

    # Generate edges and features for PyTorch Geometric.  The reshape keeps
    # the (num_edges, 2) layout even when the graph has no edges at all.
    edge_index = torch.tensor(list(G.edges), dtype=torch.long).reshape(-1, 2)
    # NOTE(review): x is the time-series array as stored, whose columns were
    # treated as regions when the adjacency matrix was built.  Its rows are
    # therefore not regions, while edge_index refers to region indices -
    # confirm whether the node features should be the transposed array.
    x = torch.tensor(time_series_all_subjects[idx], dtype=torch.float)
    y = torch.tensor([labels_all_subjects[idx]], dtype=torch.float)

    # Create graph data for PyTorch Geometric (edge_index as 2 x num_edges)
    data = Data(x=x, edge_index=edge_index.t().contiguous(), y=y)

    # Append data to list
    graph_data_list.append(data)


In [None]:
import pandas as pd
import numpy as np
import nibabel as nib
from nilearn import input_data, datasets
from statsmodels.tsa.stattools import grangercausalitytests
import networkx as nx
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
import torch
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import os
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor


def granger_test(params):
    """Test whether one column of a time-series array Granger-causes another.

    Args:
        params: A ``(ts, i, j)`` tuple, packed into one argument so the
            function can be used with ``executor.map``.  ``ts`` is a 2-D
            array indexed as ``ts[:, column]``; ``i`` and ``j`` are column
            indices.

    Returns:
        A tuple ``(i, j, flag)``.  ``flag`` is 1 when the smallest
        ``ssr_ftest`` p-value over lags ``1..maxlag`` is below 0.05 for the
        test "column j Granger-causes column i", and 0 otherwise.
    """
    ts, i, j = params
    # Both columns have ts.shape[0] samples, so the lag limit depends only on
    # the number of rows.
    maxlag = ts.shape[0] // 3 - 1
    result = grangercausalitytests(ts[:, [i, j]], maxlag=maxlag, verbose=False)
    # Results are keyed by lag, starting at 1.  The raw p-values are kept:
    # rounding them before the comparison would turn values just below 0.05
    # into exactly 0.05 and wrongly report "no causality".
    p_values = [result[lag][0]['ssr_ftest'][1] for lag in range(1, maxlag + 1)]
    return i, j, 1 if min(p_values) < 0.05 else 0


# Load the phenotypic CSV file
csv_file = pd.read_csv(r"/Users/vinoth/PycharmProjects/paper_implementation/Dataset/source/mri_images/ABIDE_pcp/Phenotypic_V1_0b_preprocessed1.csv")

# Replace labels 1 and 2 with 0 and 1.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the column selection: that chained in-place call
# acts on a temporary object under pandas Copy-on-Write and would leave
# csv_file unchanged.
# NOTE(review): presumably DX_GROUP 1 is the autism group and 2 the control
# group in the ABIDE phenotypic legend, which would make class 0 = autism
# after this recoding - confirm before naming the classes in reports.
csv_file['DX_GROUP'] = csv_file['DX_GROUP'].replace({1: 0, 2: 1})

# Split data into training and test sets (fixed seed for reproducibility)
train_df, test_df = train_test_split(csv_file, test_size=0.2, random_state=42)

# Load the Harvard-Oxford atlas and build a masker from its label image
atlas = datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm')
masker = input_data.NiftiLabelsMasker(labels_img=atlas.maps, standardize=True)

# MRI file directory
mri_dir = r"/Users/vinoth/PycharmProjects/paper_implementation/Dataset/source/mri_images/ABIDE_pcp/cpac/nofilt_noglobal/"

# Placeholder for Graph Neural Network Data
graph_data_list = []

# Placeholders for the time series and labels of all subjects; the two lists
# are filled in lockstep so that equal indices refer to the same subject
time_series_all_subjects = []
labels_all_subjects = []

# Extract one time-series array per training subject.  Subjects whose
# functional image is missing are skipped, so the two output lists stay
# aligned with each other but not with the rows of train_df.
for idx, row in enumerate(train_df.itertuples()):
    print(idx)
    mri_filename = os.path.join(mri_dir, row.FILE_ID + "_func_preproc.nii.gz")
    try:
        mri_img = nib.load(mri_filename)

        # Calculate time series
        time_series = masker.fit_transform(mri_img)
        print(time_series)
        print("*******************************")

        # Store time series and corresponding label for all subjects
        time_series_all_subjects.append(time_series)
        labels_all_subjects.append(row.DX_GROUP)
    except FileNotFoundError:
        # Still best-effort, but report the skip so that a wrong directory
        # or file-name pattern does not silently produce an empty dataset.
        print(f"Skipping subject {row.FILE_ID}: file not found ({mri_filename})")

# Placeholder for adjacency matrices (one per subject, same order as
# time_series_all_subjects).  It must be reset here: without it this cell
# fails with a NameError in a fresh session, or keeps appending to a list
# left over from an earlier run and falls out of step with the subjects.
adjacency_matrices = []

# Running number of pairwise tests completed across all subjects
count = 1
for ts in time_series_all_subjects:
    num_regions = ts.shape[1]
    adjacency_matrix = np.zeros((num_regions, num_regions))

    with ThreadPoolExecutor() as executor:
        # One task per ordered pair of distinct regions; the diagonal stays 0
        params = [(ts, i, j) for i in range(num_regions) for j in range(num_regions) if i != j]
        results = executor.map(granger_test, params)

        # Each result carries its own (i, j), so it is written to the right
        # cell regardless of which worker produced it
        for i, j, result in results:
            adjacency_matrix[i, j] = result
            count += 1

    adjacency_matrices.append(adjacency_matrix)

# Turn every adjacency matrix into a PyTorch Geometric graph object.
for idx, adjacency_matrix in enumerate(adjacency_matrices):

    # adjacency_matrix[i, j] is the flag returned by granger_test for
    # "column j Granger-causes column i".  Transposing before building a
    # directed graph makes every edge (u, v) run from the causing region u to
    # the affected region v.  A directed graph is required: an undirected one
    # merges both directions and lists each edge only once, so messages
    # would flow one way in an arbitrary orientation.  from_numpy_array
    # replaces from_numpy_matrix, which is no longer available in NetworkX 3.
    G = nx.from_numpy_array(adjacency_matrix.T, create_using=nx.DiGraph)

    # Generate edges and features for PyTorch Geometric.  The reshape keeps
    # the (num_edges, 2) layout even when the graph has no edges at all.
    edge_index = torch.tensor(list(G.edges), dtype=torch.long).reshape(-1, 2)
    # NOTE(review): x is the time-series array as stored, whose columns were
    # treated as regions when the adjacency matrix was built.  Its rows are
    # therefore not regions, while edge_index refers to region indices -
    # confirm whether the node features should be the transposed array.
    x = torch.tensor(time_series_all_subjects[idx], dtype=torch.float)
    y = torch.tensor([labels_all_subjects[idx]], dtype=torch.float)

    # Create graph data for PyTorch Geometric (edge_index as 2 x num_edges)
    data = Data(x=x, edge_index=edge_index.t().contiguous(), y=y)

    # Append data to list
    graph_data_list.append(data)


In [None]:
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, GINConv, global_mean_pool
from torch_geometric.data import DataLoader
import pandas as pd

class Net(torch.nn.Module):
    """Graph classifier: two graph convolutions, mean pooling, linear head.

    Args:
        conv_layer: Convolution class used for both layers.  ``GINConv`` is
            constructed around a small MLP; any other class is called as
            ``conv_layer(in_channels, out_channels)``.
        num_node_features: Size of each node's input feature vector.
        num_classes: Number of output classes.
    """

    def __init__(self, conv_layer, num_node_features, num_classes):
        super().__init__()
        if conv_layer is GINConv:
            # GINConv wraps a network instead of taking channel sizes
            nn1 = torch.nn.Sequential(torch.nn.Linear(num_node_features, 16), torch.nn.ReLU(), torch.nn.Linear(16, 16))
            self.conv1 = conv_layer(nn1)
            nn2 = torch.nn.Sequential(torch.nn.Linear(16, 32), torch.nn.ReLU(), torch.nn.Linear(32, 32))
            self.conv2 = conv_layer(nn2)
        else:
            self.conv1 = conv_layer(num_node_features, 16)
            self.conv2 = conv_layer(16, 32)
        self.fc = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a batch of graphs.

        ``data`` must provide ``x``, ``edge_index`` and ``batch``; the output
        has one row per graph in the batch and ``num_classes`` columns.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # torch.nn.functional is referenced by its full name so the class
        # does not depend on an ``F`` alias being imported elsewhere.
        x = self.conv1(x, edge_index)
        x = torch.nn.functional.relu(x)
        # Dropout is only active while the module is in training mode
        x = torch.nn.functional.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        x = global_mean_pool(x, batch)  # Pooling: one vector per graph

        x = self.fc(x)
        return torch.nn.functional.log_softmax(x, dim=1)

    
# Define models and their names.  The two lists are paired by position:
# model_names[k] is the display name of the convolution class models[k].
models = [GCNConv, GATConv, SAGEConv, GINConv]
model_names = ['GCN', 'GAT', 'SAGE', 'GIN']

# The results table is created right before the training loop, with the full
# set of metric columns; an earlier three-column table defined here was
# overwritten before ever being used and has been dropped.


from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Columns of the summary table.  Rows are collected in a plain list and the
# DataFrame is built once at the end, because DataFrame.append is no longer
# available in pandas 2.
result_columns = ["Model", "Train_Accuracy", "Test_Accuracy", "Precision", "Recall", "F1-Score"]
result_rows = []

# Model Training and Evaluation for each Convolution Layer
for model_name, model_class in zip(model_names, models):
    model = Net(model_class, num_features, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # Training
    for epoch in range(100):
        total_loss = 0
        model.train()
        for data in loader:
            data = data.to(device)
            optimizer.zero_grad()
            out = model(data)
            # Labels are cast to integer class indices for nll_loss;
            # torch.nn.functional is referenced by its full name so this
            # cell does not depend on an ``F`` alias imported elsewhere.
            loss = torch.nn.functional.nll_loss(out, data.y.long())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'{model_name} - Epoch: {epoch+1}, Loss: {total_loss/len(loader)}')

    # Evaluation on Training Data
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        # No gradients are needed for evaluation
        with torch.no_grad():
            _, pred = model(data).max(dim=1)
        correct += int((pred == data.y.long()).sum())
    train_accuracy = correct / len(loader.dataset)
    print(f'{model_name} Train Accuracy: {train_accuracy:.4f}')

    # Evaluation on Test Data
    correct = 0
    all_preds = []
    all_labels = []
    for data in test_loader:
        data = data.to(device)
        with torch.no_grad():
            output = model(data)
            _, pred = output.max(dim=1)
        all_preds.append(pred.cpu().numpy())
        # Store labels as integers so they have the same type as predictions
        all_labels.append(data.y.long().cpu().numpy())
        correct += int((pred == data.y.long()).sum())
    test_accuracy = correct / len(test_loader.dataset)
    print(f'{model_name} Test Accuracy: {test_accuracy:.4f}')

    # Flatten the list of predictions and labels
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # Confusion Matrix, normalized so that every row (actual class) sums to 1
    cm = confusion_matrix(all_labels, all_preds)
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize

    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, cmap='Blues', fmt=".2%")
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix (Normalized) for {model_name}')
    plt.show()

    # Classification report
    # NOTE(review): target_names assigns 'Non-Autistic' to class 0 and
    # 'Autistic' to class 1.  Presumably class 0 is the original DX_GROUP 1,
    # which may be the autism group - confirm the naming is not inverted.
    # Only the macro averages are used below, and those do not depend on it.
    report = classification_report(all_labels, all_preds, target_names=['Non-Autistic', 'Autistic'], output_dict=True)

    # Record the metrics of this model as one row of the results table
    result_rows.append({
        "Model": model_name,
        "Train_Accuracy": train_accuracy,
        "Test_Accuracy": test_accuracy,
        "Precision": report['macro avg']['precision'],
        "Recall": report['macro avg']['recall'],
        "F1-Score": report['macro avg']['f1-score'],
    })

results = pd.DataFrame(result_rows, columns=result_columns)
print(results)

