In [2]:
from torch import tensor
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch_geometric_temporal.nn.recurrent import DCRNN
from torch_geometric.nn import Linear
from torch_geometric.nn import global_mean_pool
import numpy as np
import torch
import os
from torch_geometric.loader import DataLoader
import random

In [3]:
# Locations of the mutual-information (MI) tables and of the results folder.
# NOTE(review): these are absolute, machine-specific Windows paths; the
# notebook only runs as-is on the original author's machine.

# Not referenced by any visible cell -- presumably the output of an earlier
# pipeline run; TODO confirm before removing.
mi_dir_old = r'C:\Users\yl646\Documents\ADHD_research\DATA\OUTPUT\step_6_test\MI_TABLE\mi_adhd.npy'
# MI table of the ADHD group (loaded into ADHD_mi).
mi_dir_adhd   = r'C:\Users\yl646\Documents\ADHD_research\DATA\OUTPUT\step_6_test_complete\MI_TABLE\mi_adhd.npy'
# MI table of the control group (loaded into CONTROL_mi).
mi_dir_control   = r'C:\Users\yl646\Documents\ADHD_research\DATA\OUTPUT\step_6_test_complete\MI_TABLE\mi_control.npy'
# Not referenced by any visible cell. NOTE(review): this path spells the
# folder 'ADHD_Research' (capital R) while the others use 'ADHD_research';
# harmless on a case-insensitive filesystem, but verify it is intended.
result_dir = r'C:\Users\yl646\Documents\ADHD_Research\DATA\OUTPUT\step_6_test_complete\RESULTS'

In [4]:
def _fully_connected_edge_index(num_nodes):
    """Return the edge index of a complete directed graph with self-loops.

    Edges are listed in row-major order -- (0, 0), (0, 1), ..., (0, n-1),
    (1, 0), ... -- which is the order the nested row/col loops produced.

    Args:
        num_nodes: Number of nodes in the graph.

    Returns:
        A ``torch.long`` tensor of shape ``[2, num_nodes ** 2]`` whose first
        row holds source nodes and whose second row holds target nodes.
    """
    node_ids = np.arange(num_nodes)
    sources = np.repeat(node_ids, num_nodes)  # 0,0,...,0,1,1,...
    targets = np.tile(node_ids, num_nodes)    # 0,1,...,n-1,0,1,...
    return torch.tensor(np.stack((sources, targets)), dtype=torch.long)


def _mi_table_to_graphs(mi_table, label):
    """Turn a stack of MI matrices into a list of labelled PyG graphs.

    Every epoch becomes one fully connected graph: each row of the epoch's MI
    matrix is used as the feature vector of the corresponding node.

    Args:
        mi_table: Array of shape ``(epochs, channels, temp)``.
        label: Integer class label stored in ``y`` for every graph.

    Returns:
        A list with one ``Data`` object per epoch, in epoch order.
    """
    (epochs, channels, temp) = mi_table.shape
    # The connectivity is the same for every epoch, so build it once instead
    # of growing it one edge at a time inside the epoch loop.
    edge_index = _fully_connected_edge_index(channels)

    graphs = []
    for epoch in range(epochs):
        x = torch.tensor(mi_table[epoch, :, :], dtype=torch.float)
        y = torch.tensor([label], dtype=torch.int64)
        # clone() keeps one independent edge tensor per graph.
        graphs.append(Data(x=x, edge_index=edge_index.clone(), y=y))
    return graphs


ADHD_mi = np.load(mi_dir_adhd)
CONTROL_mi = np.load(mi_dir_control)

# ADHD graphs (label 1) first, then CONTROL graphs (label 0).
dataset_feat = (
    _mi_table_to_graphs(ADHD_mi, label=1)
    + _mi_table_to_graphs(CONTROL_mi, label=0)
)

print("MI table shape: ",ADHD_mi.shape, "(epochs, channels, temp)")
print("MI table shape: ",CONTROL_mi.shape, "(epochs, channels, temp)")
print("# of graphs: ",len(dataset_feat))

MI table shape:  (2231, 20, 20) (epochs, channels, temp)
MI table shape:  (1757, 20, 20) (epochs, channels, temp)
# of graphs:  3988


In [5]:
# Sanity-check a single graph. `data` is kept as a notebook-level name
# because the model definitions further down read `data.num_node_features`.
data = dataset_feat[300]
print(data)

summary_lines = (
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    f'Number of features: {data.num_node_features}',
)
for summary_line in summary_lines:
    print(summary_line)

print(data.y)

Data(x=[20, 20], edge_index=[2, 400], y=[1])
Number of nodes: 20
Number of edges: 400
Average node degree: 20.00
Has isolated nodes: False
Has self-loops: True
Is undirected: True
Number of features: 20
tensor([1])


In [6]:
# Shuffle the graphs and split them into a training and a test set.
#
# The shuffle is applied to a *copy* of dataset_feat. Shuffling the original
# list in place made this cell non-idempotent: running it a second time
# reshuffled an already shuffled list, which silently moved graphs between the
# training and the test set. With a copy, every run from the same
# dataset_feat yields the same permutation (and the same one the in-place
# shuffle produced on its first run).
TRAIN_SIZE = 3500  # number of graphs used for training; the rest is the test set

random.seed(1)
shuffled_dataset = list(dataset_feat)
random.shuffle(shuffled_dataset)
train_dataset = shuffled_dataset[:TRAIN_SIZE]
test_dataset = shuffled_dataset[TRAIN_SIZE:]

# Each DataLoader yields mini-batches of up to 128 graphs.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

print("training batches")
for batch in train_loader:
    print(batch.num_graphs, end =' ')
print("\ntest batches")
for batch in test_loader:
    print(batch.num_graphs, end =' ')

training batches
128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 44 
test batches
128 128 128 104 

In [24]:
class SAGE(torch.nn.Module):
    """Graph-level binary classifier built from SAGEConv layers.

    Two SAGEConv layers produce node embeddings, which are mean-pooled per
    graph and passed through dropout and a linear layer with 2 outputs.

    Args:
        hidden_channels: Output width of every SAGEConv layer.
        in_channels: Number of input features per node. When omitted, it is
            read from the notebook-level ``data`` object
            (``data.num_node_features``), which is the original behaviour;
            pass it explicitly to avoid depending on that global.
    """

    def __init__(self, hidden_channels, in_channels=None):
        super(SAGE, self).__init__()
        # Re-seeds torch's global RNG so the initial weights are reproducible.
        torch.manual_seed(12345)
        if in_channels is None:
            in_channels = data.num_node_features
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        # NOTE(review): conv3 is never used in forward(). It is kept because
        # removing it would change the random numbers consumed before
        # self.lin is initialised, and therefore the seeded starting weights.
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, 2)

    def forward(self, x, edge_index, batch):
        """Return one row of 2 class logits per graph in the batch.

        Args:
            x: Node feature matrix.
            edge_index: Edge index of the batched graphs.
            batch: Vector assigning every node to its graph.
        """
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier (dropout is only active in train mode)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x
    
# Loss, model and optimiser used by train() / test().
criterion = nn.CrossEntropyLoss()
model = SAGE(hidden_channels=20)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

def train():
    """Run one pass over ``train_loader``, updating the global ``model``.

    Uses the notebook-level ``model``, ``criterion`` and ``optimizer``.
    Returns nothing.
    """
    model.train()

    for batch in train_loader:  # one mini-batch of graphs per step
        logits = model(batch.x, batch.edge_index, batch.batch)  # forward pass
        criterion(logits, batch.y).backward()  # loss -> gradients
        optimizer.step()       # apply the update
        optimizer.zero_grad()  # reset gradients for the next mini-batch

def test(loader, dataset):
    model.eval()
    
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model(data.x, data.edge_index, data.batch) 
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(dataset)  # Derive ratio of correct predictions.

In [25]:
# Train the global `model` and report accuracy on both splits after every
# pass. Note that range(1, 150) covers epochs 1..149, i.e. 149 passes.
for epoch in range(1, 150):
    train()
    train_acc, test_acc = (
        test(train_loader, train_dataset),
        test(test_loader, test_dataset),
    )
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.6294, Test Acc: 0.6209
Epoch: 002, Train Acc: 0.6914, Test Acc: 0.7029
Epoch: 003, Train Acc: 0.7223, Test Acc: 0.7377
Epoch: 004, Train Acc: 0.7677, Test Acc: 0.7725
Epoch: 005, Train Acc: 0.8000, Test Acc: 0.7828
Epoch: 006, Train Acc: 0.8089, Test Acc: 0.8012
Epoch: 007, Train Acc: 0.8209, Test Acc: 0.8217
Epoch: 008, Train Acc: 0.7891, Test Acc: 0.7643
Epoch: 009, Train Acc: 0.8346, Test Acc: 0.8299
Epoch: 010, Train Acc: 0.8554, Test Acc: 0.8402
Epoch: 011, Train Acc: 0.8323, Test Acc: 0.8176
Epoch: 012, Train Acc: 0.8757, Test Acc: 0.8668
Epoch: 013, Train Acc: 0.8894, Test Acc: 0.8648
Epoch: 014, Train Acc: 0.8860, Test Acc: 0.8525
Epoch: 015, Train Acc: 0.8454, Test Acc: 0.8381
Epoch: 016, Train Acc: 0.9009, Test Acc: 0.8689
Epoch: 017, Train Acc: 0.8900, Test Acc: 0.8545
Epoch: 018, Train Acc: 0.9031, Test Acc: 0.8750
Epoch: 019, Train Acc: 0.8820, Test Acc: 0.8668
Epoch: 020, Train Acc: 0.9094, Test Acc: 0.8811
Epoch: 021, Train Acc: 0.8986, Test Acc:

In [22]:
class DIFF(torch.nn.Module):
    """Graph-level binary classifier built from two DCRNN layers.

    The DCRNN layers produce node embeddings, which are mean-pooled per graph
    and passed through dropout and a linear layer with 2 outputs.

    Args:
        hidden_channels: Output width of every DCRNN layer.
        K: Third positional argument forwarded to both ``DCRNN`` layers.
        in_channels: Number of input features per node. When omitted, it is
            read from the notebook-level ``data`` object
            (``data.num_node_features``), which is the original behaviour;
            pass it explicitly to avoid depending on that global.
    """

    def __init__(self, hidden_channels, K, in_channels=None):
        super(DIFF, self).__init__()
        # Re-seeds torch's global RNG so the initial weights are reproducible.
        torch.manual_seed(12345)
        if in_channels is None:
            in_channels = data.num_node_features
        self.conv1 = DCRNN(in_channels, hidden_channels, K)
        self.conv2 = DCRNN(hidden_channels, hidden_channels, K)
        self.lin = Linear(hidden_channels, 2)

    def forward(self, x, edge_index, batch):
        """Return one row of 2 class logits per graph in the batch.

        Args:
            x: Node feature matrix.
            edge_index: Edge index of the batched graphs.
            batch: Vector assigning every node to its graph.
        """
        # 1. Obtain node embeddings. Each DCRNN layer is called with node
        #    features and edges only (no edge weights or hidden state passed).
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier (dropout is only active in train mode)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x
    
model_dcrnn = DIFF(hidden_channels=2, K=2)
# The DCRNN model gets its own optimiser/criterion names. Rebinding the shared
# globals `optimizer` / `criterion` here meant that train() (which reads
# `optimizer` at call time) would step this model's optimiser -- or, with the
# cells run in the opposite order, train_dcrnn() would step the other model's
# -- depending on which cell was executed last.
optimizer_dcrnn = torch.optim.Adam(model_dcrnn.parameters(), lr=0.01)
criterion_dcrnn = torch.nn.CrossEntropyLoss()

def train_dcrnn():
    """Run one pass over ``train_loader``, updating ``model_dcrnn``.

    Uses the notebook-level ``model_dcrnn``, ``criterion_dcrnn`` and
    ``optimizer_dcrnn``. Returns nothing.
    """
    model_dcrnn.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
        out = model_dcrnn(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion_dcrnn(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer_dcrnn.step()  # Update parameters based on gradients.
        optimizer_dcrnn.zero_grad()  # Clear gradients.

def test_dcrnn(loader, dataset):
    model_dcrnn.eval()
    
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        out = model_dcrnn(data.x, data.edge_index, data.batch) 
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(dataset)  # Derive ratio of correct predictions.

In [23]:
# Train the DCRNN model and report accuracy on both splits.
# range(1, 2) contains only epoch 1, so this performs a single pass.
for epoch in range(1, 2):
    train_dcrnn()
    train_acc, test_acc = (
        test_dcrnn(train_loader, train_dataset),
        test_dcrnn(test_loader, test_dataset),
    )
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.5609, Test Acc: 0.5492
