In [1]:
# This notebook converts the graph data from a pickle file containing a list of graph representations
# into another file containing the same list of graph representations in PyG format.
import torch
import pickle
import numpy as np
from pyg_preprocess import build_K_n, adj_to_edge_attr, pseudo_data

# Load data (MAC)
# pickle_data_path = "/Users/xaviermootoo/Documents/Data/ssl-seizure-detection/pickle/jh101_grs.pickle"
# pickle_data = pickle.load(open(pickle_data_path, "rb"))

# Load data (PC)
# NOTE: pickle.load can execute arbitrary code while unpickling; only use it on trusted files.
pickle_data_path = r"C:\Users\xmoot\Desktop\Data\ssl-seizure-detection\patient_gr\jh101_grs.pickle"
# Open through a context manager so the file handle is closed once the data has been read.
with open(pickle_data_path, "rb") as pickle_file:
    pickle_data = pickle.load(pickle_file)

In [2]:
# Peek at the first sample: it unpacks into (graph, label), and the graph unpacks
# into three items, the last of which is not used here.
graph, label = example = pickle_data[0]
A, nf, _ = graph
print(type(label))

<class 'float'>


In [4]:
# Smoke test: run the imported helpers on a random 5x5 matrix and report the type of the
# edge attributes they produce.
A = np.random.rand(5, 5)
edge_index = build_K_n(len(A))
edge_attr = adj_to_edge_attr(A, edge_index)
print(type(edge_attr))

<class 'numpy.ndarray'>


In [5]:
# Define the converter from [[A, NF, EF], Y] entries to PyG-style tensor entries

def create_tensordata(num_nodes, data_list, complete = True):
    """
    Converts a list of graph representations with entries of the form [[A, NF, EF], Y]
    (numpy arrays A, NF, EF and float Y) into a list of graph representations
    [[edge_index, x, edge_attr], y] for PyG format in torch tensors.

    The EF entry of each graph is ignored; edge attributes are derived from the adjacency
    matrix A through adj_to_edge_attr.

    args:
        num_nodes (int): Number of nodes in the graph; passed to build_K_n to build the edge index.
        data_list (list): List of graph representations of the form [[A, NF, EF], Y] for numpy arrays A, NF, EF and float Y.
        complete (bool): Whether the graph is complete or not. Only True is currently supported. Defaults to True.

    returns:
        pyg_data (list): List of graph representations of the form [[edge_index, x, edge_attr], y] for PyG format, where edge_index is a torch.long tensor
                        built from build_K_n(num_nodes) (expected shape (2, num_edges)), x is a torch.float32 tensor built from NF
                        (expected shape (num_nodes, num_node_features)), edge_attr is a torch.float32 tensor built from adj_to_edge_attr
                        (expected shape (num_edges, num_edge_features)), and y is a torch.float32 scalar tensor.
                        The same edge_index tensor object is shared by every entry.

    raises:
        NotImplementedError: If complete is False. Non-complete graphs have no conversion path here;
                        raising avoids silently returning an empty list.

    """
    if not complete:
        raise NotImplementedError(
            "create_tensordata only supports complete graphs (complete=True)."
        )

    # The edge index depends only on num_nodes, so it is built once and reused for every example.
    edge_index = build_K_n(num_nodes)
    edge_index = torch.from_numpy(edge_index).to(torch.long)

    pyg_data = []
    for graph, y in data_list:

        # Parse data (the precomputed edge features EF are not used)
        A, x, _ = graph

        # Add adjacency matrix weights to edge attributes
        edge_attr = adj_to_edge_attr(A, edge_index)

        # Convert to tensors
        x = torch.from_numpy(x).to(torch.float32)
        y = torch.tensor(y, dtype=torch.float32)
        edge_attr = torch.from_numpy(edge_attr).to(torch.float32)

        pyg_data.append([[edge_index, x, edge_attr], y])

    return pyg_data

In [6]:
# Read the node count from the first adjacency matrix instead of hard-coding it, so the same
# cell works for other patient files (expected to be 107 for jh101).
num_nodes = pickle_data[0][0][0].shape[0]
pyg_data = create_tensordata(num_nodes, pickle_data)

In [7]:
# Serialize the converted list of tensor graphs to disk with torch.save
pyg_data_path = r"C:\Users\xmoot\Desktop\Data\ssl-seizure-detection\patient_gr\jh101_tensors_grs_pyg.pt"
torch.save(obj=pyg_data, f=pyg_data_path)

In [10]:
# Reload the saved file and compare it against the original pickle data.
pyg_data = torch.load(pyg_data_path)

# Number of examples after the round trip, then in the source data.
print(len(pyg_data), len(pickle_data), sep="\n")

# First converted graph, laid out as [edge_index, x, edge_attr].
first_tensor_graph = pyg_data[0][0]
print(type(first_tensor_graph[0]))
print("Edge features shape:", first_tensor_graph[2].shape)
print("Edge features stored in edge_attr:", first_tensor_graph[2])
print("Edge features stored in adj:", pickle_data[0][0][0])

4484
4484
<class 'torch.Tensor'>
Edge features shape: torch.Size([11342, 1])
Edge features stored in edge_attr: tensor([[ 0.6607],
        [-0.1258],
        [-2.1098],
        ...,
        [ 1.7842],
        [-1.2728],
        [ 1.8921]])
Edge features stored in adj: [[ 2.44109208  0.66069462 -0.12578357 ...  0.83818009 -1.58988827
  -0.80577129]
 [ 0.66069462  2.44109208  1.6717804  ...  0.60534856 -2.1600128
  -1.95782143]
 [-0.12578357  1.6717804   2.44109208 ...  0.43843359 -1.49362948
  -1.54959986]
 ...
 [ 0.83818009  0.60534856  0.43843359 ...  2.44109208 -1.4207016
  -1.2727665 ]
 [-1.58988827 -2.1600128  -1.49362948 ... -1.4207016   2.44109208
   1.89210687]
 [-0.80577129 -1.95782143 -1.54959986 ... -1.2727665   1.89210687
   2.44109208]]


In [14]:
# Create graph pairs
# Store the result under its own name: assigning it back to `pseudo_data` would replace the
# imported function, so the cell could not be re-run without re-importing it.
# Note: `//` with a float operand yields floats here (6 // 0.12 == 50.0, 50 // 0.12 == 416.0).
pseudo_pairs = pseudo_data(pyg_data, tau_pos = 6 // 0.12, tau_neg = 50 // 0.12, stats = True, save = False, patientid = "patient", logdir = None)

NameError: name 'pseudo_data' is not defined

In [None]:
# Create PairData


In [None]:
# Link for pairs of graphs: https://pytorch-geometric.readthedocs.io/en/latest/advanced/batching.html
# Link for creating datasets: https://pytorch-geometric.readthedocs.io/en/latest/tutorial/create_dataset.html
# Link for Data handling tutorial: https://www.youtube.com/watch?v=Vz5bT8Xw6Dc&list=PLGMXrbDNfqTzqxB1IGgimuhtfAhGd8lHF&index=5