### Imports

In [15]:
import argparse, time

import dgl
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgi.dgi import Classifier, DGI
from dgl import DGLGraph
from dgl.data import load_data, register_data_args, DGLDataset

import os

time: 1.3 ms (started: 2023-10-23 14:16:28 -05:00)


In [16]:
from tqdm import tqdm
from dgi.utils_dgi import *
from utils.utils_go import *

import json
import pandas as pd

# Ask DGL to use its PyTorch backend.
# NOTE(review): `dgl` has already been imported in the previous cell; DGL appears to
# read DGLBACKEND when it is first imported, so this assignment may come too late to
# take effect in this kernel — confirm, and consider moving it above `import dgl`.
os.environ["DGLBACKEND"] = "pytorch"

%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 9.88 ms (started: 2023-10-23 14:16:28 -05:00)


In [17]:
# torch.manual_seed(42)
# np.random.seed(42)

time: 394 µs (started: 2023-10-23 14:16:28 -05:00)


### Parameters

In [18]:
# Resolve which experiment to run from exp.json, then load that experiment's
# parameters. Both files are opened with a context manager so the handles are
# closed as soon as the JSON has been parsed (previously they were left open).
with open("exp.json") as file:
    experiment = json.load(file)
exp_num = experiment["exp"]

with open("output/{}/parameters.json".format(exp_num)) as file:
    params = json.load(file)

# Experiment identifier as recorded inside the parameters file itself.
exp = params["exp"]
print("Exp:\t\t", exp)

# This notebook always produces embeddings with the DGI method.
method = "dgi"
print("Method:\t\t", method)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

dimension = params["dimension"]
print("Dimension:\t", dimension)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

# Mapping from each group id to the list of its subgroup ids.
subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

Exp:		 exp3
Method:		 dgi
Data variations: ['none']
Dimension:	 3
Groups id:	 ['pck1', 'zwf1', 'WT']
Subgroups id:	 {'pck1': ['1', '2', '3'], 'zwf1': ['1', '2'], 'WT': ['1', '2', '3', '4', '5']}
time: 4.68 ms (started: 2023-10-23 14:16:28 -05:00)


### Node embeddings

In [19]:
# Build the graph for the first subgroup of the first group and print its summary.
preview_group = groups_id[0]
preview_subgroup = subgroups_id[preview_group][0]

preview_nodes_csv = "output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(
    exp, preview_group, preview_subgroup
)
preview_edges_csv = "output/{}/preprocessing/graphs_data/edges_data_{}_{}.csv".format(
    exp, preview_group, preview_subgroup
)

# Only the columns from the third one onwards of the nodes file are kept.
nodes_data = pd.read_csv(preview_nodes_csv).iloc[:, 2:]
edges_data = pd.read_csv(preview_edges_csv)

dataset = CustomDatasetDGI("g1", nodes_data, edges_data)
graph = dataset[0]

print(graph)

Graph(num_nodes=120, num_edges=6937,
      ndata_schemes={'feat': Scheme(shape=(24,), dtype=torch.float32), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={'weight': Scheme(shape=(), dtype=torch.float64)})
time: 16.3 ms (started: 2023-10-23 14:16:28 -05:00)


In [20]:
# get node embeddings
seed = 42
for data_variation in data_variations:
    if data_variation != "none":
        for group in groups_id:
            subgroups_id[group] = [data_variation]
        torch.manual_seed(seed)
        np.random.seed(seed)
    else:
        torch.manual_seed(seed)
        np.random.seed(seed)

    print("Subgroups id:\t", subgroups_id)
    
    for group in tqdm(groups_id):
        for subgroup in tqdm(subgroups_id[group]):
            nodes_data = pd.read_csv("output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group, subgroup)).iloc[:, 2:]
            edges_data = pd.read_csv("output/{}/preprocessing/graphs_data/edges_data_{}_{}.csv".format(exp, group, subgroup))

            # read dataset
            # data = load_data(args)
            data = CustomDatasetDGI("g_{}_{}".format(group, subgroup), nodes_data, edges_data)
            graph = data[0]

            # train
            args_ = args_dgi(dimension)
            train_dgi(exp, graph, args_, method, group, subgroup, 0)

Subgroups id:	 {'pck1': ['1', '2', '3'], 'zwf1': ['1', '2'], 'WT': ['1', '2', '3', '4', '5']}


  0%|          | 0/3 [00:00<?, ?it/s]

Early stopping!


100%|██████████| 3/3 [00:15<00:00,  5.16s/it]
 33%|███▎      | 1/3 [00:15<00:30, 15.49s/it]

Early stopping!




Early stopping!


100%|██████████| 2/2 [00:04<00:00,  2.06s/it]
 67%|██████▋   | 2/3 [00:19<00:08,  8.80s/it]

Early stopping!




Early stopping!




Early stopping!




Early stopping!




Early stopping!


100%|██████████| 5/5 [00:04<00:00,  1.03it/s]
100%|██████████| 3/3 [00:24<00:00,  8.15s/it]

Early stopping!
time: 24.5 s (started: 2023-10-23 14:16:28 -05:00)





In [21]:
# Load the embeddings file for the first group's first subgroup (trailing index 0,
# matching the last argument passed to train_dgi) and display it.
shown_group = groups_id[0]
shown_subgroup = subgroups_id[shown_group][0]
embeddings_csv = "output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(
    exp, method, shown_group, shown_subgroup, 0
)

df_node_embeddings = pd.read_csv(embeddings_csv, index_col=0)
df_node_embeddings

Unnamed: 0,0,1,2
0,-0.003466,0.251491,-0.071463
1,0.000139,0.250145,-0.066976
2,0.003242,0.248974,-0.063338
3,0.006002,0.247926,-0.060187
4,0.008537,0.246964,-0.057311
...,...,...,...
115,-0.305244,0.515041,0.081374
116,-0.341669,0.542934,0.081637
117,-0.397485,0.585252,0.080156
118,-0.463171,0.634479,0.074543


time: 12.6 ms (started: 2023-10-23 14:16:53 -05:00)
