In [None]:
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:90% !important; }</style>"))

In [7]:
import os, sys
import argparse
import os.path as osp
import random
from time import perf_counter
import yaml
import numpy as np
import scipy.sparse as sp
import scipy

import torch
import torch.nn.functional as F
import torch.nn as nn

import logging
import shutil
import ast

from lib_EGNN_Pytorch.models.GNN_basic import AE
from lib_EGNN_Pytorch.models.model_app import RwSL_Model, set_dims_RwSL
from lib_EGNN_Pytorch import utils, evaluation
from lib_EGNN_Pytorch.data_preprocessing import Pre_utils, GBP_precompute

from lib_EGNN_Pytorch.app.RwSL import basic_exec_cluster
from lib_EGNN_Pytorch.app.RwSL import RwSL_app
%matplotlib inline  

In [8]:
# Silence TensorFlow's C++ log output (level '3') for anything imported later.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Fixed seed shared by NumPy and PyTorch so the notebook is repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Root logger: drop whatever handlers are installed and attach one stream
# handler that prefixes every record with a timestamp and the level name.
formatter = logging.Formatter(
    fmt='%(asctime)s (%(levelname)s): %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
ch = logging.StreamHandler()
ch.setFormatter(formatter)

logger = logging.getLogger()
logger.handlers = []
logger.addHandler(ch)
logger.setLevel(logging.INFO)

In [9]:
# >>>>>>>>>>>>>>>>>>>> Setting input data and configurations:
# Root folder of the graph data, and folder under which the pre-trained
# model parameters are looked up (see pretrain_save_path below).
tkipf_graph_path = "/home/xiangli/projects/tmpdata/GCN/Graph_Clustering/tkipf_gcn_data"
sdcn_data_path = "/home/xiangli/projects/tmpdata/GCN/Graph_Clustering/sdcn/"

# Dataset selector; switch by (un)commenting.
data_name = 'cora'
# data_name = 'cite'

# All results of this notebook (config copy, checkpoints, metrics) go below workdir.
workdir = f"/home/xiangli/projects/GCN_program/Workshop_local/EGNN_workdir_results/RwSL/{data_name}/cluster/"

pretrain_save_path = osp.join(
    sdcn_data_path,
    f'pretrain_model_parameter/GBP_self_pretrain/pretrain_info_{data_name}_batchnorm/{data_name}.pkl',
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The per-dataset YAML file is resolved relative to the notebook's working directory.
config_file_name = f'config_{data_name.lower()}.yaml'
config_file_path = osp.join('./config_data_RwSL/', config_file_name)
with open(config_file_path, 'r') as config_stream:
    config = yaml.safe_load(config_stream)

# YAML leaves some values as plain strings (e.g. "None"); turn those that are
# valid Python literals into the real objects. Strings that are not literals
# (such as the 'arch' value) fail to parse and are kept unchanged.
for key in list(config):
    val = config[key]
    if type(val) is not str:
        continue
    try:
        config[key] = ast.literal_eval(val)
    except (ValueError, SyntaxError):
        pass

# PyTorch is re-seeded from the config; Python's `random` uses a fixed seed.
torch.manual_seed(config['seed'])
random.seed(12345)

print(">>>>>>>>>>>>>>>>>>>> Setting Tuning :")
# Name of the hyper-parameter being swept ("nothing" = plain single run).
tune_param_name = "nothing"
# tune_param_name = "n_epochs"

# Labels identify each sweep point in output paths; values are what gets applied.
tune_val_label_list = [1]
# tune_val_label_list = [0.0, 0.1, 0.2, 0.5]

# tune_val_list = [10**(-val) for val in tune_val_label_list]
tune_val_list = list(tune_val_label_list)

# trainer_id_list = [0]
trainer_id_list = [*range(1)]

# Make the pre-trained weights location available through the config.
config["pretrain_path"] = pretrain_save_path

print(f"Tune param : {tune_param_name} ; with the following values: {tune_val_list}")
print(">>>>>>>>>>>>>>>>>>>> Loading configs :")
for key, val in config.items():
    print(f"{key}    :    {val}")

# =================== copy the config file: =======================================
# Keep a copy of the YAML next to the results, unless one is already there.
dest_folder = osp.dirname(osp.join(workdir, f"tune_{tune_param_name}/"))
config_copy_path = osp.join(dest_folder, config_file_name)
if not osp.exists(config_copy_path):
    os.makedirs(dest_folder, exist_ok=True)
    shutil.copyfile(config_file_path, config_copy_path)

>>>>>>>>>>>>>>>>>>>> Setting Tuning :
Tune param : nothing ; with the following values: [1]
>>>>>>>>>>>>>>>>>>>> Loading configs :
name    :    cora
seed    :    20159
lr    :    0.0001
k    :    None
n_clusters    :    7
n_input    :    1433
batch_size_update_p    :    512
arch    :    512-1024-16
golden_pre_epoch    :    10
golden_trainer    :    3
dropout_rate    :    0.0
bn_momentum    :    0.1
weight_decay    :    0.01
batch_size_train    :    256
v    :    1.0
a    :    0.1
b    :    0.01
sigma    :    0.5
n_epochs    :    60
alpha    :    0.1
rmax    :    1e-05
rrz    :    0.4
batchnorm    :    True
update_p    :    1
eval_step    :    5
display_eval_step    :    20
batch_size_pre_train    :    256
pretrain_lr    :    0.0001
pretrain_n_epochs    :    30
pretrain_weight_decay    :    0.01
pretrain_eval_step    :    5
early_stop    :    False
patience    :    40
stop_standard    :    modularity
pretrain_path    :    /home/xiangli/projects/tmpdata/GCN/Graph_Clustering/sdcn/pretrain

### Use tkipf dataset: Cora, PubMed

In [10]:
# Lower-cased dataset name, used for every on-disk lookup in this cell.
dataset_key = data_name.lower()

# Folder holding the inputs for the Cython GBP pre-computation. It is derived
# from tkipf_graph_path instead of repeating the absolute root, so changing the
# data root in the configuration cell moves both locations together. The empty
# last component keeps the trailing separator of the original path string.
Cython_GBP_data_path = os.path.join(tkipf_graph_path, "Packed_data", "no_row_normalize",
                                    dataset_key, "GBP_input", "")

# One-off conversion that generates the GBP input files; kept for reference.
# Pre_utils.convert_GBP_input_tkipf_gcn(data_name.lower(), tkipf_graph_path, directed = False,  
#                                       normalize = False, redo_save = False)

# Load adjacency, features and labels (the fourth return value is not used here).
adj_full, features, labels_full, _ = Pre_utils.load_gcn_tkipf_data(tkipf_graph_path, 
                                                        dataset_key, normalize = False, redo_save = False)

# The pre-computation below receives C-contiguous arrays with fixed dtypes:
# float32 features and an int64 adjacency array loaded from '<dataset>_adj.npy'.
features = np.ascontiguousarray(features, dtype = np.float32)
adj_matrix_cython = np.ascontiguousarray(np.load(os.path.join(Cython_GBP_data_path, f'{dataset_key}_adj.npy')), dtype=np.int64)



Packed data already exists at: /home/xiangli/projects/tmpdata/GCN/Graph_Clustering/tkipf_gcn_data/Packed_data/no_row_normalize/cora, LOADING...


### Execute Single Run

In [11]:
# Keyword options of the GBP pre-computation, gathered in one place.
gbp_options = dict(
    rwnum = 0,
    directed = False,
    add_self_loop = False,
    rand_seed = 10,
    feats = features,
    adj_matrix = adj_matrix_cython,
)

# Positional arguments: dataset name, the constant 40 (meaning not visible in
# this notebook -- TODO confirm against GBP_precompute), then alpha / rmax / rrz
# taken from the config.
features_GBP = GBP_precompute.precompute_Cython_GBP_feat(
    data_name, 40,
    config["alpha"], config["rmax"], config["rrz"],
    **gbp_options)

# Single run: take the first entry of every tuning list.
tune_val_label, tune_val, trainer_id = (
    tune_val_label_list[0],
    tune_val_list[0],
    trainer_id_list[0],
)

Total pre-computation time cost is 3.238839063999876 seconds! 


### Build the autoencoder for pre-training

In [12]:
# Derive the encoder / decoder layer sizes from the config; the third value
# returned by set_dims_RwSL is not needed here and is discarded.
enc_dims, dec_dims, _ = set_dims_RwSL(config)
# Build the autoencoder. It is only constructed in this cell: the pre-training
# call below is commented out, so no training happens here.
pretrain_model = AE(enc_dims, dec_dims, config)

# Optional pre-training step (disabled). When enabled it would be given
# pretrain_save_path, the same path stored in config["pretrain_path"].
# train_time, loss_hist = basic_exec_cluster.pretrain_ae(pretrain_model, config, features_GBP, 
#                                     device = device, pretrain_save_path = pretrain_save_path)

# NOTE(review): Post_utils is not among the imports at the top of this notebook;
# it has to be imported before this line can be re-enabled.
# Post_utils.draw_pretrain_AE_loss(pretrain_save_path)

### Train the clustering model

In [13]:
# Bundle handed to the trainer: pre-computed features, labels, adjacency.
input_data = [features_GBP, labels_full, adj_full]

model_train = RwSL_Model(
    config,
    n_clusters = config["n_clusters"],
    v = 1.0,
    pretrain_path = config["pretrain_path"],
)

# Checkpoint location for this (tuning label, trainer id) combination.
checkpoint_file_path = osp.join(
    workdir,
    f"tune_{tune_param_name}/model_checkpoint/tunelabel_{tune_val_label}_trainer_{trainer_id}/best_model.pkl",
)

# Start from a clean slate: create the folder on first use, otherwise delete
# the checkpoint left behind by a previous run.
if not osp.exists(checkpoint_file_path):
    os.makedirs(osp.dirname(checkpoint_file_path), exist_ok=True)
else:
    print("ckpt file already exists, so removed ...")
    os.remove(checkpoint_file_path)

# val_metric_path = os.path.join(workdir, 
#                             f"tune_{tune_param_name}/val_metric/tunelabel_{tune_val_label}_trainer_{trainer_id}/val_metric.pkl")

# ==========================  Start the training ==========================
time_training, metric_summary = basic_exec_cluster.train(
    model_train, config, input_data,
    device = device,
    checkpoint_file_path = checkpoint_file_path,
)


ckpt file already exists, so removed ...


2022-04-24 20:20:11 (INFO): Epoch   20 | total train loss: 0.023 | train time: 10.503s
2022-04-24 20:20:21 (INFO): Epoch   40 | total train loss: 0.020 | train time: 20.399s
2022-04-24 20:20:32 (INFO): Epoch   60 | total train loss: 0.020 | train time: 30.761s


### Test the clustering model

In [14]:
# Fresh model instance for evaluation, built with the same settings as the
# training model.
model_test = RwSL_Model(config,
               n_clusters=config["n_clusters"],
                v=1.0, 
                pretrain_path = config["pretrain_path"])

# Same checkpoint location the training cell used.
checkpoint_file_path = os.path.join(workdir,
                f"tune_{tune_param_name}/model_checkpoint/tunelabel_{tune_val_label}_trainer_{trainer_id}/best_model.pkl")

# Fail early with a proper exception if the checkpoint is not there.
# Two fixes compared with the previous guard:
#   * `raise("...")` raised a plain string, which Python 3 rejects with
#     "TypeError: exceptions must derive from BaseException", hiding the message;
#   * the check looked at the parent directory, which can exist without the
#     checkpoint file itself.
if not os.path.isfile(checkpoint_file_path):
    raise FileNotFoundError(f"checkpoint file is missing: {checkpoint_file_path}")

# Intended location for the test metrics; not passed to the call below.
test_metric_path = os.path.join(workdir,
                f"tune_{tune_param_name}/test_metric/tunelabel_{tune_val_label}_trainer_{trainer_id}/test_metric.pkl")

# >>>>>>>>>>>>>>>>>>>> Start test inference
# Inference is run on the CPU regardless of the training device.
test_time, test_metric = basic_exec_cluster.test(model_test, config, input_data, 
                            device = "cpu", checkpoint_file_path = checkpoint_file_path)

2022-04-24 20:20:46 (INFO): Test metrics: | Accuracy : 0.2518463810930576 | f1_micro : 0.2518463810930576 | f1_macro : 0.11426784241525585 | NMI : 0.0669474395726579 | ARI : -0.0206928271325985 | conductance : 0.046419098143236075 | modularity : 0.14337201717152318 | test time: 0.276s


### Train and test the clustering model through the `RwSL_framework` class interface

In [12]:
# Create the framework object from the loaded config; the cells below call its
# train_cluster / test_cluster methods instead of the basic_exec_cluster functions.
obj = RwSL_app.RwSL_framework(config)

In [13]:
# Bundle handed to the trainer: pre-computed features, labels, adjacency.
input_data = [features_GBP, labels_full, adj_full]

model_train = RwSL_Model(
    config,
    n_clusters = config["n_clusters"],
    v = 1.0,
    pretrain_path = config["pretrain_path"],
)

# Checkpoint location for this (tuning label, trainer id) combination.
checkpoint_file_path = osp.join(
    workdir,
    f"tune_{tune_param_name}/model_checkpoint/tunelabel_{tune_val_label}_trainer_{trainer_id}/best_model.pkl",
)

# Start from a clean slate: create the folder on first use, otherwise delete
# the checkpoint left behind by a previous run.
if not osp.exists(checkpoint_file_path):
    os.makedirs(osp.dirname(checkpoint_file_path), exist_ok=True)
else:
    print("ckpt file already exists, so removed ...")
    os.remove(checkpoint_file_path)

# Intended location for validation metrics; not passed to the call below.
val_metric_path = osp.join(
    workdir,
    f"tune_{tune_param_name}/val_metric/tunelabel_{tune_val_label}_trainer_{trainer_id}/val_metric.pkl",
)

# ==========================  Start the training ==========================
time_training, metric_summary = obj.train_cluster(
    model_train, config, input_data,
    device = device,
    checkpoint_file_path = checkpoint_file_path,
)


2022-04-21 21:10:20 (INFO): Epoch   20 | total train loss: 0.021 | train time: 18.603s
2022-04-21 21:10:39 (INFO): Epoch   40 | total train loss: 0.018 | train time: 36.816s
2022-04-21 21:10:58 (INFO): Epoch   60 | total train loss: 0.019 | train time: 55.316s


In [14]:
# Fresh model instance for evaluation, built with the same settings as the
# training model.
model_test = RwSL_Model(config,
               n_clusters=config["n_clusters"],
                v=1.0, 
                pretrain_path = config["pretrain_path"])

# Same checkpoint location the training cell used.
checkpoint_file_path = os.path.join(workdir,
                f"tune_{tune_param_name}/model_checkpoint/tunelabel_{tune_val_label}_trainer_{trainer_id}/best_model.pkl")

# Fail early with a proper exception if the checkpoint is not there.
# Two fixes compared with the previous guard:
#   * `raise("...")` raised a plain string, which Python 3 rejects with
#     "TypeError: exceptions must derive from BaseException", hiding the message;
#   * the check looked at the parent directory, which can exist without the
#     checkpoint file itself.
if not os.path.isfile(checkpoint_file_path):
    raise FileNotFoundError(f"checkpoint file is missing: {checkpoint_file_path}")

# test_metric_path = os.path.join(workdir,
#                 f"tune_{tune_param_name}/test_metric/tunelabel_{tune_val_label}_trainer_{trainer_id}/test_metric.pkl")

# >>>>>>>>>>>>>>>>>>>> Start test inference
# Inference is run on the CPU regardless of the training device.
test_time, test_metric = obj.test_cluster(model_test, config, input_data, 
                            device = "cpu", checkpoint_file_path = checkpoint_file_path)

2022-04-21 21:10:59 (INFO): Test metrics: | Accuracy : 0.275480059084195 | f1_micro : 0.275480059084195 | f1_macro : 0.12129655408589715 | NMI : 0.05696476380259752 | ARI : -0.012091254821594465 | conductance : 0.05229253505115574 | modularity : 0.16199088181030144 | test time: 0.171s


### GPU flush

In [None]:
# # free GPU memory
# !(nvidia-smi | grep 'python' | awk '{ print $5 }' | xargs -n1 kill -9 )
# !(nvidia-smi | grep 'python')