In [1]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina' # 'retina'
import logging
import torch
import numpy as np
import pickle
from pathlib import Path
import pandas as pd
import copy
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import time

from main.deepis import DeepIS, DiffusionPropagate, Identity
from main.models.MLP import MLPTransform
from main.models.GraphSAGE import SupervisedGraphSage
from main.models.GAT import GAT
from main.models.SGC import SGC
from main.utils import to_nparray, to_torch, sp2adj_lists
from main.training import train_model, get_predictions_new_seeds, PIteration, FeatureCons
from main.earlystopping import stopping_args
from main.utils import load_dataset, load_latest_ckpt
# from im.influspread import IS

# Root-logger configuration: INFO level, each record rendered as
# "<YYYY-mm-dd HH:MM:SS>:<message>".
_log_settings = {
    'format': '%(asctime)s:%(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
    'level': logging.INFO,
}
logging.basicConfig(**_log_settings)
# The bare 'seaborn' style sheet was deprecated in matplotlib 3.6 and later removed;
# the same look is shipped as 'seaborn-v0_8'. Use the new name when this matplotlib
# provides it and fall back to the legacy name on older installs.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
def me_op(x, y):
    """Return the mean absolute error: the average of ``|x - y|`` over all elements."""
    return np.mean(np.abs(x - y))


def te_op(x, y):
    """Return the total error: the absolute difference between ``sum(x)`` and ``sum(y)``."""
    return np.abs(np.sum(x) - np.sum(y))

# Load the dataset


## Load from saved SparseGraph object, with added prob_matrix and influ_mats

In [2]:
# key parameters
# dataset    : name handed to load_dataset; options noted by the author are
#              'cora_ml', 'citeseer', 'ms_academic', 'pubmed'
# model_name : selects the model built further down; 'deepis' (the author also lists '')
dataset, model_name = 'cora_ml', 'deepis'

In [3]:
# Load the graph object for the configured dataset and print its summary.
graph = load_dataset(dataset)
print(graph)

# Keep a separate copy of the complete list of influence matrices, then leave only the
# first 50 on the graph object itself (the graph is what gets passed to train_model).
# NOTE(review): presumably the remaining matrices are meant to be held out from
# training — confirm against main.training.
influ_mat_list = copy.copy(graph.influ_mat_list)
graph.influ_mat_list = graph.influ_mat_list[:50]
# Show both shapes (truncated list on the graph, full copy) as the cell output.
graph.influ_mat_list.shape, influ_mat_list.shape

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7), prob_matrix (2810x2810), influ_mat_list (60x2810x25)>


((50, 2810, 25), (60, 2810, 25))

# Build model

In [4]:
%load_ext autoreload
%autoreload 2

# training parameters
niter = 4  # passed to FeatureCons; the MLP below takes niter + 1 input features


def propagate_model(x, _, y):
    """Propagation stand-in for DeepIS: ignore the middle argument and return ``x[y]``."""
    return x[y]


# Feature constructor for the chosen model, wired to the graph's probability matrix.
fea_constructor = FeatureCons(model_name, niter=niter)
fea_constructor.prob_matrix = graph.prob_matrix
device = 'cpu' # 'cpu', 'cuda'
# Keyword arguments forwarded verbatim to train_model.
# NOTE(review): the meaning of 'λ' and 'γ' is defined in main.training — confirm there.
args_dict = {
    'learning_rate': 0.0001,
    'λ': 0,
    'γ': 0,
    'ckpt_dir': Path('./checkpoints'),
    'idx_split_args': {'ntraining': 1500, 'nstopping': 500, 'nval': 10, 'seed': 2413340114},
    'test': False,
    'device': device,
    'print_interval': 1,
    'batch_size': None,
}
if model_name == 'deepis':
    gnn_model = MLPTransform(input_dim=niter+1, hiddenunits=[64, 64], num_classes=1)
else:
    # Fail loudly: previously this branch left gnn_model undefined, so the next line
    # raised NameError or silently reused a stale gnn_model from earlier kernel state.
    raise ValueError(
        f"Unsupported model_name {model_name!r}: only 'deepis' is wired up in this notebook."
    )
model = DeepIS(gnn_model=gnn_model, propagate=propagate_model)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Train model from scratch

In [5]:
# Train with the settings collected in args_dict; the first argument is the string
# '<model_name>_<dataset>'.
# NOTE(review): `model` is both input and output, so re-running this cell continues
# from the already-trained model rather than from a fresh one.
model, result = train_model(
    f'{model_name}_{dataset}',
    model,
    fea_constructor,
    graph,
    **args_dict,
)

48, (1.098 sec)
2020-12-13 17:12:53:Epoch 47: Train loss = 0.1167, Train error = 0.1167, early stopping loss = 0.0860, early stopping error = 0.0860, (1.115 sec)
2020-12-13 17:12:54:Epoch 48: Train loss = 0.1171, Train error = 0.1171, early stopping loss = 0.0801, early stopping error = 0.0801, (1.098 sec)
2020-12-13 17:12:55:Epoch 49: Train loss = 0.1128, Train error = 0.1128, early stopping loss = 0.0876, early stopping error = 0.0876, (1.096 sec)
2020-12-13 17:12:56:Epoch 50: Train loss = 0.1139, Train error = 0.1139, early stopping loss = 0.0824, early stopping error = 0.0824, (1.091 sec)
2020-12-13 17:12:57:Epoch 51: Train loss = 0.1142, Train error = 0.1142, early stopping loss = 0.0854, early stopping error = 0.0854, (1.144 sec)
2020-12-13 17:12:59:Epoch 52: Train loss = 0.1149, Train error = 0.1149, early stopping loss = 0.0801, early stopping error = 0.0801, (1.201 sec)
2020-12-13 17:13:00:Epoch 53: Train loss = 0.1131, Train error = 0.1131, early stopping loss = 0.0809, early

# Prediction on NEW SEEDS

In [6]:
# NOTE(review): the dataset name is hard-coded again here instead of reusing the value
# from the "key parameters" cell — keep the two in sync if that cell is changed.
dataset = 'cora_ml'
# Reload the graph: the earlier graph object had its influ_mat_list cut to 50 entries.
graph = load_dataset(dataset)
# Copy of the full list of influence matrices from the freshly loaded graph.
influ_mat_list = copy.copy(graph.influ_mat_list)
# Display the graph summary as the cell output.
graph

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7), prob_matrix (2810x2810), influ_mat_list (60x2810x25)>

### predict

In [7]:
# metrics
def me(x, y):
    """Return the mean absolute error: the average of ``|x - y|`` over all elements."""
    return np.mean(np.abs(x - y))


def te(x, y):
    """Return the total error: the absolute difference between ``sum(x)`` and ``sum(y)``."""
    return np.abs(np.sum(x) - np.sum(y))

In [8]:
%%time
# Pick one influence matrix by index; 58 lies beyond the first 50 entries that were
# left on the graph object used for training.
influ_mat = influ_mat_list[58]
# First column is used as the seed indicator vector (entries equal to 1 mark seeds).
seed_vec = influ_mat[:, 0]
seed_idx = np.argwhere(seed_vec == 1) # used by PIteration
# Last column is used as the reference values the predictions are scored against.
influ_vec = influ_mat[:, -1]

# Point the feature constructor at the probability matrix of the reloaded graph.
fea_constructor.prob_matrix = graph.prob_matrix
# Model predictions for this seed vector, requested for every node index.
preds = get_predictions_new_seeds(model, fea_constructor, seed_vec, np.arange(len(seed_vec)))
# Post-process the predictions with PIteration.
# NOTE(review): the meaning of the trailing `True, 2` arguments is not visible in this
# notebook — confirm against main.training.PIteration.
final_preds = PIteration(graph.prob_matrix, preds, seed_idx, True, 2)

# Report mean absolute error and absolute difference of totals against the reference.
print('mean error:', me(influ_vec, final_preds))
print('total error:', te(influ_vec, final_preds))


mean error: 0.01882258335314028
total error: 15.74771246034561
CPU times: user 2.04 s, sys: 110 ms, total: 2.15 s
Wall time: 439 ms
