In [5]:
%load_ext autoreload
%autoreload 2
import logging
import torch
import numpy as np
import pickle
from pathlib import Path
import pandas as pd
import copy
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import time

from main.deepis import DeepIS, DiffusionPropagate, Identity
from main.models.MLP import MLPTransform
from main.utils import to_nparray, to_torch, sp2adj_lists
from main.training import train_model, get_predictions_new_seeds, PIteration, FeatureCons
from main.earlystopping import stopping_args
from main.utils import load_dataset, load_latest_ckpt
# from im.influspread import IS

# Timestamped, INFO-level logging for everything run from this notebook.
logging.basicConfig(
    format='%(asctime)s:%(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO)

# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the old names. Prefer the new name when it is available
# and fall back to the legacy 'seaborn' name on older installations.
plt.style.use(next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'seaborn'))
def me_op(x, y):
    """Return the mean absolute element-wise difference between ``x`` and ``y``."""
    return np.mean(np.abs(x - y))


def te_op(x, y):
    """Return the absolute difference between the totals (sums) of ``x`` and ``y``."""
    return np.abs(np.sum(x) - np.sum(y))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load the dataset


## Load from saved SparseGraph object, with added prob_matrix and influ_mats

In [6]:
# Key parameters: edit these two values to select the experiment.
# `dataset` is the name handed to load_dataset; `model_name` is passed to
# FeatureCons and is also part of the run name given to train_model.
dataset = 'cora_ml' # listed options: 'cora_ml', 'citeseer', 'ms_academic', 'pubmed'
model_name = 'deepis' # 'deepis' is the only value the model-building cell constructs a network for

In [7]:
# Load the SparseGraph for the configured dataset and print its summary.
graph = load_dataset(dataset)
print(graph)

# Keep a separate copy of every influence matrix before trimming the graph's own list.
influ_mat_list = copy.copy(graph.influ_mat_list)
# Restrict the graph object to its first 50 matrices.
# NOTE(review): presumably these 50 are what train_model consumes, leaving the
# remaining ones as unseen seed sets for evaluation — confirm in main.training.
graph.influ_mat_list = graph.influ_mat_list[:50]
# Show both shapes to confirm the trim (trimmed list, full copy).
graph.influ_mat_list.shape, influ_mat_list.shape

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7), prob_matrix (2810x2810), influ_mat_list (60x2810x25)>


((50, 2810, 25), (60, 2810, 25))

# Build model

In [8]:
%load_ext autoreload
%autoreload 2

# training parameters
ndim = 5  # feature dimension: given to FeatureCons and used as the MLP input_dim
propagate_model = DiffusionPropagate(graph.prob_matrix, niter=2)
fea_constructor = FeatureCons(model_name, ndim=ndim)
fea_constructor.prob_matrix = graph.prob_matrix

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Keyword arguments forwarded unchanged to train_model.
args_dict = {
    'learning_rate': 0.0001,
    'λ': 0,
    'γ': 0,
    'ckpt_dir': Path('./checkpoints'),
    'idx_split_args': {'ntraining': 1500, 'nstopping': 500, 'nval': 10, 'seed': 2413340114},
    'test': False,
    'device': device,
    'print_interval': 1,
    'batch_size': None,
}

if model_name == 'deepis':
    gnn_model = MLPTransform(input_dim=ndim, hiddenunits=[64, 64], num_classes=1)
else:
    # Fail here with a clear message instead of leaving `gnn_model` undefined
    # and hitting a NameError on the DeepIS(...) line below.
    raise ValueError(
        f"Unsupported model_name {model_name!r}: only 'deepis' has a network defined")
model = DeepIS(gnn_model=gnn_model, propagate=propagate_model)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Train model from scratch

In [9]:
# Train under the run name "<model_name>_<dataset>"; train_model hands back the
# model together with a result object.
model, result = train_model(
    f'{model_name}_{dataset}',
    model,
    fea_constructor,
    graph,
    **args_dict,
)

2020-12-14 16:28:54:PyTorch seed: 3286736829
2020-12-14 16:29:11:Epoch 0: Train loss = 0.0507, Train error = 0.0507, early stopping loss = 0.0501, early stopping error = 0.0501, (14.459 sec)
2020-12-14 16:29:14:Epoch 1: Train loss = 0.0472, Train error = 0.0472, early stopping loss = 0.0496, early stopping error = 0.0496, (3.160 sec)
2020-12-14 16:29:18:Epoch 2: Train loss = 0.0457, Train error = 0.0457, early stopping loss = 0.0436, early stopping error = 0.0436, (3.584 sec)
2020-12-14 16:29:21:Epoch 3: Train loss = 0.0421, Train error = 0.0421, early stopping loss = 0.0408, early stopping error = 0.0408, (3.252 sec)
2020-12-14 16:29:24:Epoch 4: Train loss = 0.0388, Train error = 0.0388, early stopping loss = 0.0383, early stopping error = 0.0383, (3.007 sec)
2020-12-14 16:29:27:Epoch 5: Train loss = 0.0357, Train error = 0.0357, early stopping loss = 0.0357, early stopping error = 0.0357, (3.200 sec)
2020-12-14 16:29:32:Epoch 6: Train loss = 0.0332, Train error = 0.0332, early stoppi

# Prediction on NEW SEEDS

In [10]:
# Reload the graph for the dataset chosen in the key-parameters cell. `dataset`
# is deliberately not re-assigned here: a hard-coded name could silently diverge
# from the dataset the model above was built and trained for.
graph = load_dataset(dataset)
# Work on a copy of the freshly loaded (untrimmed) influence matrices.
influ_mat_list = copy.copy(graph.influ_mat_list)
graph

<Undirected, unweighted and connected SparseGraph with 15962 edges (no self-loops). Data: adj_matrix (2810x2810), attr_matrix (2810x2879), labels (2810), node_names (2810), attr_names (2879), class_names (7), prob_matrix (2810x2810), influ_mat_list (60x2810x25)>

### predict

In [11]:
# metrics (same formulas as me_op / te_op from the setup cell, under the short
# names used by the prediction cell)
def me(x, y):
    """Return the mean absolute element-wise difference between ``x`` and ``y``."""
    return np.mean(np.abs(x - y))


def te(x, y):
    """Return the absolute difference between the totals (sums) of ``x`` and ``y``."""
    return np.abs(np.sum(x) - np.sum(y))

In [18]:
%%time

# Take one stored influence matrix: its first column is the seed indicator
# vector and its last column is the reference the predictions are scored against.
influ_mat = influ_mat_list[55]
seed_vec, influ_vec = influ_mat[:, 0], influ_mat[:, -1]
seed_idx = np.argwhere(seed_vec == 1)  # used by PIteration

fea_constructor.prob_matrix = graph.prob_matrix
final_preds = get_predictions_new_seeds(
    model,
    fea_constructor,
    seed_vec,
    np.arange(len(seed_vec)),  # one index per entry of seed_vec
    graph.prob_matrix,
    seed_idx,
)

print('mean error:', me(influ_vec, final_preds))
print('total error:', te(influ_vec, final_preds))


mean error: 0.02211454525282374
total error: 29.403850097656232
CPU times: user 818 ms, sys: 80.8 ms, total: 899 ms
Wall time: 282 ms
