In [1]:
import main
import model

### Reading Data

In [2]:
# Locations of the Cora graph, feature and label files, relative to this notebook.
n_path = '../data/cora/graph.txt'
f_path = '../data/cora/feature.txt'
# Not consumed by load_data; passed to node_classification in the evaluation cells.
l_path = '../data/cora/group.txt'
# load_data's result is unpacked into three values that the later cells reuse.
graph, adj_mat, features = main.load_data(net_path=n_path, feature_path=f_path)

Loading data graph from ../data/cora/graph.txt
Loading data graph from ../data/cora/feature.txt
	Adjacency matrix shape: (2708, 2708)
	Feature matrix shape: (2708, 1433)
	Number of edges: 5429


### Configuring options

In [3]:
# Size the options from the loaded data: node count is the adjacency matrix's
# row count, feature count is the feature matrix's column count.
# NOTE(review): the meaning of `rate` is defined by main.Options and is not
# visible here (presumably a dropout rate) — confirm before tuning.
options = main.Options(number_of_nodes=adj_mat.shape[0], number_of_features=features.shape[1], rate=0.2)

### Training models

#### Node Model

In [4]:
# Build the node model on the loaded adjacency and feature matrices and train it for 20 epochs.
node_model = model.NodeModel(options=options, adj=adj_mat, features=features)
node_model.train(epochs=20)
# predict() returns a keyed collection; keep the entry stored under
# model.UNF_KEY as the embedding evaluated in the classification experiment.
embeddings_nm = node_model.predict()
unf_embedding_nm = embeddings_nm[model.UNF_KEY]

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


#### Edge Model

In [5]:
# Build the edge model with the weights learned by the node model passed in as
# transferred_weights, so this cell must run after the node model is trained.
edge_model = model.EdgeModel(options=options, adj=adj_mat, features=features, transferred_weights=node_model.get_learned_weights())
# Only a single epoch of additional training on top of the transferred weights.
edge_model.train(epochs=1)
# As for the node model, keep the entry stored under model.UNF_KEY for evaluation.
embeddings_em = edge_model.predict()
unf_embedding_em = embeddings_em[model.UNF_KEY]

Transferring learned weights to the 0-th layer of encoder
Weight shape: (2708, 512), bias shape: (512,)
Transferring learned weights to the 1-th layer of encoder
Weight shape: (512, 256), bias shape: (256,)
Transferring learned weights to the embedding layer
Transferring learned weights to the 0-th layer of decoder
Weight shape: (128, 256), bias shape: (256,)
Transferring learned weights to the 1-th layer of decoder
Weight shape: (256, 512), bias shape: (512,)
Transferring learned weights to reconstruction layer
Epoch 1/1


## Node Classification experiment

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection, preprocessing
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import f1_score

import pandas as pd
import numpy as np

In [7]:
def node_classification(features, lbl_path, tr=.1, seed=0, cv=10):
    """Score node embeddings with a one-vs-rest logistic-regression classifier.

    The embeddings are split ``cv`` times into random train/test partitions;
    for every split a standardising pipeline is fitted on the training part
    and micro/macro F1 are measured on the test part.

    Parameters
    ----------
    features : array-like, one row per node
        Node embeddings. Must support indexing with an integer index array.
    lbl_path : str
        Path to a tab-separated, header-less file whose two columns are read
        as ``node`` and ``label``.
    tr : float, default 0.1
        Fraction of nodes used for training; the remaining ``1 - tr`` are
        used for testing.
    seed : int, default 0
        ``random_state`` of the shuffle splitter.
    cv : int, default 10
        Number of random splits.

    Returns
    -------
    tuple of float
        ``(micro_mean, micro_std, macro_mean, macro_std)`` over the splits.
        The standard deviations use numpy's default (population) estimator.

    Raises
    ------
    ValueError
        If the label file does not contain exactly one row per feature row.
    """
    label_df = pd.read_csv(lbl_path, header=None, sep='\t', names=['node', 'label'])
    # Labels are ordered by node id.
    # NOTE(review): this assumes row i of `features` belongs to the i-th
    # smallest node id in the label file — confirm against the data loader.
    labels = label_df.sort_values('node')['label'].values

    # Without this check a label file with extra rows (e.g. several labels per
    # node) would be indexed without error and silently yield scores computed
    # on misaligned labels.
    n_samples = np.shape(features)[0]
    if len(labels) != n_samples:
        raise ValueError(
            'Label file {!r} has {} rows but features has {} rows'.format(
                lbl_path, len(labels), n_samples))

    print('Training ratio: {}'.format(tr))
    ss = model_selection.ShuffleSplit(n_splits=cv, train_size=tr, test_size=1. - tr, random_state=seed)
    # The pipeline does not depend on the split; fit() re-estimates both the
    # scaler and the classifier from scratch on every call.
    pipe = make_pipeline(preprocessing.StandardScaler(),
                         OneVsRestClassifier(LogisticRegression()))

    micros = []
    macros = []
    for train_index, test_index in ss.split(features):
        x_train, x_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_hat = pipe.fit(x_train, y_train).predict(x_test)
        micros.append(f1_score(y_test, y_hat, average='micro'))
        macros.append(f1_score(y_test, y_hat, average='macro'))

    mic_mean, mic_std = np.mean(micros), np.std(micros)
    mac_mean, mac_std = np.mean(macros), np.std(macros)
    return mic_mean, mic_std, mac_mean, mac_std

#### Evaluation of the node model after 20 epochs

In [8]:
# Evaluate the node-model embedding with node_classification's defaults
# (tr=.1, seed=0, cv=10) against the labels at l_path.
mic_f1, mic_std, mac_f1, mac_std  = node_classification(unf_embedding_nm, lbl_path=l_path)
# Displayed as (micro-F1 mean, micro-F1 std, macro-F1 mean, macro-F1 std).
mic_f1, mic_std, mac_f1, mac_std

Training ratio: 0.1




(0.6358080393765382,
 0.011315775081528222,
 0.6103103123369444,
 0.017725576568914443)

#### Evaluation of the edge model after 1 epoch

In [9]:
# Evaluate the edge-model embedding with the same defaults (tr=.1, seed=0,
# cv=10). This rebinds the four result names used for the node-model scores.
mic_f1, mic_std, mac_f1, mac_std  = node_classification(unf_embedding_em, lbl_path=l_path)
# Displayed as (micro-F1 mean, micro-F1 std, macro-F1 mean, macro-F1 std).
mic_f1, mic_std, mac_f1, mac_std

Training ratio: 0.1


(0.6226004922067269,
 0.01121571809423823,
 0.5968780328382988,
 0.01393250999458774)