In [None]:
# Mount Google Drive at /content/drive so the data files read by later cells
# (under "/content/drive/My Drive/...") are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys
if 'google.colab' in sys.modules:
  %pip install -q stellargraph[demos]==1.1.0

[K     |████████████████████████████████| 414 kB 8.1 MB/s 
[K     |████████████████████████████████| 500 kB 55.6 MB/s 
[K     |████████████████████████████████| 120 kB 59.9 MB/s 
[K     |████████████████████████████████| 1.6 MB 58.3 MB/s 
[K     |████████████████████████████████| 82 kB 608 kB/s 
[K     |████████████████████████████████| 41 kB 681 kB/s 
[?25h  Building wheel for mplleaflet (setup.py) ... [?25l[?25hdone


In [None]:
import stellargraph as sg

# Fail fast when the installed StellarGraph is not the release this notebook
# was written against.
REQUIRED_SG_VERSION = "1.1.0"

try:
    sg.utils.validate_notebook_version(REQUIRED_SG_VERSION)
except AttributeError:
    # An AttributeError here is reported as a version mismatch, naming the
    # installed version; the original traceback is suppressed (`from None`).
    version_error = f"This notebook requires StellarGraph version 1.1.0, but a different version {sg.__version__} is installed.  Please see <https://github.com/stellargraph/stellargraph/issues/1172>."
    raise ValueError(version_error) from None

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import random
import stellargraph as sg

In [None]:
# Input files on the mounted Drive: the edge list and the per-node table
# (the latter carries the "node" id column, the features and the "isp" label).
EDGELIST_PATH = "/content/drive/My Drive/BaselineToShow/df_D1_1k_edgelist.edgelist"
NODE_TABLE_PATH = "/content/drive/My Drive/BaselineToShow/df_D1_1k.csv"

edges = pd.read_csv(EDGELIST_PATH)
features_df = pd.read_csv(NODE_TABLE_PATH)

In [None]:
# Per-node class label ("isp"), indexed by node id rather than by CSV row
# position. Later cells hand the label index to the graph generator, and the
# graph's nodes are keyed by the "node" column, so the two must agree even
# when the CSV rows are not stored in node-id order.
graph_labels = features_df.set_index('node')['isp']

In [None]:
# Numeric per-node attributes used as the GCN input features, indexed by node id.
FEATURE_COLUMNS = [
    'indegree', 'outdegree', 'degree',
    'instrength', 'outstrength', 'strength',
    'numberneightbours', 'invtransfreq',
]

features = (
    features_df[['node'] + FEATURE_COLUMNS]
    .set_index("node")
    # Missing values (visible in `invtransfreq`) must not reach the graph
    # convolutions: a NaN feature propagates through the matrix products and
    # makes the resulting embeddings NaN as well.
    # NOTE(review): 0.0 is a placeholder imputation -- confirm it is the right
    # neutral value for these columns.
    .fillna(0.0)
)
features

Unnamed: 0_level_0,indegree,outdegree,degree,instrength,outstrength,strength,numberneightbours,invtransfreq
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1,0,1,0.0,0.0,0.0,1,
1,0,1587,1587,0.0,0.0,0.0,867,
2,5,0,5,0.0,0.0,0.0,1,
3,1,0,1,0.0,0.0,0.0,1,
4,1,0,1,0.0,0.0,0.0,1,
...,...,...,...,...,...,...,...,...
995,4,0,4,0.0,0.0,0.0,1,
996,2,0,2,0.0,0.0,0.0,1,
997,8,0,8,0.0,0.0,0.0,2,
998,5,0,5,0.0,0.0,0.0,2,


In [None]:
# Keep only the two endpoint columns of the edge list and display the result.
endpoint_columns = ['target', 'source']
edges = edges.loc[:, endpoint_columns]
edges

Unnamed: 0,target,source
0,2,1
1,3,1
2,4,1
3,5,1
4,6,1
...,...,...
1025,957,879
1026,958,879
1027,913,896
1028,897,913


In [None]:
# Build the StellarGraph from the node-feature table and the edge list, then
# print its textual summary (node/edge counts, feature length, edge types).
graph = sg.StellarGraph(nodes=features, edges=edges)
graph_summary = graph.info()
print(graph_summary)

StellarGraph: Undirected multigraph
 Nodes: 1000, Edges: 1030

 Node types:
  default: [1000]
    Features: float32 vector, length 8
    Edge types: default-default->default

 Edge types:
    default-default->default: [1030]
        Weights: all 1 (default)
        Features: none


In [None]:
from stellargraph.mapper import (
    CorruptedGenerator,
    FullBatchNodeGenerator,
    GraphSAGENodeGenerator,
    HinSAGENodeGenerator,
)
from stellargraph import StellarGraph
from stellargraph.layer import GCN, DeepGraphInfomax, GraphSAGE, GAT, APPNP, HinSAGE

from stellargraph import datasets
from stellargraph.utils import plot_history

import pandas as pd
from matplotlib import pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from IPython.display import display, HTML

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras import Model

In [None]:
# Full-batch generator over the graph and a three-layer GCN encoder on top of it.
GCN_LAYER_SIZES = [8, 8, 8]

fullbatch_generator = FullBatchNodeGenerator(graph, sparse=False)
gcn_model = GCN(
    layer_sizes=GCN_LAYER_SIZES,
    activations=["relu"] * len(GCN_LAYER_SIZES),
    generator=fullbatch_generator,
)

# Corrupted-input generator wrapping the full-batch generator, flowed over
# every node in the graph.
# NOTE(review): no visible cell consumes `gen` or fits `gcn_model`, so the
# embeddings computed further down appear to come from an untrained encoder --
# confirm whether a training step was intended here.
corrupted_generator = CorruptedGenerator(fullbatch_generator)
all_nodes = graph.nodes()
gen = corrupted_generator.flow(all_nodes)

Using GCN (local pooling) filters...


In [None]:
# Wrap the GCN's input/output tensors in a standalone Keras model so that
# generator batches can be turned into node embeddings with `predict`.
x_emb_in, x_emb_out = gcn_model.in_out_tensors()
# Drop axis 0 of the GCN output -- presumably the size-1 batch axis that
# full-batch models add; TODO confirm.
x_out = tf.squeeze(x_emb_out, axis=0)
emb_model = Model(inputs=x_emb_in, outputs=x_out)

In [None]:
# Stratified hold-out split of the node labels. The split is seeded so the
# reported metrics are reproducible across runs, and the test fraction is
# written out explicitly (0.25 is what scikit-learn uses when both sizes are
# left unset, matching the 250-node test set in the recorded output).
SPLIT_SEED = 42
TEST_FRACTION = 0.25

train_subjects, test_subjects = model_selection.train_test_split(
    graph_labels,
    test_size=TEST_FRACTION,
    stratify=graph_labels,
    random_state=SPLIT_SEED,
)

# The label index is handed to the generator as the set of nodes to embed.
test_gen = fullbatch_generator.flow(test_subjects.index)
train_gen = fullbatch_generator.flow(train_subjects.index)

# Embeddings for each subset, produced by the embedding model.
test_embeddings = emb_model.predict(test_gen)
train_embeddings = emb_model.predict(train_gen)

In [None]:
import lightgbm as lgb
from statistics import mean, stdev
from sklearn.model_selection import StratifiedKFold

# Gradient-boosted classifier fitted on the training-node embeddings; the
# fitted estimator is the cell's displayed value.
lgbm_params = {"learning_rate": 0.005}
clf = lgb.LGBMClassifier(**lgbm_params)
clf.fit(train_embeddings, train_subjects)

LGBMClassifier(learning_rate=0.005)

In [None]:
# Hard class predictions of the fitted classifier for the held-out embeddings.
y_pred=clf.predict(test_embeddings)

In [None]:
from sklearn.metrics import roc_auc_score, f1_score,classification_report, confusion_matrix, accuracy_score

# Held-out evaluation. Precision / recall / F1 are reported for the positive
# class (label 1), which is the minority class in the recorded output.
accuracy = accuracy_score(test_subjects, y_pred)
auc = roc_auc_score(test_subjects, clf.predict_proba(test_embeddings)[:, 1])
print("accuracy:", accuracy)
print("AUC:", auc)

# scikit-learn's confusion matrix puts true labels on the rows and predicted
# labels on the columns. With labels=[0, 1] the flattened order is therefore
# tn, fp, fn, tp -- cm[0, 0] is the true-NEGATIVE count, not the true positives.
cm = confusion_matrix(test_subjects, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# Guard the ratios: when the classifier predicts no positives at all (as in
# the recorded run) the denominators are zero and the metric is reported as 0.
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1 = 2 * (recall * precision) / (recall + precision) if (recall + precision) else 0.0
print("Recall:", recall)
print("Precision:", precision)
print("F1:", f1)

# zero_division=0 keeps the same reported values for classes that are never
# predicted, without emitting an undefined-metric warning per average.
print(classification_report(test_subjects, y_pred, zero_division=0))

accuracy: 0.98
AUC: 0.5
Recall: 0.98
Precision: 1.0
F1: 0.98989898989899
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       245
           1       0.00      0.00      0.00         5

    accuracy                           0.98       250
   macro avg       0.49      0.50      0.49       250
weighted avg       0.96      0.98      0.97       250



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
