In [1]:
#!pip install stellargraph              #For Google Collab or Kaggle
import numpy as np
import pandas as pd

import stellargraph as sg
from stellargraph.mapper import PaddedGraphGenerator
from stellargraph.layer import DeepGraphCNN
from stellargraph import StellarGraph

from sklearn import model_selection
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Conv1D, MaxPool1D, Dropout, Flatten
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.callbacks import ModelCheckpoint

import pickle

In [2]:
with open("graphs.txt", "rb") as fp:   # Unpickling ../input/egg-new/26-0.2/graphs.txt
    graphs = pickle.load(fp)
    
with open("labels.txt", "rb") as fp:   # Unpickling
    labels = pickle.load(fp)

In [3]:
print(labels)
summary = pd.DataFrame(
    [(g.number_of_nodes(), g.number_of_edges()) for g in graphs],
    columns=["nodes", "edges"],
)
print('\n',summary.describe().round(1))
print('\n',labels.value_counts().to_frame())

0      0
1      0
2      0
3      0
4      0
      ..
901    1
902    1
903    1
904    1
905    1
Name: label, Length: 906, dtype: category
Categories (2, object): ['0', '1']

        nodes  edges
count  906.0  906.0
mean    25.0   85.9
std      0.0   44.1
min     25.0   10.0
25%     25.0   51.2
50%     25.0   79.0
75%     25.0  114.0
max     25.0  249.0

    label
1    462
0    444


In [4]:
graph_labels = pd.get_dummies(labels, drop_first=True)

In [5]:
training_graphs, pred_graphs = model_selection.train_test_split(
    graph_labels, train_size=0.84, stratify=graph_labels, random_state=11,
)

In [6]:
train_graphs = T = [graphs[i] for i in training_graphs.index.values]
train_labels = training_graphs.index.values
print(len(train_graphs))
print(len(pred_graphs))

761
145


In [7]:
def create_model(graphs, k, layer_sizes, activations1, activations2, dense_layers, dense_units, optimizer, conv, dropout_amount, dropout=None):
    generator = PaddedGraphGenerator(graphs=graphs)
    k = k  # the number of rows for the output tensor
    layer_sizes = layer_sizes

    dgcnn_model = DeepGraphCNN(
        layer_sizes= layer_sizes,
        activations= activations1,
        k=k,
        bias=False,
        generator=generator,
    )
    x_inp, x_out = dgcnn_model.in_out_tensors()
    
    if(conv):
        x_out = Conv1D(filters=16, kernel_size=sum(layer_sizes), strides=sum(layer_sizes))(x_out)
    
    x_out = layers.MaxPool1D(pool_size=2)(x_out)
    x_out = Flatten()(x_out)


        
    for i in range(dense_layers):
        if((i % dropout_amount) == 0):
            if(dropout):
                x_out = Dropout(rate=dropout)(x_out)
                
        if(dense_units < 32):
            dense_units = 32
    
        x_out = Dense(units=dense_units, activation= activations2)(x_out)
        dense_units /= 2

    
    predictions = Dense(units=1, activation="sigmoid")(x_out)
    
    model = Model(inputs=x_inp, outputs=predictions)

    model.compile(
        optimizer= eval(optimizer), loss=binary_crossentropy, metrics=["acc"],
    )
    
    #model.summary()
    return model

In [8]:
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix , ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt

kfold = KFold(n_splits=3, shuffle=True, random_state=1)

gen = PaddedGraphGenerator(graphs=graphs)

cv_ACCURACY, cv_SPECIFICITY, cv_SENSITIVITY, cv_PRECISION, cv_LOSS = [[],[],[]], [[],[],[]], [[],[],[]], [[],[],[]], [[],[],[]]


fold_var = 1

for train, val in kfold.split(train_graphs, train_labels):
    train_values = graph_labels.iloc[train]
    val_values = graph_labels.iloc[val]
    #print(list(train_values.index))
    #print(train_values.values)
    
    train_gen = gen.flow(
        list(train_values.index),
        targets=train_values.values,
        batch_size=30,
        symmetric_normalization=False,
        weighted = True
    )
    
    val_gen = gen.flow(
        list(val_values.index),
        targets=val_values.values,
        batch_size=40,
        symmetric_normalization=False,
        weighted = True
    )
    
    pred_gen = gen.flow(
        list(pred_graphs.index - 1),
        targets=pred_graphs.values,
        batch_size=1,
        symmetric_normalization=True,
        weighted = True
    )
    
    
    model = create_model(graphs, 50, [128, 64, 32, 32, 16], ['tanh', 'tanh', 'tanh', 'tanh', 'tanh'], 'relu', 5, 1024, 'optimizers.Adam()', 1, 5, 0.25)
    
    model.compile(
        optimizer=optimizers.Adagrad(), loss=binary_crossentropy, metrics=["acc"],
    )
    
    # checkpoint
    filepath="weights-fold-"+ str(fold_var) + ".h5"
    checkpoint = ModelCheckpoint(filepath, monitor='acc', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    history = model.fit(
        train_gen, epochs=250, verbose=0, validation_data=val_gen, shuffle=True, callbacks=callbacks_list
    )
    
    # LOAD BEST MODEL to evaluate the performance of the model
    model.load_weights("./weights-fold-"+str(fold_var)+".h5")
    
    gens = [train_gen, val_gen, pred_gen]
    graph_values= [train_values.values, val_values.values, pred_graphs.values]
    
    #For each set
    for i in range(3):
        
        results = model.evaluate(gens[i])
        results = dict(zip(model.metrics_names,results))

        
        y_test2 = model.predict(gens[i])


        y_test = [1 * (x[0]>=0.5) for x in y_test2]


        conf = confusion_matrix(graph_values[i], y_test)

        TP = conf[1][1]
        TN = conf[0][0]
        FP = conf[0][1]
        FN = conf[1][0]
        conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
        print('accuracy:', round(conf_accuracy,4))
        conf_specificity = (TN / float(TN + FP))
        print('specificity:', round(conf_specificity,4))
        conf_sensitivity = (TP / float(TP + FN))
        print('sensitivity:', round(conf_sensitivity,4))
        conf_precision = (TN / float(TN + FP))
        print('precision:', round(conf_precision,4))

        cv_ACCURACY[i].append(results['acc'])
        cv_SPECIFICITY[i].append(conf_specificity)
        cv_SENSITIVITY[i].append(conf_sensitivity)
        cv_PRECISION[i].append(conf_precision)
        cv_LOSS[i].append(results['loss'])

    tf.keras.backend.clear_session()
    fold_var += 1

Instructions for updating:
Use fn_output_signature instead





Epoch 00001: acc improved from -inf to 0.47929, saving model to weights-fold-1.h5

Epoch 00002: acc improved from 0.47929 to 0.54832, saving model to weights-fold-1.h5

Epoch 00003: acc improved from 0.54832 to 0.58383, saving model to weights-fold-1.h5

Epoch 00004: acc improved from 0.58383 to 0.58974, saving model to weights-fold-1.h5

Epoch 00005: acc did not improve from 0.58974

Epoch 00006: acc did not improve from 0.58974

Epoch 00007: acc did not improve from 0.58974

Epoch 00008: acc did not improve from 0.58974

Epoch 00009: acc did not improve from 0.58974

Epoch 00010: acc did not improve from 0.58974

Epoch 00011: acc did not improve from 0.58974

Epoch 00012: acc did not improve from 0.58974

Epoch 00013: acc did not improve from 0.58974

Epoch 00014: acc did not improve from 0.58974

Epoch 00015: acc did not improve from 0.58974

Epoch 00016: acc did not improve from 0.58974

Epoch 00017: acc did not improve from 0.58974

Epoch 00018: acc did not improve from 0.58974






Epoch 00001: acc improved from -inf to 0.47535, saving model to weights-fold-2.h5

Epoch 00002: acc improved from 0.47535 to 0.51085, saving model to weights-fold-2.h5

Epoch 00003: acc improved from 0.51085 to 0.57199, saving model to weights-fold-2.h5

Epoch 00004: acc did not improve from 0.57199

Epoch 00005: acc did not improve from 0.57199

Epoch 00006: acc did not improve from 0.57199

Epoch 00007: acc did not improve from 0.57199

Epoch 00008: acc did not improve from 0.57199

Epoch 00009: acc did not improve from 0.57199

Epoch 00010: acc did not improve from 0.57199

Epoch 00011: acc did not improve from 0.57199

Epoch 00012: acc did not improve from 0.57199

Epoch 00013: acc did not improve from 0.57199

Epoch 00014: acc did not improve from 0.57199

Epoch 00015: acc did not improve from 0.57199

Epoch 00016: acc did not improve from 0.57199

Epoch 00017: acc did not improve from 0.57199

Epoch 00018: acc did not improve from 0.57199

Epoch 00019: acc did not improve from 0




Epoch 00001: acc improved from -inf to 0.40157, saving model to weights-fold-3.h5

Epoch 00002: acc improved from 0.40157 to 0.41929, saving model to weights-fold-3.h5

Epoch 00003: acc improved from 0.41929 to 0.43898, saving model to weights-fold-3.h5

Epoch 00004: acc improved from 0.43898 to 0.53937, saving model to weights-fold-3.h5

Epoch 00005: acc improved from 0.53937 to 0.54921, saving model to weights-fold-3.h5

Epoch 00006: acc improved from 0.54921 to 0.58465, saving model to weights-fold-3.h5

Epoch 00007: acc improved from 0.58465 to 0.58858, saving model to weights-fold-3.h5

Epoch 00008: acc did not improve from 0.58858

Epoch 00009: acc did not improve from 0.58858

Epoch 00010: acc did not improve from 0.58858

Epoch 00011: acc did not improve from 0.58858

Epoch 00012: acc did not improve from 0.58858

Epoch 00013: acc did not improve from 0.58858

Epoch 00014: acc did not improve from 0.58858

Epoch 00015: acc did not improve from 0.58858

Epoch 00016: acc did not

In [9]:
print(np.mean(cv_ACCURACY[0]), np.mean(cv_ACCURACY[1]), np.mean(cv_ACCURACY[2]))
print(np.mean(cv_SPECIFICITY[0]), np.mean(cv_SPECIFICITY[1]), np.mean(cv_SPECIFICITY[2]))
print(np.mean(cv_SENSITIVITY[0]), np.mean(cv_SENSITIVITY[1]),  np.mean(cv_SENSITIVITY[2]))
print(np.mean(cv_PRECISION[0]), np.mean(cv_PRECISION[1]),  np.mean(cv_PRECISION[2]))
print(np.mean(cv_LOSS[0]), np.mean(cv_LOSS[1]), np.mean(cv_LOSS[2]))

0.5834394693374634 0.5834292968114217 0.48965516686439514
1.0 1.0 1.0
0.0 0.0 0.0
1.0 1.0 1.0
0.6918906569480896 0.6919089953104655 0.6934975783030192
