# Learning the closeness centrality of a real world graph using the $p$-aggregation of the RCDF matrix representation of nodes

In [1]:
from mygraph import MyGraph
from helpers import *
import time

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Loading the dataset from numpy files

In [3]:
# closeness centrality of the first graph as the target or label data
target_closeness1 = np.load("dual_BA_20K_1_closeness.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
target_closeness1[0]

array([0.31004279])

In [4]:
target_closeness1.shape

(20000, 1)

## Loading the RCDF  matrix as a numpy array

In [5]:
# the RCDF matrix of the first graph as the feature set
matrix1 = np.load("dual_BA_20K_1_RDFC_matrix.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

In [6]:
matrix1.shape

(20000, 2, 21)

In [7]:
matrix1[121]

array([[ 2.,  3.,  9.,  1.,  4.,  3.,  1.,  1.,  1.,  1.,  1.,  0.,  2.,
         1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [61., 21., 93., 58., 66., 41., 17., 25., 13., 15., 11.,  3.,  5.,
         6.,  5.,  0.,  2.,  0.,  0.,  1.,  3.]])

# The p-aggregation of the RCDF matrix

To apply this aggregation we just need to multiply the following row matrix from left to the RCDF matrix:
### $$
\left[ 1, p, p^2, \cdots, p^{(r -1 )} \right],
$$
where $r$ is the order of the RCDF matrix representation (the number of rows).

In [8]:
p = 0.2
order = matrix1.shape[1]
parameter_vector = np.zeros(order)
for i in range(order):
    parameter_vector[i] += p**i
parameter_vector

array([1. , 0.2])

In [9]:
res_matrix = np.matmul(parameter_vector, matrix1)
res_matrix.shape

(20000, 21)

In [10]:
res_matrix[121]

array([14.2,  7.2, 27.6, 12.6, 17.2, 11.2,  4.4,  6. ,  3.6,  4. ,  3.2,
        0.6,  3. ,  2.2,  1. ,  0. ,  0.4,  0. ,  0. ,  0.2,  1.6])

# Converting the dataset into pytorch tensors

In [11]:
features = torch.from_numpy(res_matrix)
targets = torch.from_numpy(target_closeness1)
features.shape, targets.shape

(torch.Size([20000, 21]), torch.Size([20000, 1]))

In [12]:
# Activating the automatic gradient 
features.requires_grad_(True)
targets.requires_grad_(True)

tensor([[0.3100],
        [0.3022],
        [0.2597],
        ...,
        [0.2122],
        [0.1760],
        [0.1809]], dtype=torch.float64, requires_grad=True)

In [13]:
# Shuffling and dividing the indecies
n_samples = features.shape[0]
n_test = 10000
shuffled_ind = torch.randperm(n_samples)
train_ind = shuffled_ind[:-n_test]
test_ind = shuffled_ind[-n_test:]
# Dividing features and targets into tain and test sets
train_features = features[train_ind]
test_features = features[test_ind]
train_targets = targets[train_ind]
test_targets = targets[test_ind]
train_features.shape, test_features.shape, train_targets.shape, test_targets.shape

(torch.Size([10000, 21]),
 torch.Size([10000, 21]),
 torch.Size([10000, 1]),
 torch.Size([10000, 1]))

## A function for dividing train data into batches

In [14]:
# dividing train_features and train_targets into batches
def next_batch(train_features, train_targets, batch_size=100):
    num_features = train_features.shape[0]
    # Shuffling
    shuffled_ind = torch.randperm(num_features)
    shuffled_train_features = train_features[shuffled_ind]
    shuffled_train_targets = train_targets[shuffled_ind]
    # Dividing
    i = 0
    while i < num_features:
        i += batch_size
        yield (shuffled_train_features[i-batch_size:i], shuffled_train_targets[i-batch_size:i])
    return

## The feedforward neural network model

In [15]:
# The Feedforward Neural Network 
class FFNN_model(nn.Module):
    def __init__(self):
        super().__init__()
        num_features = features.shape[1]
        self.fc1 = nn.Linear(num_features, 64)
        self.fc2 = nn.Linear(64, 8)
        self.fc3 = nn.Linear(8, 1)
        self.dropout1 = nn.Dropout(0.3)
        
    def forward(self, X):
        X = torch.tanh(self.fc1(X))
        X = self.dropout1(X)
        X = torch.relu(self.fc2(X))
        return self.fc3(X)

In [16]:
# Instantiation of the model
torch.manual_seed(42)
model = FFNN_model()
model

FFNN_model(
  (fc1): Linear(in_features=21, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=8, bias=True)
  (fc3): Linear(in_features=8, out_features=1, bias=True)
  (dropout1): Dropout(p=0.3, inplace=False)
)

In [17]:
num_para = 0
for param in model.parameters():
    print(param.numel())
    num_para += param.numel()
print("----------------------")
print(f'Number of all parameters: \n{num_para}')

1344
64
512
8
8
1
----------------------
Number of all parameters: 
1937


In [18]:
# Objective function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

## Defining the Training Loop

In [19]:
def training_loop(n_epochs=1000,
                  batch_size=100,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets):
    num_features = train_features.shape[0]
    start_time = time.time()
    all_train_loss, all_test_loss = np.zeros(n_epochs), np.zeros(n_epochs)
    for epoch in range(1, n_epochs +1):
        # Training: 
        epoch_losses = []
        # looping through batches
        for train_features, train_targets in next_batch(train_features=train_features, 
                                                        train_targets=train_targets, batch_size=batch_size): 
            train_preds = model(train_features.float())
            train_loss = loss_fn(train_targets.float(), train_preds.float())
            optimizer.zero_grad()
            train_loss.backward(retain_graph=True)
            optimizer.step()
            
            epoch_losses.append(train_loss.item())
        average_epoch_loss = sum(epoch_losses)/len(epoch_losses)
        
        # Test:
        with torch.no_grad():
            test_preds = model(test_features.float())
            test_loss = loss_fn(test_targets.float(), test_preds.float())
        
        all_train_loss[epoch - 1] = average_epoch_loss
        all_test_loss[epoch - 1] = test_loss.item()
        # Printing the result: 
        if epoch == 1 or epoch % 100 == 0:
            print(f"EPOCH: {epoch:{7}}")
            print(f"MEAN TRAIN LOSS:   {average_epoch_loss:.11f},    Test LOSS:   {test_loss.item():.11f}")
            print("-----------------------------------------")
    print("The total time = ", np.round(time.time() - start_time, 3), " seconds!")
    return all_train_loss, all_test_loss

# Training the model

In [20]:
losses = training_loop(n_epochs=2000,
                  batch_size=400,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets)

EPOCH:       1
MEAN TRAIN LOSS:   0.00660361515,    Test LOSS:   0.00330327265
-----------------------------------------
EPOCH:     100
MEAN TRAIN LOSS:   0.00067688688,    Test LOSS:   0.00069015572
-----------------------------------------
EPOCH:     200
MEAN TRAIN LOSS:   0.00033057071,    Test LOSS:   0.00029267647
-----------------------------------------
EPOCH:     300
MEAN TRAIN LOSS:   0.00016865108,    Test LOSS:   0.00017619619
-----------------------------------------
EPOCH:     400
MEAN TRAIN LOSS:   0.00011467274,    Test LOSS:   0.00011847387
-----------------------------------------
EPOCH:     500
MEAN TRAIN LOSS:   0.00007863306,    Test LOSS:   0.00009142506
-----------------------------------------
EPOCH:     600
MEAN TRAIN LOSS:   0.00006212524,    Test LOSS:   0.00007080169
-----------------------------------------
EPOCH:     700
MEAN TRAIN LOSS:   0.00005583291,    Test LOSS:   0.00006047572
-----------------------------------------
EPOCH:     800
MEAN TRAIN LOSS: 

In [21]:
model.eval()
errors1 = []
for i in range(n_test):
    targ = targets[i].item()
    feat = features[i].float().view(1,1,21)
    pred = model(feat).item()
    inaccuracy = abs(1 - pred/targ) * 100
    errors1.append(inaccuracy)
    if i%100 == 0:
        print(f"target {targ},    prediction: {pred}\nindex {i}:       inaccuracy: {np.round(inaccuracy, 3)}%")
        print("-----------------------------------")
print("Average inaccuracy on the test set of the first graph: ", np.round(sum(errors1)/len(errors1), 3))

target 0.3100427880441523,    prediction: 0.25735872983932495
index 0:       inaccuracy: 16.993%
-----------------------------------
target 0.2147105556987031,    prediction: 0.21515420079231262
index 100:       inaccuracy: 0.207%
-----------------------------------
target 0.24471990406500085,    prediction: 0.2433473765850067
index 200:       inaccuracy: 0.561%
-----------------------------------
target 0.23233848763316564,    prediction: 0.23336240649223328
index 300:       inaccuracy: 0.441%
-----------------------------------
target 0.2443849744604932,    prediction: 0.24472856521606445
index 400:       inaccuracy: 0.141%
-----------------------------------
target 0.20762003633532314,    prediction: 0.20937180519104004
index 500:       inaccuracy: 0.844%
-----------------------------------
target 0.21246600373958865,    prediction: 0.21197843551635742
index 600:       inaccuracy: 0.229%
-----------------------------------
target 0.2182129646804657,    prediction: 0.2159878611564636

target 0.14875338430870846,    prediction: 0.16156214475631714
index 6600:       inaccuracy: 8.611%
-----------------------------------
target 0.184173205142373,    prediction: 0.18597808480262756
index 6700:       inaccuracy: 0.98%
-----------------------------------
target 0.20101113657379488,    prediction: 0.20193925499916077
index 6800:       inaccuracy: 0.462%
-----------------------------------
target 0.21312942931741888,    prediction: 0.2152559906244278
index 6900:       inaccuracy: 0.998%
-----------------------------------
target 0.17519162542157593,    prediction: 0.17607198655605316
index 7000:       inaccuracy: 0.503%
-----------------------------------
target 0.20188570678672738,    prediction: 0.20378068089485168
index 7100:       inaccuracy: 0.939%
-----------------------------------
target 0.19180565279522763,    prediction: 0.1951405107975006
index 7200:       inaccuracy: 1.739%
-----------------------------------
target 0.16649877201015692,    prediction: 0.16313919

# Testing the model on the second random graph

In [22]:
matrix2 = np.load("dual_BA_20K_2_RDFC_matrix.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

In [23]:
# closeness centrality as the target or label data
target_closeness2 = np.load("dual_BA_20K_2_closeness.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
target_closeness2[0]

array([0.31145269])

In [24]:
res_matrix2 = np.matmul(parameter_vector, matrix2)
res_matrix2.shape

(20000, 21)

# Converting the dataset of the second graph into pytorch tensors

In [25]:
features2 = torch.from_numpy(res_matrix2)
targets2 = torch.from_numpy(target_closeness2)
features2.shape, targets2.shape

(torch.Size([20000, 21]), torch.Size([20000, 1]))

In [26]:
model.eval()
errors2 = []
for i in range(20000):
    targ = targets2[i].item()
    feat = features2[i].float().view(1,1,21)
    pred = model(feat).item()
    inaccuracy = abs(1 - pred/targ) * 100
    errors2.append(inaccuracy)
    if i%100 == 0:
        print(f"target {targ},    prediction: {pred}\nindex {i}:       inaccuracy: {np.round(inaccuracy, 3)}%")
        print("-----------------------------------")
print("Average inaccuracy on the second graph: ", np.round(sum(errors2)/len(errors2), 3))

target 0.31145268797109577,    prediction: 0.25239336490631104
index 0:       inaccuracy: 18.963%
-----------------------------------
target 0.24238567913803344,    prediction: 0.23839102685451508
index 100:       inaccuracy: 1.648%
-----------------------------------
target 0.23819108644386747,    prediction: 0.23959751427173615
index 200:       inaccuracy: 0.59%
-----------------------------------
target 0.2253510017352895,    prediction: 0.22863270342350006
index 300:       inaccuracy: 1.456%
-----------------------------------
target 0.19863925307906238,    prediction: 0.20285996794700623
index 400:       inaccuracy: 2.125%
-----------------------------------
target 0.22347248916104231,    prediction: 0.22532951831817627
index 500:       inaccuracy: 0.831%
-----------------------------------
target 0.20671869347253088,    prediction: 0.2130400389432907
index 600:       inaccuracy: 3.058%
-----------------------------------
target 0.23089800725055995,    prediction: 0.23234924674034

target 0.20755970234450408,    prediction: 0.20749345421791077
index 6200:       inaccuracy: 0.032%
-----------------------------------
target 0.23491478040242914,    prediction: 0.228817880153656
index 6300:       inaccuracy: 2.595%
-----------------------------------
target 0.20972325632609404,    prediction: 0.21069970726966858
index 6400:       inaccuracy: 0.466%
-----------------------------------
target 0.2094772234500529,    prediction: 0.20574650168418884
index 6500:       inaccuracy: 1.781%
-----------------------------------
target 0.24088795740887958,    prediction: 0.24043752253055573
index 6600:       inaccuracy: 0.187%
-----------------------------------
target 0.20456195980156497,    prediction: 0.19979554414749146
index 6700:       inaccuracy: 2.33%
-----------------------------------
target 0.1914164568956441,    prediction: 0.19380336999893188
index 6800:       inaccuracy: 1.247%
-----------------------------------
target 0.2051579282116515,    prediction: 0.201259866

target 0.1723799098407992,    prediction: 0.16939902305603027
index 13500:       inaccuracy: 1.729%
-----------------------------------
target 0.16865834015028208,    prediction: 0.16534888744354248
index 13600:       inaccuracy: 1.962%
-----------------------------------
target 0.17865023002367234,    prediction: 0.17835882306098938
index 13700:       inaccuracy: 0.163%
-----------------------------------
target 0.1904376475965567,    prediction: 0.18943148851394653
index 13800:       inaccuracy: 0.528%
-----------------------------------
target 0.16985875538266845,    prediction: 0.16219912469387054
index 13900:       inaccuracy: 4.509%
-----------------------------------
target 0.1898212743339313,    prediction: 0.18908990919589996
index 14000:       inaccuracy: 0.385%
-----------------------------------
target 0.18252758586070622,    prediction: 0.18005378544330597
index 14100:       inaccuracy: 1.355%
-----------------------------------
target 0.17902444701058984,    prediction: 0