# The feedforward neural network model for learning PageRank

In [1]:
import time
from mygraph import MyGraph
from helpers import *

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Loading the dataset from numpy files

In [3]:
# Pageank as the target or label data
target_pagerank = np.load("dual_BA_20K_1_pageranks.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
target_pagerank[0:10]

array([[2.89652064e-03],
       [2.94180472e-03],
       [4.01947517e-04],
       [1.96730235e-03],
       [1.24132127e-03],
       [2.28856196e-04],
       [1.15520877e-03],
       [2.18498941e-04],
       [6.27970487e-05],
       [2.71853911e-05]])

In [4]:
# scaling the PageRank data
target_pagerank = target_pagerank * 1000
target_pagerank[1000:1010]

array([[0.18798959],
       [0.19541857],
       [0.04532814],
       [0.0283429 ],
       [0.09522346],
       [0.34642875],
       [0.0834803 ],
       [0.0835905 ],
       [0.24565844],
       [0.03588204]])

In [5]:
target_pagerank.shape

(20000, 1)

In [6]:
# the order 5 NFDC matrix as the feature set
matrix_1 = np.load("dual_BA_20K_1_NDFC_matrix.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

In [7]:
matrix_1.shape

(20000, 6, 21)

In [8]:
matrix_1[111]

array([[1.000000e+00, 1.000000e+00, 3.000000e+00, 3.000000e+00,
        6.000000e+00, 5.000000e+00, 3.000000e+00, 1.000000e+00,
        0.000000e+00, 1.000000e+00, 1.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00,
        1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        1.000000e+00],
       [2.785714e+00, 5.357140e-01, 4.428571e+00, 2.714286e+00,
        2.857143e+00, 1.357143e+00, 8.571430e-01, 1.285714e+00,
        7.142860e-01, 5.714290e-01, 1.285714e+00, 3.214290e-01,
        2.857140e-01, 3.928570e-01, 1.785710e-01, 7.142900e-02,
        2.142860e-01, 0.000000e+00, 3.571400e-02, 3.571400e-02,
        3.571400e-02],
       [9.779010e-01, 3.848990e-01, 1.786372e+00, 1.095764e+00,
        1.283610e+00, 7.384900e-01, 5.101290e-01, 4.438310e-01,
        2.762430e-01, 2.836100e-01, 2.154700e-01, 1.104970e-01,
        1.160220e-01, 1.749540e-01, 8.103100e-02, 1.988950e-01,
        2.891340e-01, 0.000000e+00, 1.289100e-02, 1.289100

In [9]:
matrix_1 = matrix_1.reshape(20000,-1)
matrix_1.shape

(20000, 126)

# Converting the data into pytorch tenors

In [10]:
features = torch.from_numpy(matrix_1)
targets = torch.from_numpy(target_pagerank)
features.shape, targets.shape

(torch.Size([20000, 126]), torch.Size([20000, 1]))

In [11]:
# Activating the automatic gradient 
features.requires_grad_(True)
targets.requires_grad_(True)

tensor([[2.8965],
        [2.9418],
        [0.4019],
        ...,
        [0.0337],
        [0.0170],
        [0.0378]], dtype=torch.float64, requires_grad=True)

In [12]:
# Shuffling and dividing the indecies
n_samples = features.shape[0]
n_test = 10000
shuffled_ind = torch.randperm(n_samples)
train_ind = shuffled_ind[:-n_test]
test_ind = shuffled_ind[-n_test:]
# Dividing features and targets into tain and test sets
train_features = features[train_ind]
test_features = features[test_ind]
train_targets = targets[train_ind]
test_targets = targets[test_ind]
train_features.shape, test_features.shape, train_targets.shape, test_targets.shape

(torch.Size([10000, 126]),
 torch.Size([10000, 126]),
 torch.Size([10000, 1]),
 torch.Size([10000, 1]))

## A function for dividing train data into batches

In [13]:
# dividing train_features and train_targets into batches
def next_batch(train_features, train_targets, batch_size=100):
    num_features = train_features.shape[0]
    # Shuffling
    shuffled_ind = torch.randperm(num_features)
    shuffled_train_features = train_features[shuffled_ind]
    shuffled_train_targets = train_targets[shuffled_ind]
    # Dividing
    i = 0
    while i < num_features:
        i += batch_size
        yield (shuffled_train_features[i-batch_size:i], shuffled_train_targets[i-batch_size:i])
    return

## The feedforward neural network model

In [14]:
# The Feedforward Neural Network 
class FFNN_model(nn.Module):
    def __init__(self):
        super().__init__()
        num_features = features.shape[1]
        self.fc1 = nn.Linear(num_features, 400)
        self.fc2 = nn.Linear(400, 800)
        self.fc3 = nn.Linear(800, 200)
        self.fc4 = nn.Linear(200, 64)
        self.fc5 = nn.Linear(64, 8)
        self.fc6 = nn.Linear(8, 1)
        self.dropout1 = nn.Dropout(0.4)
        self.dropout2 = nn.Dropout(0.3)
        self.dropout3 = nn.Dropout(0.5)
    
    def forward(self, X):
        X = torch.tanh(self.fc1(X))
        X = torch.relu(self.fc2(X))
        X = self.dropout1(X)
        X = torch.relu(self.fc3(X))
        X = self.dropout3(X)
        X = torch.relu(self.fc4(X))
        X = self.dropout2(X)
        X = torch.tanh(self.fc5(X))
        return self.fc6(X)

In [15]:
# Instantiation of the model
torch.manual_seed(42)
model = FFNN_model()
model

FFNN_model(
  (fc1): Linear(in_features=126, out_features=400, bias=True)
  (fc2): Linear(in_features=400, out_features=800, bias=True)
  (fc3): Linear(in_features=800, out_features=200, bias=True)
  (fc4): Linear(in_features=200, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=8, bias=True)
  (fc6): Linear(in_features=8, out_features=1, bias=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (dropout2): Dropout(p=0.3, inplace=False)
  (dropout3): Dropout(p=0.5, inplace=False)
)

In [16]:
# Number of parameters
num_para = 0
for param in model.parameters():
    print(param.numel())
    num_para += param.numel()
print("----------------------")
print(f'Number of all parameters: \n{num_para}')

50400
400
320000
800
160000
200
12800
64
512
8
8
1
----------------------
Number of all parameters: 
545193


In [17]:
# Objective function and optimmizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

## Defining the Training Loop

In [18]:
def training_loop(n_epochs=1000,
                  batch_size=100,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets):
    num_features = train_features.shape[0]
    start_time = time.time()
    all_train_loss, all_test_loss = np.zeros(n_epochs), np.zeros(n_epochs)
    for epoch in range(1, n_epochs +1):
        # Training: 
        epoch_losses = []
        # looping through batches
        for train_features, train_targets in next_batch(train_features=train_features, 
                                                        train_targets=train_targets, batch_size=batch_size): 
            train_preds = model(train_features.float())
            train_loss = loss_fn(train_targets.float(), train_preds.float())
            optimizer.zero_grad()
            train_loss.backward(retain_graph=True)
            optimizer.step()
            
            epoch_losses.append(train_loss.item())
        average_epoch_loss = sum(epoch_losses)/len(epoch_losses)
        
        # Test:
        with torch.no_grad():
            test_preds = model(test_features.float())
            test_loss = loss_fn(test_targets.float(), test_preds.float())
        
        all_train_loss[epoch - 1] = average_epoch_loss
        all_test_loss[epoch - 1] = test_loss.item()
        # Printing the result: 
        if epoch == 1 or epoch % 100 == 0:
            print(f"EPOCH: {epoch:{7}}")
            print(f"MEAN TRAIN LOSS:   {average_epoch_loss:.11f},    TEST LOSS:   {test_loss.item():.11f}")
            print("-----------------------------------------")
    print("The total time = ", np.round(time.time() - start_time, 3), " seconds!")
    return all_train_loss, all_test_loss

# Training the model

In [19]:
losses = training_loop(n_epochs=2000,
                  batch_size=400,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets)

EPOCH:       1
MEAN TRAIN LOSS:   0.02658024602,    TEST LOSS:   0.00493069831
-----------------------------------------
EPOCH:     100
MEAN TRAIN LOSS:   0.00082701893,    TEST LOSS:   0.00114639197
-----------------------------------------
EPOCH:     200
MEAN TRAIN LOSS:   0.00043993985,    TEST LOSS:   0.00091089145
-----------------------------------------
EPOCH:     300
MEAN TRAIN LOSS:   0.00027551095,    TEST LOSS:   0.00077658764
-----------------------------------------
EPOCH:     400
MEAN TRAIN LOSS:   0.00023600804,    TEST LOSS:   0.00066516822
-----------------------------------------
EPOCH:     500
MEAN TRAIN LOSS:   0.00018081708,    TEST LOSS:   0.00063394918
-----------------------------------------
EPOCH:     600
MEAN TRAIN LOSS:   0.00018207848,    TEST LOSS:   0.00057946070
-----------------------------------------
EPOCH:     700
MEAN TRAIN LOSS:   0.00012774325,    TEST LOSS:   0.00057457626
-----------------------------------------
EPOCH:     800
MEAN TRAIN LOSS: 

In [20]:
# Saving the train losses of pagerank as a numpy array into a file
np.save("ffnn_pagerank_train_losses_random_1.npy", losses[0], allow_pickle=False, fix_imports=True)

In [21]:
# Saving the test losses of pagerank as a numpy array into a file
np.save("ffnn_pagerank_test_losses_random_1.npy", losses[1], allow_pickle=False, fix_imports=True)

In [22]:
model.eval()
errors = []
for i in range(n_test):
    targ = test_targets[i].item()
    feat = test_features[i].float().view(1,1,126)
    pred = model(feat).item()
    inaccuracy = abs(1 - pred/targ) * 100
    errors.append(inaccuracy)
    if i%100 == 0:
        print(f"target {targ},    prediction: {pred}\nindex {i}:       inaccuracy: {np.round(inaccuracy, 3)}%")
        print("-----------------------------------------------------------------")

target 0.01840680925145153,    prediction: 0.019018977880477905
index 0:       inaccuracy: 3.326%
-----------------------------------------------------------------
target 0.01835879549224586,    prediction: 0.01902949810028076
index 100:       inaccuracy: 3.653%
-----------------------------------------------------------------
target 0.01743346615496248,    prediction: 0.019047439098358154
index 200:       inaccuracy: 9.258%
-----------------------------------------------------------------
target 0.018607265079228635,    prediction: 0.0190163254737854
index 300:       inaccuracy: 2.198%
-----------------------------------------------------------------
target 0.01911591260040829,    prediction: 0.019030600786209106
index 400:       inaccuracy: 0.446%
-----------------------------------------------------------------
target 0.023483676059293225,    prediction: 0.019006997346878052
index 500:       inaccuracy: 19.063%
-----------------------------------------------------------------
target

target 0.01660095830072369,    prediction: 0.019013136625289917
index 5000:       inaccuracy: 14.53%
-----------------------------------------------------------------
target 0.08245329825166986,    prediction: 0.07340478897094727
index 5100:       inaccuracy: 10.974%
-----------------------------------------------------------------
target 0.01690278994810026,    prediction: 0.01900780200958252
index 5200:       inaccuracy: 12.454%
-----------------------------------------------------------------
target 0.01749730654301005,    prediction: 0.01904892921447754
index 5300:       inaccuracy: 8.868%
-----------------------------------------------------------------
target 0.02018013027125665,    prediction: 0.019020676612854004
index 5400:       inaccuracy: 5.746%
-----------------------------------------------------------------
target 0.020111292363987288,    prediction: 0.019011855125427246
index 5500:       inaccuracy: 5.467%
----------------------------------------------------------------

In [23]:
print("Average inaccuracy: ", np.round(sum(errors)/len(errors), 3))

Average inaccuracy:  8.296


In [24]:
torch.save(model, "trained_FFNN_model_pagerank_random_1.pt")

# Testing the model on the second random graph

In [25]:
matrix2 = np.load("dual_BA_20K_2_NDFC_matrix.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
matrix2.shape

(20000, 6, 21)

In [26]:
matrix2 = matrix2.reshape(20000,-1)

In [27]:
# Loading and rescaling the pageank of the second graph as the target or label data
target_pagerank2 = np.load("dual_BA_20K_2_pageranks.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
target_pagerank2 = target_pagerank2 * 1000
target_pagerank2[0:10]

array([[3.30758022],
       [0.42596893],
       [0.67540063],
       [1.41235202],
       [0.22481765],
       [3.11042673],
       [1.49045843],
       [0.14881602],
       [0.90917785],
       [1.1157608 ]])

In [28]:
features2 = torch.from_numpy(matrix2)
targets2 = torch.from_numpy(target_pagerank2)
features2.shape, targets2.shape

(torch.Size([20000, 126]), torch.Size([20000, 1]))

In [29]:
errors2 = []
for i in range(20000):
    t = targets2[i].item()
    featu = features2[i].float().view(1,1,126)
    p = model(featu).item()
    errors2.append(100*abs(t-p)/t)

print("Average inaccuracy: ", np.round(sum(errors2)/len(errors2), 3))

Average inaccuracy:  8.926
