# The feedforward neural network model for learning PageRank

In [1]:
import time
from dihelpers import *

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Loading the dataset from numpy files

In [3]:
# Pageank as the target or label data
target_pagerank = np.load("epinions_pageranks.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
target_pagerank[0]

array([0.00082304])

In [4]:
# scaling the PageRank data
target_pagerank = target_pagerank * 10000
target_pagerank[0:10]

array([[ 8.23038636],
       [10.63970906],
       [ 3.04659061],
       [ 0.60241765],
       [ 2.1125303 ],
       [ 2.98758742],
       [ 3.50898413],
       [ 0.48461399],
       [ 1.68108362],
       [ 0.23305616]])

In [5]:
target_pagerank.shape

(75879, 1)

In [6]:
# the order 5 NFDC matrix as the feature set
matrix_1 = np.load("discounted_epinions_NDFC_matrix_r1_15_sta1_max70_rad3_inward.npy", mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

In [7]:
matrix_1.shape

(75879, 4, 43)

In [8]:
matrix_1 = matrix_1.reshape(matrix_1.shape[0],-1)
matrix_1.shape

(75879, 172)

# Converting the data into pytorch tenors

In [9]:
features = torch.from_numpy(matrix_1)
targets = torch.from_numpy(target_pagerank)
features.shape, targets.shape

(torch.Size([75879, 172]), torch.Size([75879, 1]))

In [10]:
# Activating the automatic gradient 
features.requires_grad_(True)
targets.requires_grad_(True)

tensor([[ 8.2304],
        [10.6397],
        [ 3.0466],
        ...,
        [ 0.0287],
        [ 0.0287],
        [ 0.0287]], dtype=torch.float64, requires_grad=True)

In [11]:
# Shuffling and dividing the indecies
n_samples = features.shape[0]
n_test = 55879
shuffled_ind = torch.randperm(n_samples)
train_ind = shuffled_ind[:-n_test]
test_ind = shuffled_ind[-n_test:]
# Dividing features and targets into tain and test sets
train_features = features[train_ind]
test_features = features[test_ind]
train_targets = targets[train_ind]
test_targets = targets[test_ind]
train_features.shape, test_features.shape, train_targets.shape, test_targets.shape

(torch.Size([20000, 172]),
 torch.Size([55879, 172]),
 torch.Size([20000, 1]),
 torch.Size([55879, 1]))

## A function for dividing train data into batches

In [12]:
# dividing train_features and train_targets into batches
def next_batch(train_features, train_targets, batch_size=100):
    num_features = train_features.shape[0]
    # Shuffling
    shuffled_ind = torch.randperm(num_features)
    shuffled_train_features = train_features[shuffled_ind]
    shuffled_train_targets = train_targets[shuffled_ind]
    # Dividing
    i = 0
    while i < num_features:
        i += batch_size
        yield (shuffled_train_features[i-batch_size:i], shuffled_train_targets[i-batch_size:i])
    return

## The feedforward neural network model

In [13]:
# The Feedforward Neural Network 
class FFNN_model(nn.Module):
    def __init__(self):
        super().__init__()
        num_features = features.shape[1]
        self.fc1 = nn.Linear(num_features, 400)
        self.fc2 = nn.Linear(400, 800)
        self.fc3 = nn.Linear(800, 200)
        self.fc4 = nn.Linear(200, 64)
        self.fc5 = nn.Linear(64, 8)
        self.fc6 = nn.Linear(8, 1)
        self.dropout1 = nn.Dropout(0.4)
        self.dropout2 = nn.Dropout(0.3)
        self.dropout3 = nn.Dropout(0.5)
    
    def forward(self, X):
        X = torch.tanh(self.fc1(X))
        X = torch.relu(self.fc2(X))
        X = self.dropout1(X)
        X = torch.relu(self.fc3(X))
        X = self.dropout3(X)
        X = torch.relu(self.fc4(X))
        X = self.dropout2(X)
        X = torch.tanh(self.fc5(X))
        return self.fc6(X)

In [14]:
# Instantiation of the model
torch.manual_seed(42)
model = FFNN_model()
model

FFNN_model(
  (fc1): Linear(in_features=172, out_features=400, bias=True)
  (fc2): Linear(in_features=400, out_features=800, bias=True)
  (fc3): Linear(in_features=800, out_features=200, bias=True)
  (fc4): Linear(in_features=200, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=8, bias=True)
  (fc6): Linear(in_features=8, out_features=1, bias=True)
  (dropout1): Dropout(p=0.4, inplace=False)
  (dropout2): Dropout(p=0.3, inplace=False)
  (dropout3): Dropout(p=0.5, inplace=False)
)

In [15]:
# Number of parameters
num_para = 0
for param in model.parameters():
    print(param.numel())
    num_para += param.numel()
print("----------------------")
print(f'Number of all parameters: \n{num_para}')

68800
400
320000
800
160000
200
12800
64
512
8
8
1
----------------------
Number of all parameters: 
563593


In [16]:
# Objective function and optimmizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

## Defining the Training Loop

In [17]:
def training_loop(n_epochs=1000,
                  batch_size=100,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets):
    num_features = train_features.shape[0]
    start_time = time.time()
    all_train_loss, all_test_loss = np.zeros(n_epochs), np.zeros(n_epochs)
    for epoch in range(1, n_epochs +1):
        # Training: 
        epoch_losses = []
        # looping through batches
        for train_features, train_targets in next_batch(train_features=train_features, 
                                                        train_targets=train_targets, batch_size=batch_size): 
            train_preds = model(train_features.float())
            train_loss = loss_fn(train_targets.float(), train_preds.float())
            optimizer.zero_grad()
            train_loss.backward(retain_graph=True)
            optimizer.step()
            
            epoch_losses.append(train_loss.item())
        average_epoch_loss = sum(epoch_losses)/len(epoch_losses)
        
        # Test:
        with torch.no_grad():
            test_preds = model(test_features.float())
            test_loss = loss_fn(test_targets.float(), test_preds.float())
        
        all_train_loss[epoch - 1] = average_epoch_loss
        all_test_loss[epoch - 1] = test_loss.item()
        # Printing the result: 
        if (epoch < 100 and epoch % 10 == 0) or epoch % 100 == 0:
            print(f"EPOCH: {epoch:{7}}")
            print(f"MEAN TRAIN LOSS:   {average_epoch_loss:.11f},    TEST LOSS:   {test_loss.item():.11f}")
            print("-----------------------------------------")
    print("The total time = ", np.round(time.time() - start_time, 3), " seconds!")
    return all_train_loss, all_test_loss

# Training the model

In [18]:
losses = training_loop(n_epochs=1000,
                  batch_size=400,
                  optimizer=optimizer, 
                  model=model, 
                  loss_fn=criterion, 
                  train_features=train_features, 
                  test_features=test_features, 
                  train_targets=train_targets, 
                  test_targets=test_targets)

EPOCH:      10
MEAN TRAIN LOSS:   0.20808953047,    TEST LOSS:   0.17618757486
-----------------------------------------
EPOCH:      20
MEAN TRAIN LOSS:   0.20346432924,    TEST LOSS:   0.17273277044
-----------------------------------------
EPOCH:      30
MEAN TRAIN LOSS:   0.19331371784,    TEST LOSS:   0.16960695386
-----------------------------------------
EPOCH:      40
MEAN TRAIN LOSS:   0.18650512397,    TEST LOSS:   0.16623705626
-----------------------------------------
EPOCH:      50
MEAN TRAIN LOSS:   0.18016521633,    TEST LOSS:   0.16121105850
-----------------------------------------
EPOCH:      60
MEAN TRAIN LOSS:   0.17371949553,    TEST LOSS:   0.16276387870
-----------------------------------------
EPOCH:      70
MEAN TRAIN LOSS:   0.16747727990,    TEST LOSS:   0.15677383542
-----------------------------------------
EPOCH:      80
MEAN TRAIN LOSS:   0.16429983079,    TEST LOSS:   0.15423041582
-----------------------------------------
EPOCH:      90
MEAN TRAIN LOSS: 

In [19]:
model.eval()
errors = []
for i in range(n_test):
    targ = test_targets[i].item()
    feat = test_features[i].float().view(1,1,features.shape[1])
    pred = model(feat).item()
    inaccuracy = abs(1 - pred/targ) * 100
    errors.append(inaccuracy)
    if i%100 == 0:
        print(f"target {targ},    prediction: {pred}\nindex {i}:       inaccuracy: {np.round(inaccuracy, 3)}%")
        print("-----------------------------------------------------------------")

target 0.14155744456624883,    prediction: 0.30246633291244507
index 0:       inaccuracy: 113.67%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 100:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.03571832180941778,    prediction: 0.03784945607185364
index 200:       inaccuracy: 5.967%
-----------------------------------------------------------------
target 0.03600122962909758,    prediction: 0.03369048237800598
index 300:       inaccuracy: 6.419%
-----------------------------------------------------------------
target 0.02939993686653839,    prediction: 0.029312223196029663
index 400:       inaccuracy: 0.298%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 500:       inaccuracy: 2.148%
-----------------------------------------------------------------
target

target 0.03934834588909972,    prediction: 0.04096728563308716
index 5100:       inaccuracy: 4.114%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 5200:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 5300:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 5400:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 5500:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 5600:       inaccuracy: 2.148%
------------------------------------------------------------

-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 10300:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.03129621349408912,    prediction: 0.02984556555747986
index 10400:       inaccuracy: 4.635%
-----------------------------------------------------------------
target 0.18561664424864724,    prediction: 0.2813045382499695
index 10500:       inaccuracy: 51.551%
-----------------------------------------------------------------
target 0.03308864912013943,    prediction: 0.031381815671920776
index 10600:       inaccuracy: 5.158%
-----------------------------------------------------------------
target 0.06014459040325145,    prediction: 0.04412683844566345
index 10700:       inaccuracy: 26.632%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 10800:       inaccuracy:

target 0.041551870473684915,    prediction: 0.03583678603172302
index 15500:       inaccuracy: 13.754%
-----------------------------------------------------------------
target 0.050726282758528814,    prediction: 0.04270792007446289
index 15600:       inaccuracy: 15.807%
-----------------------------------------------------------------
target 0.06981603743802307,    prediction: 0.041735172271728516
index 15700:       inaccuracy: 40.221%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 15800:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.06824093388291143,    prediction: 0.07837679982185364
index 15900:       inaccuracy: 14.853%
-----------------------------------------------------------------
target 0.05772726477475667,    prediction: 0.052163004875183105
index 16000:       inaccuracy: 9.639%
------------------------------------------------------

target 0.05156905486967329,    prediction: 0.04462766647338867
index 20400:       inaccuracy: 13.46%
-----------------------------------------------------------------
target 4.19934131790734,    prediction: 4.696325302124023
index 20500:       inaccuracy: 11.835%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 20600:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.047566499389035045,    prediction: 0.03948518633842468
index 20700:       inaccuracy: 16.99%
-----------------------------------------------------------------
target 0.03159681545450326,    prediction: 0.03672674298286438
index 20800:       inaccuracy: 16.236%
-----------------------------------------------------------------
target 0.08360665951960014,    prediction: 0.1813952922821045
index 20900:       inaccuracy: 116.963%
---------------------------------------------------------------

target 0.08290994067588334,    prediction: 0.05425584316253662
index 25300:       inaccuracy: 34.561%
-----------------------------------------------------------------
target 0.06105678985601916,    prediction: 0.047435909509658813
index 25400:       inaccuracy: 22.309%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 25500:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.031586166159082,    prediction: 0.04086300730705261
index 25600:       inaccuracy: 29.37%
-----------------------------------------------------------------
target 0.1442902379839663,    prediction: 0.1614762842655182
index 25700:       inaccuracy: 11.911%
-----------------------------------------------------------------
target 0.13473819356128844,    prediction: 0.3252522349357605
index 25800:       inaccuracy: 141.396%
-------------------------------------------------------------

target 0.13515639612190467,    prediction: 0.3798360526561737
index 30500:       inaccuracy: 181.034%
-----------------------------------------------------------------
target 0.041751560351310626,    prediction: 0.04326927661895752
index 30600:       inaccuracy: 3.635%
-----------------------------------------------------------------
target 0.08171637756657703,    prediction: 0.061160773038864136
index 30700:       inaccuracy: 25.155%
-----------------------------------------------------------------
target 0.0343369428222177,    prediction: 0.03207603096961975
index 30800:       inaccuracy: 6.584%
-----------------------------------------------------------------
target 0.10268259858633066,    prediction: 0.10055223107337952
index 30900:       inaccuracy: 2.075%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 31000:       inaccuracy: 2.148%
-----------------------------------------------------------

target 0.033120468910585794,    prediction: 0.031069666147232056
index 35800:       inaccuracy: 6.192%
-----------------------------------------------------------------
target 0.08546926197294827,    prediction: 0.09620076417922974
index 35900:       inaccuracy: 12.556%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 36000:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.09661246935008795,    prediction: 0.16050133109092712
index 36100:       inaccuracy: 66.129%
-----------------------------------------------------------------
target 0.09821767031841674,    prediction: 0.1458364725112915
index 36200:       inaccuracy: 48.483%
-----------------------------------------------------------------
target 0.0779269848256315,    prediction: 0.05659380555152893
index 36300:       inaccuracy: 27.376%
----------------------------------------------------------

target 0.1732540869525928,    prediction: 0.34477776288986206
index 40900:       inaccuracy: 99.001%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 41000:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.06386109835646551,    prediction: 0.04911699891090393
index 41100:       inaccuracy: 23.088%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 41200:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.0901831441359827,    prediction: 0.0704127848148346
index 41300:       inaccuracy: 21.922%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 41400:       inaccuracy: 2.148%
----------------------------------------------------------

target 0.028653960184055832,    prediction: 0.029269516468048096
index 46100:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.08032485227584288,    prediction: 0.04308411478996277
index 46200:       inaccuracy: 46.363%
-----------------------------------------------------------------
target 0.03330391100571472,    prediction: 0.032400161027908325
index 46300:       inaccuracy: 2.714%
-----------------------------------------------------------------
target 0.1464544246933479,    prediction: 0.21050390601158142
index 46400:       inaccuracy: 43.733%
-----------------------------------------------------------------
target 0.35717864853166353,    prediction: 0.34964555501937866
index 46500:       inaccuracy: 2.109%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 46600:       inaccuracy: 2.148%
----------------------------------------------------------

target 0.028653960184055832,    prediction: 0.029269516468048096
index 51300:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.031138488613298845,    prediction: 0.03165900707244873
index 51400:       inaccuracy: 1.672%
-----------------------------------------------------------------
target 0.028653960184055832,    prediction: 0.029269516468048096
index 51500:       inaccuracy: 2.148%
-----------------------------------------------------------------
target 0.057321251178335225,    prediction: 0.0521598756313324
index 51600:       inaccuracy: 9.004%
-----------------------------------------------------------------
target 0.0933991882292515,    prediction: 0.1588141918182373
index 51700:       inaccuracy: 70.038%
-----------------------------------------------------------------
target 0.16945640485612284,    prediction: 0.08517757058143616
index 51800:       inaccuracy: 49.735%
-----------------------------------------------------------

In [20]:
print("Average inaccuracy: ", np.round(sum(errors)/len(errors), 3))

Average inaccuracy:  19.425


In [21]:
torch.save(model, "trained_FFNN_pagerank_epinions_discounted0.pt")