In [28]:
import numpy as np
import pandas as pd

# 1.1 Data Input

In [29]:
# Damping factor applied in the influence-vector update (see calPiK1).
å = 0.85
# Convergence tolerance: iteration() keeps looping while the L1 residual exceeds this.
e = 1e-5

# 1.2 Creating an Adjacency Matrix

In [30]:
# 6x6 adjacency matrix of the example network, stored as floats so that the
# column normalisation further down can divide without integer truncation.
am = np.matrix(
    [
        [1, 0, 2, 0, 4, 3],
        [3, 0, 1, 1, 0, 0],
        [2, 0, 4, 0, 1, 0],
        [0, 0, 1, 0, 0, 1],
        [8, 0, 3, 0, 5, 2],
        [0, 0, 0, 0, 0, 0],
    ],
    dtype=float,
)
print(am)

[[ 1.  0.  2.  0.  4.  3.]
 [ 3.  0.  1.  1.  0.  0.]
 [ 2.  0.  4.  0.  1.  0.]
 [ 0.  0.  1.  0.  0.  1.]
 [ 8.  0.  3.  0.  5.  2.]
 [ 0.  0.  0.  0.  0.  0.]]


# 1.3 Modifying the Adjacency Matrix

## Set the diagonal of the matrix to zero

In [34]:
def digonalZero(am):
    """Overwrite the main diagonal of ``am`` with zeros, in place.

    The argument itself is modified and nothing is returned, so callers
    keep using the object they passed in.
    """
    np.fill_diagonal(am, val=0)
    
# Zero the diagonal of the notebook-level `am` in place (digonalZero returns
# nothing), then echo the modified matrix as the cell output.
digonalZero(am)
am

matrix([[ 0.,  0.,  2.,  0.,  4.,  3.],
        [ 3.,  0.,  1.,  1.,  0.,  0.],
        [ 2.,  0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.,  0.,  1.],
        [ 8.,  0.,  3.,  0.,  0.,  2.],
        [ 0.,  0.,  0.,  0.,  0.,  0.]])

## Normalize the columns of the matrix

In [5]:
def normalizeCol(matrix=None):
    """Return a copy of ``matrix`` with every column scaled to sum to 1.

    Args:
        matrix: 2-D array or ``np.matrix`` to normalise. When omitted, the
            notebook-level adjacency matrix ``am`` is used, which keeps the
            original zero-argument call ``normalizeCol()`` working.

    Returns:
        A new array of the same shape and array type as the input, with
        float entries. Columns whose sum is 0 are left as all zeros instead
        of producing a division by zero. The input is not modified.
    """
    if matrix is None:
        matrix = am
    # asanyarray keeps np.matrix inputs as np.matrix and accepts nested lists.
    matrix = np.asanyarray(matrix)

    # Named col_totals (not `sum`) so the Python builtin is not shadowed.
    col_totals = matrix.sum(axis=0)

    # Divide only where the column total is non-zero; everything else keeps
    # the 0 pre-filled in `out`. The float buffer also lets integer input work.
    H = np.divide(
        matrix,
        col_totals,
        out=np.zeros_like(matrix, dtype=float),
        where=col_totals != 0,
    )
    return H



[[ 0.          0.          0.28571429  0.          0.8         0.5       ]
 [ 0.23076923  0.          0.14285714  1.          0.          0.        ]
 [ 0.15384615  0.          0.          0.          0.2         0.        ]
 [ 0.          0.          0.14285714  0.          0.          0.16666667]
 [ 0.61538462  0.          0.42857143  0.          0.          0.33333333]
 [ 0.          0.          0.          0.          0.          0.        ]]


# 1.4 Identifying the Dangling Nodes

In [6]:
# A column of `am` that sums to 0 marks a dangling node (no citations in that
# column). The column sums are recomputed here: the `sum` inside normalizeCol
# is local to that function, so a bare `sum` at this level is the Python
# builtin and `(sum == 0)` would not be an array at all.
danglingNode = (am.sum(axis=0) == 0).astype(float)
print(danglingNode)

[[ 0.  1.  0.  0.  0.  0.]]


# 1.5 Calculating the Stationary Vector

## Article Vector

In [7]:
# Article counts, one row per journal (presumably in the same journal order
# as the rows/columns of `am` -- confirm against the data source).
Atot = np.matrix([[3], [5], [2], [1], [2], [1]])

# Article vector: the counts above rescaled so that the entries sum to 1.
a = np.divide(Atot, Atot.sum())

print(a)

[[ 0.21428571]
 [ 0.35714286]
 [ 0.14285714]
 [ 0.07142857]
 [ 0.14285714]
 [ 0.07142857]]


## Initial Vector

In [8]:
# Uniform starting vector: same shape as `a`, every entry equal to
# 1 / (number of rows of `am`).
pi0 = np.divide(np.ones_like(a), am.shape[0])
print(pi0)

[[ 0.16666667]
 [ 0.16666667]
 [ 0.16666667]
 [ 0.16666667]
 [ 0.16666667]
 [ 0.16666667]]


## Influence Vector

In [9]:
def calPiK1(H, piK, danglingNode, a, alpha=None):
    """Apply one step of the influence-vector update.

    Computes ``alpha * H . piK + (alpha * danglingNode . piK + (1 - alpha)) * a``.

    Args:
        H: column-normalised adjacency matrix (n x n).
        piK: current influence vector (n x 1).
        danglingNode: row vector (1 x n) flagging dangling nodes with 1.0.
        a: article vector (n x 1).
        alpha: damping factor. When omitted, the notebook-level constant
            ``å`` is used, so existing four-argument calls behave as before.

    Returns:
        The next influence vector, shaped like ``a``.
    """
    if alpha is None:
        alpha = å

    # Share of influence passed along the links in H.
    link_term = (alpha * H).dot(piK)

    # Scalar weight (1 x 1): influence held by dangling nodes plus the
    # (1 - alpha) share, both of which are spread according to `a`.
    spread_weight = (alpha * danglingNode).dot(piK) + (1 - alpha)

    return link_term + np.multiply(spread_weight, a)

def iteration(H, piS, danglingNode, a):
    """Repeat calPiK1 from ``piS`` until successive vectors agree within ``e``.

    Args:
        H: column-normalised adjacency matrix.
        piS: starting influence vector.
        danglingNode: row vector flagging dangling nodes.
        a: article vector.

    Returns:
        ``(counter, piK1)``: the number of passes through the loop and the
        last vector computed. ``counter`` is 0 when the very first update
        already lies within the tolerance ``e`` (L1 norm) of ``piS``.
    """
    # Bind piK before the loop reads it. Because piK is assigned inside this
    # function it is a local name, so reading it unassigned raised
    # UnboundLocalError on the first pass.
    piK = piS

    # Prime the residual against the vector that was passed in (piS), not
    # against the notebook-level pi0.
    piK1 = calPiK1(H, piK, danglingNode, a)
    norm = np.linalg.norm((piK1 - piK), ord=1)

    counter = 0
    # Keep iterating while the residual is still above the tolerance e. The
    # first pass repeats the priming step, so counter counts loop passes.
    while norm > e:
        piK1 = calPiK1(H, piK, danglingNode, a)
        norm = np.linalg.norm((piK1 - piK), ord=1)
        piK = piK1
        counter += 1
    return counter, piK1

In [11]:
# H is not assigned by any earlier cell shown in this notebook, so build it
# here; otherwise this cell fails with NameError on a fresh kernel.
H = normalizeCol()
counter, piK1 = iteration(H, pi0, danglingNode, a)
print("Iterated %i times" %(counter))
print("Influence vector is: {0}".format(piK1))

Iterated 16 times
Influence vector is: [[ 0.29639248]
 [ 0.23860576]
 [ 0.13357154]
 [ 0.0449906 ]
 [ 0.26123859]
 [ 0.02520101]]


# 1.6 Calculating the EigenFactor (EF) Sco