In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io
from scipy.sparse.linalg import eigs
from scipy import stats

# Load dataset

In [None]:
# Read the MATLAB file once, then pull out the three arrays the notebook uses:
# the university entries, their research ranking and the link matrix.
file = scipy.io.loadmat('univ_cn.mat')
univ_cn, rank_cn, W_cn = (file[key] for key in ('univ_cn', 'rank_cn', 'W_cn'))

In [None]:
# number of universities: one column of univ_cn per university
n = univ_cn.shape[1]

# university names: the text before the first '.' of each stored entry
univ = [entry[0].split('.')[0] for entry in univ_cn[0]]

# research ranking of the universities, flattened to a length-n vector
research_rank = file['rank_cn'].reshape(n)
# NOTE(review): the reference ordering used by the later correlation cells is
# simply 0..n-1, i.e. it presumes the universities are already stored in
# research-rank order -- confirm against rank_cn.
research_idx = np.arange(n)

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# Directed graph whose edge weights are the non-zero entries of W_cn.
# A plain ndarray is passed instead of np.matrix: the matrix class is no longer
# recommended by NumPy and from_numpy_array only needs a 2-D array.
G = nx.from_numpy_array(np.asarray(W_cn), create_using=nx.DiGraph)

# PageRank

In [None]:
def PageRank(alpha, W, names=None):
    """Rank nodes by the leading left eigenvector of the damped transition matrix.

    Parameters
    ----------
    alpha : float
        Damping factor: weight of the link-following part; ``1 - alpha`` is
        spread uniformly over all nodes.
    W : array_like, shape (n, n)
        Link matrix; ``W[i, j]`` is the weight of the link from node i to node j.
    names : sequence of str, optional
        Label of every node. Defaults to the notebook-level ``univ`` list, which
        keeps existing two-argument calls working.

    Returns
    -------
    pagerank_score : ndarray, shape (n,)
        Absolute values of the eigenvector after dividing by its sum.
    pagerank_idx : ndarray, shape (n,)
        Node indices ordered from highest to lowest score.
    univ_pagerank : list
        ``names`` reordered according to ``pagerank_idx``.

    Notes
    -----
    Rows of ``W`` without any out-going weight stay all-zero in the transition
    matrix, so the corresponding rows of the damped matrix sum to ``1 - alpha``
    rather than 1.
    """
    W = np.asarray(W, dtype=float)
    n = W.shape[0]
    out_strength = W.sum(axis=1)

    # Row-normalise W; rows with zero out-strength are left as zeros instead of
    # being divided by zero.
    has_links = out_strength[:, None] > 0
    T = np.divide(W, out_strength[:, None], out=np.zeros((n, n)), where=has_links)

    # Damped matrix: follow a link with weight alpha, jump uniformly otherwise.
    T1 = T * alpha + (1 - alpha) * np.ones((n, n)) / n

    # Leading eigenvector of the transpose == leading left eigenvector of T1.
    leval, levec = eigs(T1.T, 1)
    levec = levec.reshape((n,))

    # eigs returns a complex vector with arbitrary scale/sign; dividing by the
    # sum and taking the modulus yields non-negative real scores.
    pagerank_score = np.abs(levec / np.sum(levec))

    # Negate so that argsort lists the highest score first.
    pagerank_idx = np.argsort(-pagerank_score)

    if names is None:
        names = univ  # notebook-level list of university names
    univ_pagerank = [names[i] for i in pagerank_idx]

    return pagerank_score, pagerank_idx, univ_pagerank

In [None]:
# PageRank with damping factor 0.85; universities are listed highest score first.
pagerank_score, pagerank_idx, univ_pagerank = PageRank(alpha=0.85, W=W_cn)
print(univ_pagerank)

['tsinghua', 'pku', 'sjtu', 'nju', 'uestc', 'scut', 'zsu', 'dlut', 'fudan', 'seu', 'zju', 'ustc', 'hust', 'gzsums', 'jnu', 'whu', 'bnu', 'tju', 'lzu', 'ecnu', 'tongji', 'swjtu', 'xmu', 'nankai', 'jlu', 'bupt', 'ruc', 'bfsu', 'njau', 'sdu', 'fzu', 'nuaa', 'hit', 'cau', 'cumt', 'njtu', 'swufe', 'xidian', 'bit', 'neu', 'njust', 'buaa', 'cqu', 'ecust', 'nwu', 'nwpu', 'shufe', 'cug', 'nenu', 'tyut', 'ccnu', 'njmu', 'xju', 'scau', 'dlmu', 'ouqd', 'znufe', 'shsmu', 'shisu', 'ustb', 'sicau', 'hfut', 'njim', 'ccom', 'cupl', 'scu', 'uibe', 'henu', 'cpums', 'usst', 'hzau', 'csu', 'cdut', 'cun', 'sdust', 'nip']


# HITS authority ranking

In [None]:
# Singular value decomposition of the link matrix. numpy returns the right
# singular vectors as the ROWS of the third factor, so V[0] is the leading one.
U, S, V = np.linalg.svd(W_cn)

# Authority score: leading right singular vector rescaled to sum to one.
authority_score = V[0] / V[0].sum()

# Universities ordered from highest to lowest authority score.
authority_idx = np.argsort(-authority_score)
univ_authrank = [univ[node] for node in authority_idx]
print(univ_authrank)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx, authority_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx, authority_idx)
print("tau = " + str(tau))

['tsinghua', 'pku', 'uestc', 'sjtu', 'nju', 'fudan', 'zsu', 'scut', 'zju', 'gzsums', 'seu', 'tju', 'whu', 'hust', 'ecnu', 'tongji', 'dlut', 'xmu', 'jlu', 'nankai', 'bnu', 'ustc', 'ruc', 'hit', 'cau', 'bupt', 'sdu', 'jnu', 'buaa', 'lzu', 'cqu', 'njtu', 'fzu', 'cumt', 'njust', 'nwpu', 'ecust', 'swjtu', 'neu', 'nuaa', 'xidian', 'njau', 'nwu', 'bit', 'swufe', 'ccnu', 'nenu', 'tyut', 'cug', 'scau', 'shufe', 'hfut', 'njmu', 'shsmu', 'sicau', 'xju', 'ouqd', 'shisu', 'ccom', 'znufe', 'ustb', 'njim', 'dlmu', 'scu', 'uibe', 'henu', 'bfsu', 'cpums', 'cupl', 'hzau', 'usst', 'csu', 'cun', 'sdust', 'cdut', 'nip']
rou = 0.7505126452494874
tau = 0.5719298245614035


# HITS hub ranking

In [None]:
# Hub score: first column of U (left singular vectors from the SVD cell),
# rescaled to sum to one.
hub_score = U[:, 0] / U[:, 0].sum()

# Universities ordered from highest to lowest hub score.
hub_idx = np.argsort(-hub_score)
univ_hubrank = [univ[node] for node in hub_idx]
print(univ_hubrank)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx, hub_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx, hub_idx)
print("tau = " + str(tau))

['pku', 'ustc', 'zsu', 'sjtu', 'zju', 'seu', 'njau', 'whu', 'tju', 'tsinghua', 'hit', 'sdu', 'cau', 'lzu', 'swufe', 'fudan', 'nju', 'scu', 'scau', 'jlu', 'bnu', 'tyut', 'nenu', 'cug', 'gzsums', 'hzau', 'neu', 'ccnu', 'ecnu', 'hust', 'scut', 'tongji', 'swjtu', 'nwpu', 'jnu', 'nankai', 'dlut', 'fzu', 'shufe', 'nwu', 'bfsu', 'xidian', 'sicau', 'ruc', 'cun', 'znufe', 'sdust', 'cqu', 'njust', 'dlmu', 'xju', 'cdut', 'njtu', 'nip', 'bupt', 'cupl', 'henu', 'ouqd', 'csu', 'nuaa', 'shisu', 'bit', 'uestc', 'cumt', 'njim', 'njmu', 'ustb', 'cpums', 'buaa', 'xmu', 'uibe', 'hfut', 'usst', 'ecust', 'ccom', 'shsmu']
rou = 0.5395762132604237
tau = 0.3782456140350878


# Weighted PageRank

In [None]:
def W_in(W):
    """In-link weights of every link of a square link matrix.

    For a link i -> j (``W[i, j] > 0``) the weight is the in-strength of j
    (column sum of ``W``) divided by the summed in-strength of all nodes that
    i links to. Entries without a link are 0.

    Parameters
    ----------
    W : array_like, shape (n, n)
        Link matrix; ``W[i, j] > 0`` marks a link from i to j.

    Returns
    -------
    ndarray, shape (n, n)
        The in-link weight matrix; every row that has at least one link sums
        to 1 (for non-negative ``W``).
    """
    W = np.asarray(W)
    in_strength = W.sum(axis=0)

    # in-strength of the target for every existing link, 0 elsewhere
    # (in_strength broadcasts along the columns, i.e. entry [i, j] is I[j])
    target_strength = np.where(W > 0, in_strength, 0.0)

    # One denominator per row, computed once instead of once per link.
    row_total = target_strength.sum(axis=1, keepdims=True)

    # Guarded division: rows whose denominator is zero stay all-zero.
    return np.divide(
        target_strength,
        row_total,
        out=np.zeros_like(target_strength),
        where=row_total != 0,
    )

In [None]:
def W_out(W):
    """Out-link weights of every link of a square link matrix.

    For a link i -> j (``W[i, j] > 0``) the weight is the out-strength of j
    (row sum of ``W``) divided by the summed out-strength of all nodes that
    i links to. Entries without a link are 0.

    Parameters
    ----------
    W : array_like, shape (n, n)
        Link matrix; ``W[i, j] > 0`` marks a link from i to j.

    Returns
    -------
    ndarray, shape (n, n)
        The out-link weight matrix. If every node that i links to has zero
        out-strength the denominator is zero; that row is returned as zeros
        rather than NaN (0/0), so the result is always finite.
    """
    W = np.asarray(W)
    out_strength = W.sum(axis=1)

    # out-strength of the target for every existing link, 0 elsewhere
    # (out_strength broadcasts along the columns, i.e. entry [i, j] is O[j])
    target_strength = np.where(W > 0, out_strength, 0.0)

    # One denominator per row, computed once instead of once per link.
    row_total = target_strength.sum(axis=1, keepdims=True)

    # Guarded division: a zero denominator leaves the row at zero instead of
    # producing NaN that would poison the later eigenvector computation.
    return np.divide(
        target_strength,
        row_total,
        out=np.zeros_like(target_strength),
        where=row_total != 0,
    )

In [None]:
def WeightedPageRank(alpha, W, W_in, W_out, names=None):
    """Rank nodes with link weights scaled by in-link and out-link weights.

    Parameters
    ----------
    alpha : float
        Damping factor: weight of the link part; ``1 - alpha`` is spread
        uniformly over all nodes.
    W : array_like, shape (n, n)
        Link matrix.
    W_in, W_out : array_like, shape (n, n)
        Per-link in-weights and out-weights, aligned entry by entry with ``W``.
    names : sequence of str, optional
        Label of every node. Defaults to the notebook-level ``univ`` list, which
        keeps existing four-argument calls working.

    Returns
    -------
    pagerank_score : ndarray, shape (n,)
        Absolute values of the leading eigenvector after dividing by its sum.
    pagerank_idx : ndarray, shape (n,)
        Node indices ordered from highest to lowest score.
    univ_pagerank : list
        ``names`` reordered according to ``pagerank_idx``.

    Notes
    -----
    The element-wise product is used as-is; it is not row-normalised, so the
    rows of the damped matrix need not sum to 1.
    """
    W = np.asarray(W, dtype=float)
    n = W.shape[0]

    # Entry-wise product W[i, j] * W_in[i, j] * W_out[i, j] for all pairs at once.
    T = W * np.asarray(W_in, dtype=float) * np.asarray(W_out, dtype=float)

    T1 = T * alpha + (1 - alpha) * np.ones((n, n)) / n

    # Leading eigenvector of the transpose == leading left eigenvector of T1.
    leval, levec = eigs(T1.T, 1)
    levec = levec.reshape((n,))

    # eigs returns a complex vector with arbitrary scale/sign; dividing by the
    # sum and taking the modulus yields non-negative real scores.
    pagerank_score = np.abs(levec / np.sum(levec))

    # Negate so that argsort lists the highest score first.
    pagerank_idx = np.argsort(-pagerank_score)

    if names is None:
        names = univ  # notebook-level list of university names
    univ_pagerank = [names[i] for i in pagerank_idx]

    return pagerank_score, pagerank_idx, univ_pagerank

In [None]:
# In-link and out-link weight matrices derived from the link matrix.
Win = W_in(W_cn)
Wout = W_out(W_cn)

# WeightedPageRank's signature is (alpha, W, W_in, W_out). The link matrix must
# come first; the weight matrices are passed by keyword so they cannot be
# swapped silently.
pagerank_score, pagerank_idx, univ_pagerank = WeightedPageRank(
    0.9, W_cn, W_in=Win, W_out=Wout
)
print(univ_pagerank)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx,pagerank_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx,pagerank_idx)
print("tau = " + str(tau))

['tsinghua', 'pku', 'sjtu', 'zsu', 'whu', 'nju', 'seu', 'ustc', 'zju', 'fudan', 'tju', 'hust', 'njau', 'cau', 'bfsu', 'hit', 'sdu', 'scut', 'gzsums', 'lzu', 'jlu', 'ecnu', 'bnu', 'swufe', 'tyut', 'nenu', 'fzu', 'jnu', 'scu', 'scau', 'neu', 'ccnu', 'cug', 'nankai', 'swjtu', 'dlut', 'tongji', 'nwpu', 'ruc', 'xidian', 'shufe', 'sicau', 'nwu', 'uestc', 'znufe', 'xju', 'cqu', 'njust', 'dlmu', 'njtu', 'bupt', 'cumt', 'nuaa', 'xmu', 'ouqd', 'njmu', 'shisu', 'njim', 'henu', 'bit', 'buaa', 'hzau', 'cupl', 'uibe', 'ustb', 'cpums', 'cun', 'csu', 'cdut', 'sdust', 'hfut', 'ecust', 'ccom', 'nip', 'shsmu', 'usst']
rou = 0.5933287764866713
tau = 0.4175438596491229


# Varying Google's hyperparameter $\alpha$

Spearman’s $\rho$ and Kendall’s $\tau$ rank correlation coefficients for each value of $\alpha$

In [None]:
from scipy import stats

# Damping factors to compare.
alphaset = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.85, 0.9])
n_alpha = alphaset.shape[0] # number of damping factors
# Column i holds the PageRank ordering (node indices) obtained with alphaset[i].
pageranks_idx = np.zeros((n,n_alpha))

for i in range (n_alpha):
    print("alpha = " + str(alphaset[i]))

    # Solve PageRank once per alpha and reuse the result for both the index
    # ordering and the ranked names (previously the eigenproblem was solved
    # twice per iteration).
    ranking = PageRank(alphaset[i],W_cn)
    pageranks_idx[:,i] = ranking[1]

    rou, p_value = stats.spearmanr(research_idx,pageranks_idx[:,i])
    print("rou = " + str(rou))
    tau, p_value = stats.kendalltau(research_idx,pageranks_idx[:,i])
    print("tau = " + str(tau))

    # Top five universities for this alpha.
    univ_pagerank = ranking[2]
    print(univ_pagerank[0:5])

alpha = 0.1


NameError: name 'PageRank' is not defined

# Node ranking based on centralities

In [None]:
# Degree centrality of every node of G.
# NOTE(review): the argsort below relies on d listing the nodes in index order
# 0..n-1 so that positions match entries of univ -- confirm for this graph.
d = nx.degree_centrality(G)

# Node indices from most to least central (ascending argsort, reversed).
degree_centrality_idx = np.flip(np.argsort(list(d.values())))
univ_degree_centrality = [univ[node] for node in degree_centrality_idx]
print(univ_degree_centrality)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx, degree_centrality_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx, degree_centrality_idx)
print("tau = " + str(tau))

['pku', 'tsinghua', 'nju', 'zsu', 'sjtu', 'scut', 'ecnu', 'swjtu', 'whu', 'njau', 'hit', 'seu', 'sdu', 'ustc', 'lzu', 'fudan', 'bnu', 'tju', 'jlu', 'cug', 'neu', 'tyut', 'gzsums', 'hust', 'xidian', 'shufe', 'xju', 'fzu', 'sicau', 'znufe', 'jnu', 'cau', 'nwpu', 'nenu', 'swufe', 'scau', 'ccnu', 'dlmu', 'scu', 'bfsu', 'zju', 'nwu', 'ruc', 'sdust', 'nankai', 'dlut', 'njtu', 'tongji', 'cqu', 'uestc', 'cdut', 'njust', 'cun', 'henu', 'cumt', 'nuaa', 'shisu', 'bupt', 'xmu', 'njmu', 'njim', 'ouqd', 'buaa', 'ecust', 'ustb', 'shsmu', 'ccom', 'bit', 'hfut', 'uibe', 'cupl', 'cpums', 'nip', 'hzau', 'usst', 'csu']
rou = 0.4388243335611756
tau = 0.3087719298245615


In [None]:
# Closeness centrality of every node of G.
# NOTE(review): the argsort below relies on d listing the nodes in index order
# 0..n-1 so that positions match entries of univ -- confirm for this graph.
d = nx.closeness_centrality(G)

# Node indices from most to least central (ascending argsort, reversed).
closeness_centrality_idx = np.flip(np.argsort(list(d.values())))
univ_closeness_centrality = [univ[node] for node in closeness_centrality_idx]
print(univ_closeness_centrality)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx, closeness_centrality_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx, closeness_centrality_idx)
print("tau = " + str(tau))

['pku', 'tsinghua', 'nju', 'uestc', 'sjtu', 'scut', 'zsu', 'zju', 'dlut', 'seu', 'whu', 'fudan', 'ustc', 'jnu', 'njau', 'xmu', 'nankai', 'bnu', 'tju', 'tongji', 'nuaa', 'swjtu', 'hit', 'ecnu', 'fzu', 'bupt', 'gzsums', 'neu', 'lzu', 'cumt', 'njtu', 'hust', 'njust', 'jlu', 'buaa', 'ecust', 'sdu', 'ruc', 'cqu', 'xidian', 'nwu', 'shisu', 'njmu', 'shufe', 'xju', 'tyut', 'dlmu', 'nenu', 'scau', 'cau', 'njim', 'ouqd', 'cug', 'nwpu', 'shsmu', 'ccom', 'hfut', 'ustb', 'henu', 'ccnu', 'znufe', 'sicau', 'swufe', 'bit', 'uibe', 'cpums', 'bfsu', 'scu', 'cupl', 'usst', 'hzau', 'csu', 'cun', 'cdut', 'sdust', 'nip']
rou = 0.6454135338345864
tau = 0.472280701754386


In [None]:
# Betweenness centrality of every node of G.
# NOTE(review): the argsort below relies on d listing the nodes in index order
# 0..n-1 so that positions match entries of univ -- confirm for this graph.
d = nx.betweenness_centrality(G)

# Node indices from most to least central (ascending argsort, reversed).
betweenness_centrality_idx = np.flip(np.argsort(list(d.values())))
univ_betweenness_centrality = [univ[node] for node in betweenness_centrality_idx]
print(univ_betweenness_centrality)

# Agreement with the research ordering: Spearman first, then Kendall.
rou, p_value = stats.spearmanr(research_idx, betweenness_centrality_idx)
print("rou = " + str(rou))
tau, p_value = stats.kendalltau(research_idx, betweenness_centrality_idx)
print("tau = " + str(tau))

['pku', 'tsinghua', 'sdu', 'bfsu', 'sjtu', 'nju', 'swjtu', 'ecnu', 'scut', 'lzu', 'zsu', 'njau', 'seu', 'jnu', 'bnu', 'fudan', 'whu', 'ustc', 'gzsums', 'swufe', 'hust', 'tju', 'ruc', 'jlu', 'cqu', 'fzu', 'hit', 'zju', 'cug', 'tyut', 'neu', 'shufe', 'xidian', 'dlut', 'scu', 'znufe', 'sicau', 'xju', 'dlmu', 'cau', 'nankai', 'ccnu', 'nenu', 'uestc', 'cumt', 'nwu', 'cdut', 'nwpu', 'njtu', 'scau', 'tongji', 'cupl', 'nuaa', 'njust', 'njmu', 'cun', 'bupt', 'henu', 'bit', 'ouqd', 'shisu', 'xmu', 'uibe', 'cpums', 'buaa', 'njim', 'hzau', 'usst', 'hfut', 'sdust', 'ustb', 'csu', 'ccom', 'nip', 'ecust', 'shsmu']
rou = 0.44855775803144227
tau = 0.29684210526315796
