# <center>COMPUTE PSI MODEL</center>

In [29]:
import sys
import numpy as np
from operator import itemgetter
from scipy.sparse.linalg import norm
from scipy.sparse import dok_matrix, csr_matrix, dia_matrix
from time import time

Load data and outpath.

In [47]:
# Script-mode equivalents (settings read from the command line instead):
# datafile = str(sys.argv[1])
# outdir = str(sys.argv[2])
# eps = float(sys.argv[3])
# maxiter = float(sys.argv[4])
# datafile = "../Datasets/wcano/wcano_tronc.txt"

# Fixed-point iterations stop once the norm of the difference between two
# successive iterates is no longer greater than eps ...
eps = 0.001
# ... or once this many iterations have been performed for a user.
maxiter = 1000
datafile = "../Datasets/test/test_rtid_2.txt"
# Directory receiving PsiModel.txt. The last cell concatenates it directly with
# the file name, so it must end with a path separator.
outdir = "./"

## 1. Get graph and activity

Create the `Author` and `newid` dicts. `Author` maps each tweet id to the (old) id of the user who posted it. We want the user ids to be 0, 1, 2, ..., N-1, so `newid` maps each old id (key) to its new id (value).

In [48]:
sys.stdout.write("Creating author and newid dict...")

# Author: tweet id -> original id of the user who posted it.
# newid:  original user id -> consecutive integer id starting at 0.
Author, newid = dict(), dict()

# Every non-blank line is whitespace-separated: column 0 is the tweet id and
# column 2 the user id. The `with` block closes the file once the pass is over.
with open(datafile) as infile:
    for line in infile:
        line = line.split()
        if not line:
            continue  # blank line: nothing to parse, avoid IndexError on line[0]
        twid, uid = int(line[0]), int(line[2])
        Author[twid] = uid
        if uid not in newid:
            newid[uid] = -1  # placeholder, replaced by the real new id below

# Create the new ids as values of the newid dict
# by enumerating the sorted list of old ids.
# Kept as an explicit loop: `i` and `u` are deleted by a later cleanup cell.
for i, u in enumerate(sorted(newid.keys())):
    newid[u] = i

Creating author and newid dict...

Get graph and activity.

In [49]:
# Per-user accumulators, keyed by new user id:
#   Lambda[u]    count of lines by u with rtid == -1
#   Mu[u]        count of lines by u with rtid != -1
#   Leaders[u]   users whose tweets u has reposted
#   Followers[u] users who have reposted a tweet of u
Lambda, Mu, Leaders, Followers = dict(), dict(), dict(), dict()
FirstT = None

sys.stdout.write("Getting graph and activity infos...")
# Second pass over the data file; `with` closes the handle when the pass ends.
with open(datafile) as infile:
    for line in infile:

        # columns used: 1 = timestamp, 2 = (old) user id, 3 = reposted tweet id
        line = line.split()
        if not line:
            continue  # blank line: nothing to parse, avoid IndexError on line[1]
        tstamp, uid, rtid = float(line[1]), newid[int(line[2])], int(line[3])

        # remember the timestamp of the first line
        if FirstT is None:
            FirstT = tstamp

        # first time this user is seen: create its entries
        if uid not in Lambda:
            Lambda[uid], Mu[uid], Leaders[uid], Followers[uid] = 0.0, 0.0, set(), set()

        if rtid == -1:
            # rtid == -1 is treated as "not a repost"
            Lambda[uid] += 1.0
        else:
            Mu[uid] += 1.0
            # the leader/follower edge is only recorded when the reposted
            # tweet is itself present in the data file
            if rtid in Author:
                auth = newid[Author[rtid]]
                Leaders[uid].add(auth)
                if auth not in Lambda:
                    # author not encountered yet in this pass: create its entries
                    Lambda[auth], Mu[auth], Leaders[auth], Followers[auth] = 0.0, 0.0, set(), {uid}
                else:
                    Followers[auth].add(uid)

# timestamp of the last parsed line
LastT = tstamp

Getting graph and activity infos...

Divide lambdas and mus by total time.

In [50]:
# Observation window: time elapsed between the first and the last line of the data file.
T = float(LastT - FirstT)

# Turn the per-user counts into rates (events per unit of time).
for u in Lambda:
    Lambda[u], Mu[u] = Lambda[u] / T, Mu[u] / T

Get nb of users.

In [51]:
# Number of distinct users; also used as the dimension of the sparse matrices and vectors.
N = len(Lambda)

Del useless variables to save RAM.

In [52]:
# Free the parsing-stage names. NOTE: `auth` only exists if at least one repost of a
# tweet present in `Author` was seen, and the loop variables (`i`, `u`, `line`, `twid`,
# `uid`, `rtid`, `tstamp`) only exist if the data file was non-empty; otherwise this
# statement raises NameError.
del Author, FirstT, LastT, auth, i, line, rtid, tstamp, twid, u, uid

## 2. Computing $\mathbf{p_i}$

We do not compute matrix $\mathbf{C}$ and vectors $\mathbf{d_i}$ yet. We generate vectors $\mathbf{b_i}$ on the fly for each user.

### 2.1 Create $\mathbf{A}$

In [53]:
print("Create matrix A...")
A = dok_matrix((N, N))
for j in Lambda:
    leaders_of_j = Leaders[j]
    # summed rates (Lambda + Mu) of all the leaders of j
    denominator = np.sum([Lambda[l] + Mu[l] for l in leaders_of_j])
    for k in leaders_of_j:
        # row j, column k: Mu of leader k relative to the summed rates of j's leaders
        A[j, k] = Mu[k] / denominator
# DOK is convenient for item-by-item filling; CSR is used for the products that follow
A = csr_matrix(A)

Create matrix A...


### 2.2 Fixed point iterations

In [54]:
p = dict()
start = time()

progress_fmt = "Fixed point iteration nb {} for user {}... eps={}, elapsed time={:.3f}\r"

# one fixed-point problem p_i = A p_i + b_i per user
for i in Lambda:

    # b_i is built on the fly: it is non-zero only on the rows of i's followers
    bi = dok_matrix((N, 1))
    for j in Followers[i]:
        denominator = np.sum([Lambda[l] + Mu[l] for l in Leaders[j]])
        bi[j, 0] = Lambda[i] / denominator
    bi = csr_matrix(bi)  # CSR for the repeated products below

    # first two iterates
    pi_0 = bi
    pi_1 = A.dot(pi_0) + bi
    nb_iter = 0

    # iterate until two successive iterates are within eps, or maxiter is reached
    while norm(pi_0 - pi_1) > eps and nb_iter < maxiter:
        sys.stdout.flush()
        sys.stdout.write(progress_fmt.format(nb_iter, i, eps, time() - start))
        pi_0, pi_1 = pi_1, A.dot(pi_1) + bi
        nb_iter += 1

    # keep the last iterate as p_i
    p[i] = pi_1

Fixed point iteration nb 8 for user 2... eps=0.001, elapsed time=0.053

Del useless variables to save RAM.

In [55]:
# Drop the intermediates of the p_i stage. `eps` and `maxiter` are removed as well, so the
# configuration cell has to be re-run before repeating the fixed-point iterations.
# NOTE: `k` only exists if at least one user has a leader; otherwise this raises NameError.
del A, Followers, Leaders, T, bi, denominator, i, j, k, nb_iter, pi_0, pi_1, start, maxiter, eps

## 2. Computing $\mathbf{q_i}$

Now we can easily get the $\mathbf{q_i}$ using Eq (13) of Thm 2.

### 2.1 Create $\mathbf{C}$

In [56]:
print("Create matrix C...")
C = dok_matrix((N, N))
for j, mu_j in Mu.items():
    # diagonal entry j: share of Mu in the total rate (Lambda + Mu) of user j
    C[j, j] = mu_j / (Lambda[j] + mu_j)
# only the main diagonal is filled, so the matrix is stored in diagonal (DIA) format
C = dia_matrix(C)

Create matrix C...


### 2.2 Compute q from C, p and d

In [57]:
q = dict()
start = time()

progress_fmt = "Computing q_{}... elapsed time={:.3f}\r"

# q_i = C p_i + d_i for every user i
for i in Lambda:

    sys.stdout.flush()
    sys.stdout.write(progress_fmt.format(i, time() - start))

    # d_i has a single non-zero entry, on row i
    di = dok_matrix((N, 1))
    di[i, 0] = Lambda[i] / (Lambda[i] + Mu[i])
    di = csr_matrix(di)

    qi = C.dot(p[i]) + di
    qi[i, 0] = 0  # entry i is forced to 0 so that it does not count in the Psi average
    q[i] = qi

Computing q_2... elapsed time=0.004

Del useless variables.

In [60]:
# Every q_i has been computed, so `p`, the rates (`Lambda`, `Mu`) and the q-stage
# intermediates are dropped; only `q`, `N` and `newid` are used afterwards.
del C, Lambda, Mu, di, i, j, start, p

## 3. Compute $\Psi$ and write to out

Compute Psi.

In [61]:
print("Computing Psi...\n")
# q[i].mean() averages over all N entries although entry i was forced to 0;
# the N/(N-1) factor turns it into an average over the N-1 other users.
Psi = dict((i, (N/(N-1))*q_i.mean()) for i, q_i in q.items())

Computing Psi...



Del useless variables.

In [28]:
# Drop `q` now that Psi is computed. NOTE: this also removes imported names
# (csr_matrix, dia_matrix, dok_matrix, time), so the import cell must be re-run
# before any earlier cell can be executed again.
del csr_matrix, dia_matrix, dok_matrix, q, time

Sort the psis in decreasing order and create the `oldid` dict (new id → original id) to retrieve the original ids.

In [234]:
sys.stdout.write("Sorting results...")
# (new id, psi) pairs ordered by decreasing psi
sorted_psis = list(Psi.items())
sorted_psis.sort(key=itemgetter(1), reverse=True)
del Psi

sys.stdout.write("Creating oldid dict...")
# inverse of newid: new id -> original id
oldid = dict((new, old) for old, new in newid.items())
del newid

Sorting results...

Write the results to the output file.

In [None]:
sys.stdout.write("Writing results...")
# One "<original user id> <psi>" pair per line, in the order of sorted_psis.
# The file must be opened with mode "w": the default mode is read-only, so
# writing would fail. write() accepts a single string, hence the formatting.
# The `with` block closes the file even if a write raises.
with open(outdir + "PsiModel.txt", "w") as out:
    for user, psi in sorted_psis:
        out.write("{} {}\n".format(oldid[user], psi))