In [5]:
from sympy import *
simplify(solveset("-x**3 + 3*x + 1", "x"))

{-2*sin(pi/18), -2*sin(5*pi/18), 2*cos(pi/9)}

# <center> Social Platform Emulator </center>

In [1]:
import sys
import util
import random
import numpy as np
from time import time
from operator import itemgetter

In [2]:
dataset = 'wcano'

data_path, RTU, truegraph = util.load_data(dataset)
out_path = "../PsiResults/{}/Psi_emul.txt".format(dataset)

Wall is finite for all users. We apply FIFO replacement principle. The Newsfeed is not needed, because we have no information on the policy of the OSP or the user policy to choose among many. We know: 1) which post is tweeted-retweeted and the time-stamp, and the tweet origin.

For each user we create a Wall list of max size $K$.

In [12]:
K = 1 # Wall, Newsfeed list size
Wall = dict() # les walls

If we have RTids and not RTusers we create the `Author` dict.

In [13]:
if not RTU:
    
    # create author dict
    Author = dict()
    for lign in open(data_path):
        lign = lign.split()
        tweetid, userid = int(lign[0]), int(lign[2])
        Author[tweetid] = userid
        
    # check length and other stuff
    print("Length Author dict : ", len(Author))
    print("74400 in Author : ", 74400 in Author)

Length Author dict :  8
74400 in Author :  False


In [14]:
if not RTU:
    print(Author)

{0: 0, 1: 1, 2: 2, 3: 0, 4: 1, 5: 2, 6: 2, 7: 3}


We introduce a disctionary `Q`, which contains many dictionries, each related to a userid. $Q[userid][auth-out]$ saves the time periods that a post from $[auth-out]$ stays on the Wall of $[userid]$.

In [15]:
Q = dict()
FirstP = dict()

**Note:** Are we happy with Q as matrix? or list of lists? or txt? or dictionary? **A: Dictionary of Dictionaries**

**Important! In the case of RTids :**

if the tweet is original then the person who posted is the author.
    else it is a retweet. If the original post was observed and saved in "Authors",
   we know the author.
   else the retweet has origin outside the dataset. In this case we assume that
   the first retweeter is the original author.

In [16]:
Amphitrions = dict() # for each user, gathers the users present on his wall in case of a tie
Time0 = None # time of the first event
timestamp_old = 0 # for time tracking
count_ties = 0 # used in case of multiple posts on the same wall at the same time
 

# iterate
start = time()
for i,lign in enumerate(open(data_path)):
    
    # print state
    if i%10000==0:
        sys.stdout.flush()
        sys.stdout.write("line {}... elapsed time {}\r".format(i, time()-start))

    # split
    lign = lign.split()
    if RTU:
        tweetid, tstamp, userid, rtu = int(lign[0]), int(lign[1]), int(lign[2]), int(lign[3])
    else:
        tweetid, tstamp, userid, rtid = int(lign[0]), int(lign[1]), int(lign[2]), int(lign[3])  
    
    # time tracking
    if Time0 == None:
        Time0 = tstamp
    if tstamp == timestamp_old:
        count_ties+=1
    timestamp_old = tstamp
    
    # add userid to Amphitrions
    if userid not in Amphitrions:
        Amphitrions[userid] = dict()

    # rtu treatment
    if RTU:
        if rtu == -1:
            auth = userid
        else:
            auth = rtu
    
    # else rtid treatment
    else:
        if rtid == -1:
            auth = userid
        elif rtid in Author:
            auth = Author[rtid] # it is the tweet origin ID
        else:
            auth = userid
        
    # add userid to Wall and Q dicts
    if userid not in Wall:
        Wall[userid] = []
        Q[userid] = dict()
        
    # is it the user's first post ? if so update FirstP dict
    if len(Wall[userid])==0:
        FirstP[userid] = tstamp
    
    # add new post to userid's wall
    Wall[userid].append((tweetid,tstamp,auth))
    
    # if there is already a post on the wall
    if len(Wall[userid]) > K:
        
        # update auth_out and time_wall
        auth_out = Wall[userid][0][2]
        time_wall = tstamp-Wall[userid][0][1]

        # add auth_out to Q[userid]
        if auth_out not in Q[userid]:
            Q[userid][auth_out] = 0

        # update AmphiAmphitrions[userid] with auth_out
        if auth_out not in Amphitrions[userid]:
            Amphitrions[userid][auth_out] = 0 
        Amphitrions[userid][auth_out] += 1
          
        # if no tie (or end of a tie)
        if time_wall != 0:
            Ndt = 0 # reinit number of ties
            for author in Amphitrions[userid]: # count ties
                Ndt += Amphitrions[userid][author]
            for author in Amphitrions[userid]: # update Q[userid]
                Q[userid][author] += time_wall / Ndt * Amphitrions[userid][author]
            Amphitrions[userid] = dict() # reinit Amphitrions[userid]
            
        # keep the most recent post on wall (<=> random eviction because K=1)
        Wall[userid] = Wall[userid][1:]
        assert( len(Wall[userid]) == K )

        
print("\nNb ties : ", count_ties)

line 0... elapsed time 0.0
Nb ties :  1


**Note1:** EndPoint of simulation is the time of last arrival.

In [17]:
print("Init time : ", Time0)
EndP = tstamp
print("EndPoint of simulation : ", EndP)

if not RTU:
    print("Length Author dict : ", len(Author))

Init time :  0
EndPoint of simulation :  7
Length Author dict :  8


**Note2:** Post-processing: Treat extra users with Wall not empty at the end of the simulation. In this case, we consider that the post on their wall was also there when the simulation started and until the first post of the concerned user.

In [18]:
# iterate over all walls
for u in Wall:
    
    # if wall not empty
    if len(Wall[u]) > 0:
        
        # get auth_out and time on wall
        auth_out = Wall[u][0][2]
        time_wall = EndP-Wall[u][0][1]
        
        # update Q[u] with auth_out
        if u not in Q:
            Q[u] = dict()
        if auth_out not in Q[u]:
            Q[u][auth_out] = 0

        # update Amphitrions[u]
        if auth_out not in Amphitrions[u]:
            Amphitrions[u][auth_out] = 0 
        Amphitrions[u][auth_out] += 1
        
        # if no tie (or end of a tie)
        if time_wall != 0:
            Ndt = 0 # reinit nb ties
            for author in Amphitrions[u]: # count ties
                Ndt += Amphitrions[u][author]
            for author in Amphitrions[u]: # update Q[u] with AmphitAmphitrions
                Q[u][author] += ( time_wall + FirstP[u] - Time0 ) / Ndt * Amphitrions[u][author]
            Amphitrions[u] = dict() # reinit Amphitrions[u]

In [19]:
user = random.choice(list(Q.keys()))
print("Q of user {} : {}".format(user, Q[user]))

Q of user 0 : {0: 3.0, 1: 4.0}


**Estimated Qs**
We now compoute the end values for `Q` by dividing each one with the total duration of the simulation.

In [20]:
EstQ = dict()
nb0 = 0

for u in Q:
    
    # we don't consider users that made their first post at the last timestamp
    if EndP - FirstP[u] == 0:
        nb0 += 1
        assert len(Q[u]) == 1
    
    # otherwise we compute the final value of Q for the user u
    else:
        EstQ[u] = { j: Q[u][j]/(EndP - Time0) for j in Q[u] }

print(nb0)

1


In [21]:
EstQ

{0: {0: 0.42857142857142855, 1: 0.5714285714285714},
 1: {1: 0.42857142857142855, 2: 0.5714285714285714},
 2: {2: 0.42857142857142855, 0: 0.14285714285714285, 1: 0.42857142857142855}}

Print length of `EstQ`.

In [12]:
N = len(EstQ)
print("Length of EstQ dict : ", N)

Length of EstQ dict :  5804715


In [13]:
print("EstQ of user {} : {}".format(user, EstQ[user]))

EstQ of user 1269040 : {50244: 1.0}


**EstQ[user][leader]:** is the proportion of time that posts of "leader" is on 1-st position of "user" Wall. Hence:
$\sum_{leaders}EstQ[user][leader] = 1$.

Calculate Influence of user on his followers.

In [14]:
Influence = dict()

for follower in EstQ:
    for leader in EstQ[follower]:
        
        if leader not in Influence:
            Influence[leader] = dict()
            
        Influence[leader][follower] = EstQ[follower][leader]

In [15]:
leader = random.choice(list(EstQ[user].keys()))
print("Influence of user {} : {}".format(leader, Influence[leader]))

Influence of user 50244 : {27056: 0.022553399445239188, 262: 6.141025085764264e-06, 12780: 0.011259892706595396, 28295: 1.0342779091813497e-05, 25714: 0.01834550441410419, 28887: 0.035338690461953766, 27637: 0.0253066795606646, 26414: 4.896659476280452e-05, 22178: 0.00024774188043359517, 14227: 0.012278171629369098, 25890: 0.22058610589842229, 27965: 0.006918349576883372, 27321: 9.696355398575153e-07, 25993: 0.03788963996139558, 21383: 0.0005754786929054353, 55816: 0.06106683827060976, 1366: 0.04098617105793055, 1994: 0.30147730438734227, 73789: 0.09496836725657139, 13039: 0.0003906015166392691, 33535: 0.0023143584277165795, 63739: 0.015397327555167414, 62708: 0.05344016993186048, 35127: 0.0002336821651056612, 34539: 0.010543332042640692, 34323: 3.78157860544431e-05, 34951: 3.2321184661917177e-06, 34082: 5.446119615533044e-05, 33507: 0.041470827221936, 31277: 0.0005990731577086349, 34000: 0.00010795275677080337, 43525: 0.20753998292148604, 30632: 0.00040352999050403596, 30847: 0.148985

Compute final values for $\psi$.

In [16]:
Psi = dict()
for user in Influence:
    Psi[user] = 0
    for follower in Influence[user]:
        if follower != user:
            Psi[user]+=Influence[user][follower]
    Psi[user] = Psi[user]/(N-1)

In [17]:
print("Psi of user {} : {}".format(leader, Psi[leader]))

Psi of user 50244 : 0.0011760710639163547


**Export sorted list of $\psi$.**

In [18]:
Psi = sorted(Psi.items(), key=itemgetter(1), reverse=True)

In [19]:
with open(out_path, 'w') as f:
    for p in Psi:
        f.write("%d %g\n"%(p[0], p[1]))

In [20]:
print(Psi[-5:])

[(2919178, 0.0), (5875541, 0.0), (5870126, 0.0), (5875546, 0.0), (5821264, 0.0)]
