In [2]:
from itertools import chain
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from model import py_model as model
from model import power_law_distribution as dist

In [3]:
%matplotlib inline
np.random.seed(42)

# Example

In [4]:
num_users = 200
num_iterations = 2000

inv_cdf = dist.inv_cdf(2.7, 10**-3) 
actyivity_potentials = dist.inverse_transform_sampling(inv_cdf)

interactions, deleted_users = model.model(num_iterations, 
                                          num_users, 
                                          actyivity_potentials, 
                                          p_deletion=0.0005, 
                                          p_triadic_closure=0.95, 
                                          link_reinforecement_inc=1.0, 
                                          max_peer_influence_prob=0.05,
                                          peer_influence_thes=0.1,
                                          beta='avg_weight')

In [5]:
# transform list of events into a pandas DataFrame
interaction_log = pd.DataFrame(interactions, columns=['timestep', 'active_user', 'other_user', 'peer_influenced'])
interaction_log.tail()

Unnamed: 0,timestep,active_user,other_user,peer_influenced
1299,1990,121,2,False
1300,1994,252,317,False
1301,1995,394,105,False
1302,1996,171,242,False
1303,1997,185,321,False


In [6]:
# create multivariate time series
def create_ts(log, begin=None, end=None, dtype='int8'):
    maxima = np.max(log)
    max_user_id = int(np.max([maxima['active_user'], maxima['other_user']])) + 1   
    
    if begin:
        log = log[log['timestep'] >= begin]
    if end:
        log = log[log['timestep'] <= end]
    
    begin = np.min(log['timestep'])
    ts_length = np.max(log['timestep']) - np.min(log['timestep']) + 1
    ts = np.zeros(shape=(ts_length, max_user_id), dtype=dtype)

    for t, users in log.groupby('timestep'):
        for user_id in chain(users['active_user'], users['other_user']):
            ts[t - begin][user_id] =+ 1

    return pd.DataFrame(
        ts, 
        columns=['User {}'.format(user_id) for user_id in range(max_user_id)]
    )   

def filter_deleted_users(ts, delted_users):
    #TODO: resort the colums correctly
    return ts[ts.columns.difference(['User {}'.format(user_id) for user_id in deleted_users])]

ts = create_ts(interaction_log)
ts = filter_deleted_users(ts, deleted_users)
ts.tail()

Unnamed: 0,User 103,User 104,User 105,User 114,User 115,User 118,User 119,User 121,User 123,User 127,...,User 78,User 84,User 89,User 90,User 91,User 93,User 94,User 96,User 98,User 99
1992,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1993,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1994,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# at which times was User 2 active?
ts[ts['User 2'] > 0].index

Int64Index([231, 637, 1054, 1205, 1362, 1477, 1711, 1712, 1853, 1989], dtype='int64')