In [50]:
import pandas as pd
from casIn.user_influence import P,influence
from scipy.stats import pearsonr
import numpy as np

In [51]:
import signal

class timeout:
    """Context manager that aborts its body with ``TimeoutError`` after a delay.

    Implemented with ``signal.alarm`` / ``SIGALRM``, so it is only usable on
    platforms that provide ``SIGALRM`` and from the main thread (a restriction
    of ``signal.signal``). Timeouts cannot be nested: entering a second
    ``timeout`` replaces the pending alarm of the first.

    Parameters
    ----------
    seconds : int
        Whole seconds to wait before the alarm fires. ``signal.alarm(0)``
        cancels any alarm, so ``seconds=0`` means "no timeout".
    error_message : str
        Message carried by the ``TimeoutError`` raised when the alarm fires.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message
        # Handler that was installed before __enter__; restored in __exit__.
        self._previous_handler = None

    def handle_timeout(self, signum, frame):
        """SIGALRM handler: raise ``TimeoutError`` with the configured message."""
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # signal.signal returns the handler it replaces; keep it so that leaving
        # the block does not leave our handler installed for the whole process.
        self._previous_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)
        return self

    def __exit__(self, type, value, traceback):
        # Cancel the pending alarm first so it cannot fire while the handler
        # is being swapped back.
        signal.alarm(0)
        previous = self._previous_handler
        if previous is None:
            # None means the old handler was not installed from Python and
            # cannot be re-installed; fall back to the default action.
            previous = signal.SIG_DFL
        signal.signal(signal.SIGALRM, previous)
        self._previous_handler = None

In [52]:
# Tweet-level data: keep only the three columns used below and give them the
# lower-case names the rest of the notebook works with.
tweets = (
    pd.read_csv('cascade_with_influence.csv')
    [['User', 'Time', 'followers_count']]
    .rename(columns={'Time': 'time', 'followers_count': 'magnitude', 'User': 'user_id'})
)

# Cascade-level metrics: only the 'size' column is used, as the pool of
# values the simulation draws from.
cascades = pd.read_csv('metrics_retweet_cascade.csv')
possible_cascade_size = cascades['size'].values

In [53]:
# Per-user accumulator: one row per distinct user_id (as the index), with the
# running 'sample' and 'actual' totals both starting at zero.
sample_actual = pd.DataFrame(
    {'sample': 0, 'actual': 0},
    index=pd.Index(tweets['user_id'].unique(), name='user_id'),
)

In [58]:
def get_total_inf(df):
    """Return a one-element Series, labelled 'total', holding the sum of df['inf'].

    Uses the builtin ``sum``, so NaN values propagate into the result and an
    empty column yields 0.
    """
    total_influence = sum(df['inf'])
    return pd.Series([total_influence], index=['total'])

def get_influence(df, r=-0.000068, timeout_seconds=300):
    """Add an 'inf' column to ``df`` and return it.

    ``P`` and ``influence`` come from ``casIn.user_influence``; the first value
    returned by ``influence(P(df, r=r))`` is stored as ``df['inf']``.
    NOTE(review): presumably this is a per-row influence score -- confirm
    against the casIn documentation.

    Parameters
    ----------
    df : pandas.DataFrame
        Cascade to score. It is modified in place (the 'inf' column is added
        or overwritten) and the same object is returned.
    r : float
        Passed through to ``P`` as its ``r`` argument. The default is the
        value this notebook has always used.
    timeout_seconds : int
        Time allowed for the ``influence`` call before it is abandoned.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with 'inf' set. If ``influence`` raises or runs past
        the timeout, 'inf' is set to 0 for every row instead.
    """
    # Not covered by the timeout or the fallback: an error here propagates.
    p_ij = P(df, r=r)

    with timeout(seconds=timeout_seconds):
        try:
            # The second return value is not used in this notebook.
            inf, _ = influence(p_ij)
            df['inf'] = inf
        except Exception:
            # Best-effort fallback for failures and for the TimeoutError raised
            # by `timeout`. `Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit can still stop a long-running loop.
            df['inf'] = 0

    return df

In [59]:
# Number of simulated cascades drawn by the loop below.
total_cascades = 500_000

In [60]:
for i in range(total_cascades):
    # Pick a cascade size from the observed sizes, scaled up by 100, and draw
    # that many tweets as the "actual" cascade.
    curr_cascade_size = 100 * np.random.choice(possible_cascade_size)
    actual_cascade = tweets.sample(curr_cascade_size).reset_index(drop=True)

    # The "sample" cascade is a 1% subsample of the actual one. It is taken
    # before get_influence adds the 'inf' column to actual_cascade in place.
    sample_cascade = actual_cascade.sample(frac=0.01).reset_index(drop=True)

    actual_influence = get_influence(actual_cascade)
    sample_influence = get_influence(sample_cascade)

    # Total influence per user within each cascade (single 'total' column).
    userwise_actual = actual_influence.groupby('user_id').apply(get_total_inf)
    userwise_sample = sample_influence.groupby('user_id').apply(get_total_inf)

    # Add this iteration's totals to the running per-user sums; users absent
    # from a cascade contribute 0.
    for column, totals in (('actual', userwise_actual), ('sample', userwise_sample)):
        joined = sample_actual.join(totals)
        joined[column] = joined[column] + joined['total'].fillna(0)
        sample_actual = joined.drop(columns='total')

print('done')

done


In [8]:
# Pearson correlation between the accumulated per-user totals from the 1%
# samples and from the full cascades; the cell displays the result.
pearsonr(x=sample_actual['sample'], y=sample_actual['actual'])

(0.96, 0.0)