In [9]:
import time
import datetime
import Hazard
from Utils.NetworkUtils import *
from Utils.Plot import *
from DynamicNetwork import DynamicNetwork
from HazardMLE import *

# Configuration used to generate the dynamic network and run the simulation.
# The start date is stored as a Unix timestamp in seconds. time.mktime()
# interprets the parsed date in the machine's local timezone, so the value
# depends on where the notebook is executed.
start_date = int(time.mktime(datetime.datetime.strptime("08/16/2016", "%m/%d/%Y").timetuple()))
file = "network_data/TheGoodPlace_sample.graphml"   # GraphML file of the "The Good Place" sample network
WEEK_IN_SECOND = 7 * 24 * 60 * 60                   # Number of seconds in one week
# Coefficients [beta1, beta2, beta3] handed to the hazard simulation.
HAZARD_BETA = [0.1, 0.3, 0.3]

# Load the graph, keep a small sample of it, and print its node/edge counts.
# NOTE(review): no random seed is set here; if sample() is stochastic the
# selected nodes (and every downstream result) change between runs -- confirm.
g = get_graphml(file)
g = sample(g, 10/len(g))      # Sampling fraction 10/len(g) targets 10 nodes
graph_info(g)

Graph nodes: 10, edges: 8


In [10]:
# Wrap the sampled graph in a DynamicNetwork. `g` is rebound to the wrapper,
# so re-running only this cell would wrap an already-wrapped object.
g = DynamicNetwork(g)
# Hazard model simulation, using params [beta1, beta2, beta3] = HAZARD_BETA = [0.1, 0.3, 0.3]
model = Hazard.Hazard(g, start_date, WEEK_IN_SECOND, HAZARD_BETA)
# hazard() returns two values: the per-week adoption result printed below and
# the records that are later turned into the MLE training data.
ref_result, fake_data = model.hazard()
print("Adopted users every week: {}, {} steps.".format(ref_result, len(ref_result)))

Node 88796652 week 1471330800 Adoption Possibility 0.10000, got 0.36556, Not Adopted
Node 1118132442 week 1471330800 Adoption Possibility 0.10000, got 0.90103, Not Adopted
Node 1656968611 week 1471330800 Adoption Possibility 0.10000, got 0.36829, Not Adopted
Node 731845486274023424 week 1471330800 Adoption Possibility 0.10000, got 0.66475, Not Adopted
Node 64436697 week 1471330800 Adoption Possibility 0.10000, got 0.52847, Not Adopted
Node 399375794 week 1471330800 Adoption Possibility 0.10000, got 0.45980, Not Adopted
Node 570019372 week 1471330800 Adoption Possibility 0.10000, got 0.66833, Not Adopted
Node 18827700 week 1471330800 Adoption Possibility 0.34858, got 0.58038, Not Adopted
Node 117126460 week 1471330800 Adoption Possibility 0.10000, got 0.39932, Not Adopted
Node 139129297 week 1471330800 Adoption Possibility 0.10000, got 0.33051, Not Adopted
Node 88796652 week 1471935600 Adoption Possibility 0.10000, got 0.90903, Not Adopted
Node 1118132442 week 1471935600 Adoption Possib

In [11]:
def fake_train_data(fake_data, verbose=True):
    """Turn simulated hazard records into the exog/endog inputs for the MLE.

    Parameters
    ----------
    fake_data : dict
        Maps a two-item key ``(node id, seconds)`` to a three-item sequence
        ``(adoption, adopted_neighbors, sentiment)``.
    verbose : bool, optional
        When True (the default, matching the previous behaviour) a preview
        table of all records, sorted by node id and then by seconds, is
        printed for demonstration purposes.

    Returns
    -------
    (DataFrame, Series)
        ``exog`` with columns CONSTANT (always 1), ADOPTED_NEIGHBORS and
        SENTIMENT, and ``endog`` named ADOPTION. Rows follow the iteration
        order of ``fake_data`` (not the sorted order of the preview).
    """
    from pandas import DataFrame, Series

    if verbose:
        # Demo only: human-readable dump of every record, including its key.
        preview_rows = sorted(
            ([key[0], key[1], 1] + list(record[1:]) + [record[0]]
             for key, record in fake_data.items()),
            key=lambda row: (row[0], row[1]),
        )
        preview = DataFrame(
            preview_rows,
            columns=["NODEID", "SECONDS", "CONSTANT", "ADOPTED_NEIGHBORS", "SENTIMENT", "ADOPTION"])
        print(preview.to_string())

    # Materialise once so exog and endog are guaranteed to share row order.
    records = list(fake_data.values())
    train_data_exog = DataFrame(
        [[1] + list(record[1:]) for record in records],
        columns=["CONSTANT", "ADOPTED_NEIGHBORS", "SENTIMENT"])
    train_data_endog = Series([record[0] for record in records], name="ADOPTION")

    return train_data_exog, train_data_endog

# Build the MLE inputs from the simulated records: exog holds the regressors
# (CONSTANT, ADOPTED_NEIGHBORS, SENTIMENT) and endog the ADOPTION outcome.
# The call also prints a preview table of the records.
exog, endog = fake_train_data(fake_data)

                NODEID     SECONDS  CONSTANT  ADOPTED_NEIGHBORS  SENTIMENT  ADOPTION
0           1118132442  1471330800         1                0.0   0.167184         0
1           1118132442  1471935600         1                0.0  -0.957688         0
2           1118132442  1472540400         1                0.0   0.611058         0
3           1118132442  1473145200         1                0.0  -0.590297         0
4           1118132442  1473750000         1                0.0   0.245960         0
5           1118132442  1474354800         1                0.0  -0.847044         0
6           1118132442  1474959600         1                0.0  -0.835381         0
7           1118132442  1475564400         1                0.0  -0.131320         0
8           1118132442  1476169200         1                0.0  -0.308397         0
9           1118132442  1476774000         1                1.0  -0.574101         0
10          1118132442  1477378800         1                1.0  

In [12]:
# Fit the hazard model with the "lbfgs" method, giving one (lower, upper)
# bounds pair of (0.00001, 0.999) for each of the three parameters.
# NOTE(review): in the recorded output the first two estimates equal the lower
# bound (1e-05), so the fit ended on the constraint -- worth comparing against
# HAZARD_BETA before trusting the estimates.
result = HazardModel(exog=exog, endog=endog).fit(method="lbfgs", bounds=[(0.00001, .999), (0.00001, .999), (0.00001, .999)])
print("Beta values {}".format(result.params))

Beta values [  1.00000000e-05   1.00000000e-05   2.36919891e-01]


In [13]:
def print_loglikelihood(exogs, endogs, params):
    """Print ``params`` together with the log-likelihood they yield.

    Every observation contributes ``norm.logcdf(x . params)`` when its outcome
    is 1 and ``norm.logcdf(-(x . params))`` when its outcome is 0; the
    contributions are added up in row order. The result is written to stdout
    as ``"<params>, <log-likelihood>"`` and nothing is returned.

    exogs  -- array-like, one row of regressors per observation
    endogs -- array-like of outcomes, each expected to be 0 or 1
    params -- coefficient vector dotted with every row of ``exogs``

    An outcome other than 0 or 1 trips the assertion.
    """
    design = np.asarray(exogs)
    outcomes = np.asarray(endogs)

    total = 0
    for features, outcome in zip(design, outcomes):
        if outcome == 1:
            negate = False
        elif outcome == 0:
            negate = True
        else:
            assert False, "Shouldn't run into this line"
            continue  # reachable only with assertions disabled: row adds nothing

        score = np.dot(features, params)
        if negate:
            score = -1 * score
        total += stats.norm.logcdf(score).sum()

    print("{}, {}".format(params, total))
    
# Compare the log-likelihood at the fitted parameters with the log-likelihood
# at HAZARD_BETA, the coefficients that were passed to the simulation.
# params = [round(p, 3) for p in result.params] # Reset params to 3 digits after decimal
print("MLE loglikelihood")
print_loglikelihood(exog, endog, result.params)
print("Original loglikelihood")
print_loglikelihood(exog, endog, HAZARD_BETA)

MLE loglikelihood
[  1.00000000e-05   1.00000000e-05   2.36919891e-01], -35.688953646637856
Original loglikelihood
[0.1, 0.3, 0.3], -38.90919348097263
