In [9]:
import time
import datetime
import Hazard
from Utils.NetworkUtils import *
from Utils.Plot import *
from DynamicNetwork import DynamicNetwork
from HazardMLE import *

# Inputs used to generate the dynamic network.
# Simulation start: midnight on 08/16/2016 as a Unix timestamp. time.mktime()
# interprets the struct_time as *local* time, so the resulting value depends
# on the timezone of the machine running the notebook.
start_date = int(time.mktime(datetime.datetime(2016, 8, 16).timetuple()))
file = "network_data/TheGoodPlace_sample.graphml"   # The Good Place network
WEEK_IN_SECOND = 60 * 60 * 24 * 7                   # Seconds in one week
HAZARD_BETA = [0.1, 0.1, 0.1]                       # Coefficients handed to the hazard simulation

# Load the graph, then shrink it to a small sample. The sampling argument is
# 10/len(g) -- presumably the fraction of nodes to keep, i.e. about 10 nodes.
g = get_graphml(file)
g = sample(g, 10 / len(g))
graph_info(g)

Graph nodes: 10, edges: 8


In [10]:
# NOTE(review): `g` is rebound from the sampled graph to its DynamicNetwork
# wrapper, so re-running this cell alone would wrap an already-wrapped
# network. Re-run the graph-loading cell first.
g = DynamicNetwork(g)   
# Hazard model simulation, using params [beta1, beta2, beta3] = HAZARD_BETA
# (currently [0.1, 0.1, 0.1]), starting at start_date with a step of
# WEEK_IN_SECOND seconds.
model = Hazard.Hazard(g, start_date, WEEK_IN_SECOND, HAZARD_BETA)        
# hazard() returns two values: `ref_result`, reported below as the adopted
# users per week (one entry per step), and `fake_data`, the simulated records
# later turned into MLE training data.
ref_result, fake_data = model.hazard()
print("Adopted users every week: {}, {} steps.".format(ref_result, len(ref_result)))

Adoption Possibility 0.10000, got 0.21478, Not Adopted
Adoption Possibility 0.10000, got 0.79843, Not Adopted
Adoption Possibility 0.10000, got 0.49281, Not Adopted
Adoption Possibility 0.10000, got 0.23249, Not Adopted
Adoption Possibility 0.10000, got 0.41823, Not Adopted
Adoption Possibility 0.10000, got 0.34055, Not Adopted
Adoption Possibility 0.10000, got 0.36731, Not Adopted
Adoption Possibility 0.10000, got 0.66949, Not Adopted
Adoption Possibility 0.10000, got 0.22565, Not Adopted
Adoption Possibility 0.10000, got 0.71367, Not Adopted
Adoption Possibility 0.10000, got 0.64148, Not Adopted
Adoption Possibility 0.10000, got 0.23146, Not Adopted
Adoption Possibility 0.10000, got 0.53630, Not Adopted
Adoption Possibility 0.10000, got 0.12659, Not Adopted
Adoption Possibility 0.10000, got 0.79701, Not Adopted
Adoption Possibility 0.10000, got 0.40655, Not Adopted
Adoption Possibility 0.10000, got 0.46474, Not Adopted
Adoption Possibility 0.10000, got 0.66852, Not Adopted
Adoption P

In [11]:
def fake_train_data(fake_data):
    """Convert simulated hazard records into MLE training inputs.

    Args:
        fake_data: mapping whose keys are indexable with the node id at
            position 0 and the timestamp in seconds at position 1, and whose
            values are sequences laid out as
            ``(adoption, adopted_neighbors, sentiment)``.

    Returns:
        A ``(exog, endog)`` pair. ``exog`` is a DataFrame with columns
        ``CONSTANT`` (always 1), ``ADOPTED_NEIGHBORS`` and ``SENTIMENT``;
        ``endog`` is a Series named ``ADOPTION``. Rows follow the iteration
        order of ``fake_data`` (they are not sorted).

    Side effects:
        Prints a demo table of every record, sorted by node id and timestamp.
    """
    from pandas import DataFrame, Series

    # For demo only begin
    # Full table, including node id / timestamp, ordered for readability.
    demo_rows = sorted(
        (
            [key[0], key[1]] + [1] + list(record[1:]) + [record[0]]
            for key, record in fake_data.items()
        ),
        key=lambda row: (row[0], row[1]),
    )
    demo_table = DataFrame(
        demo_rows,
        columns=["NODEID", "SECONDS", "CONSTANT", "ADOPTED_NEIGHBORS", "SENTIMENT", "ADOPTION"])
    print(demo_table.to_string())
    # For demo only end

    # Actual training data: intercept column plus the covariates as exog,
    # the adoption flag as endog, both in the mapping's own order.
    records = [record for _, record in fake_data.items()]
    exog_frame = DataFrame(
        [[1] + list(record[1:]) for record in records],
        columns=["CONSTANT", "ADOPTED_NEIGHBORS", "SENTIMENT"])
    endog_series = Series([record[0] for record in records], name="ADOPTION")

    return exog_frame, endog_series

# Build the exogenous design matrix and endogenous adoption labels used for
# the MLE fit (this call also prints the sorted demo table).
exog, endog = fake_train_data(fake_data)

                NODEID     SECONDS  CONSTANT  ADOPTED_NEIGHBORS  SENTIMENT  ADOPTION
0            146318194  1471330800         1                  0  -0.641097         0
1            146318194  1471935600         1                  1  -0.004498         0
2            146318194  1472540400         1                  2  -0.621756         0
3            146318194  1473145200         1                  3   0.935530         0
4            146318194  1473750000         1                  4  -0.754987         0
5            146318194  1474354800         1                  5   0.684619         0
6            146318194  1474959600         1                  6   0.452047         0
7            146318194  1475564400         1                  7  -0.855246         0
8            146318194  1476169200         1                  8  -0.158783         0
9            146318194  1476774000         1                  9   0.188584         0
10           146318194  1477378800         1                 10  

In [15]:
# Fit HazardModel with the "lbfgs" method; each of the three coefficients is
# constrained to the same (0.00001, .999) interval.
result = HazardModel(exog=exog, endog=endog).fit(
    method="lbfgs",
    bounds=[(0.00001, .999)] * 3,
)
print("Beta values {}".format(result.params))

Beta values [  1.00000000e-05   1.00000000e-05   7.32540455e-02]


In [15]:
def print_loglikelihood(exogs, endogs, params):
    """Print ``params`` followed by the total log-likelihood of the data.

    Each observation contributes ``logcdf(x . params)`` of the standard
    normal when its label is 1 and ``logcdf(-(x . params))`` when its label
    is 0; contributions are summed row by row.

    Relies on ``np`` and ``stats`` already being in scope (presumably numpy
    and scipy.stats brought in by the notebook's star imports).

    Args:
        exogs: array-like of covariate rows, one per observation.
        endogs: array-like of labels; every entry must equal 0 or 1.
        params: coefficient vector to evaluate.

    Raises:
        AssertionError: if a label is neither 0 nor 1.
    """
    rows = np.asarray(exogs)
    labels = np.asarray(endogs)

    total = 0

    for row, label in zip(rows, labels):
        # Label 1 uses the linear predictor as is; label 0 uses its negation.
        if label == 1:
            sign = 1
        elif label == 0:
            sign = -1
        else:
            assert False, "Shouldn't run into this line"
        total += stats.norm.logcdf(sign * np.dot(row, params)).sum()

    print("{}, {}".format(params, total))
    
# Compare the log-likelihood at the fitted coefficients with the one at the
# coefficients used to run the simulation (HAZARD_BETA), on the same data.
# Disabled alternative: round the fitted params to 3 decimals first.
# params = [round(p, 3) for p in result.params] # Reset params to 3 digits after decimal
print("MLE loglikelihood")
print_loglikelihood(exog, endog, result.params)
print("Original loglikelihood")
print_loglikelihood(exog, endog, HAZARD_BETA)

MLE loglikelihood
[  1.00000000e-05   1.00000000e-05   7.32540455e-02], -52.64501631744964
Original loglikelihood
[0.1, 0.1, 0.1], -104.63422007228706
