In [3]:
import numpy as np
import argparse
import json
import sys
import os
import math
from scipy.optimize import fsolve
from scipy import log
import random
import datetime


# External libraries
import torch
from torch import optim

# Internal libraries

import models
import posteriors
import priors
import hawkes_model, excitation_kernels
import learners
import utils

from make_data_for_samples import make_data              #多个样本数据
from make_data_for_estimate import make_estimate_data    #单个样本数据

def make_object(module, name, args):
    return getattr(module, name)(**args)

#def learn_vi(events, end_time, vi_seed, adjacency_true, inference_param_dict, return_learner=False):
def learn_vi(events, vi_seed, inference_param_dict, return_learner=False):
    # Extract some parameters for easier access
    #n_nodes = len(events)
    n_events = len(events)
    n_nodes = len(events[0])
    M = inference_param_dict['excitation']['args'].get('M', 1)
    print('M is:',M)
    n_params = n_nodes * (n_nodes * M + 1)
    n_edges = M * n_nodes ** 2
    # Set seed
    np.random.seed(vi_seed)
    # Set starting pointM * n_nodes ** 2
    x0 = torch.tensor(
        np.hstack((
            np.hstack((  # alpha, the mean of the parameters
                np.random.normal(loc=0.1, scale=0.1, size=n_nodes),
                np.random.normal(loc=0.1, scale=0.1, size=n_edges),)),
            np.hstack((  # beta=log(sigma), log of the variance of the parameters
                np.log(np.clip(np.random.normal(loc=0.2, scale=0.1, size=n_nodes), 1e-1, 2.0)),
                np.log(np.clip(np.random.normal(loc=0.2, scale=0.1, size=n_edges), 1e-1, 2.0)),))
        )),
        dtype=torch.float64, requires_grad=True
    )
    # Init Hawkes process model object
    excitation_obj = make_object(excitation_kernels, **inference_param_dict['excitation'])
    hawkes_model_obj = hawkes_model.HawkesModel(excitation=excitation_obj, verbose=False)
    # Init the posterior object
    posterior_obj = make_object(posteriors, **inference_param_dict['posterior'])
    # Init the prior object
    prior_type = inference_param_dict['prior']['name']
    prior_args = inference_param_dict['prior']['args']
    prior_args['C'] = torch.tensor(prior_args['C'], dtype=torch.float64)  # cast to tensor
    prior_obj = make_object(priors, prior_type, prior_args)
    # Init the variational inference model object
    model = models.ModelHawkesVariational(
        model=hawkes_model_obj, posterior=posterior_obj, prior=prior_obj,
        **inference_param_dict['model']['args'])
   
    # Init the optimizer
    opt_type = inference_param_dict['optimizer']['name']
    opt_args = inference_param_dict['optimizer']['args']
    opt = getattr(optim, opt_type)([x0], **opt_args)
    # Init learner
    learner = learners.VariationalInferenceLearner(
        model=model, optimizer=opt, **inference_param_dict['learner']['args'])
    # Fit the model
    events_t = [torch.tensor(events_i) for events_i in events]  # cast to tensor
    learner.fit(events_t, x0=x0, callback=None)
    print()
    if return_learner:
        return learner
    # Extract the mode of the posterior
    z_est_mode = learner.model.posterior.mode(learner.coeffs[:n_params], learner.coeffs[n_params:])
    adj_est_ora = z_est_mode[n_nodes:].detach()
    mu_est_ora = z_est_mode[:n_nodes].detach()
    adj_est_ora = adj_est_ora.view(n_nodes, n_nodes, M)
    adj_est = z_est_mode[n_nodes:].detach().numpy()
    adj_est = np.reshape(adj_est, (n_nodes, n_nodes, M)).sum(-1).ravel()
    mu_est = z_est_mode[:n_nodes].detach().numpy()
    #mu_est = np.reshape(mu_est,n_nodes).ravel()
    coeffs_est = learner.coeffs.detach().numpy()
    log_like_sum = hawkes_model_obj.log_likelihood(mu_est_ora,adj_est_ora)
    
    return coeffs_est, adj_est,mu_est


def run(user,start_line,end_line,exp_dir, output_filename, decay_rate=0.01, stdout=None, stderr=None):
    # Reset random seed
    np.random.seed(None)

    if stdout is not None:
        sys.stdout = open(stdout, 'w')
    if stderr is not None:
        sys.stderr = open(stderr, 'w')

    print('\nExperiment parameters')
    print('=====================')
    print(f'        exp_dir = {exp_dir:s}')
    print(f'output_filename = {output_filename:s}')
    print(flush=True)
    print('\nStart time is: ', datetime.datetime.today())

    result_dict = {}
    
    data_fileName = "./data/DSL-StrongPasswordData.xls"
    global events
    #events = make_data('s036',12350,12400,data_fileName)
    event = make_data(user,start_line,end_line,data_fileName)
    
    '''
    ######一次只训练一个样本，复制多个
    times = 20
    
    #print (event)
    events = []
    
    for i in range(times):
        events.extend(event)
    print(events)
    ######
    '''
    
    n_jumps_per_dim = list(map(len, event[0]))
    print(event)
    print('\nNumber of jumps:', len(event[0])*sum(n_jumps_per_dim))
    print('\nper node:', n_jumps_per_dim)
    
    #11+121
    #C_list = [1.0]*132
    #11+121*M
    M = 11
    C_list = [0.1]*(11+121*M)
    
    #LaplacePrior
    '''
    param_dict={'inference':{'vi_exp':{'excitation': {'name': 'ExponentialKernel','args': {'decay': 0.01, 'cut_off': 1000.0}}, 
                          'posterior': {'name': 'LogNormalPosterior', 'args': {}},
                          'prior': {'name': 'LaplacePrior', 'args': {'dim': 11, 'n_params': 132, 'C': C_list}}, 
                          'model': {'args': {'n_samples': 1, 'n_weights': 1, 'weight_temp': 1.0}}, 
                          'optimizer': {'name': 'Adam', 'args': {'lr': 0.01}}, 
                          'learner': {'args': {'tol': 1e-04, 'lr_gamma': 0.9999, 'max_iter': 20000, 'hyperparam_momentum': 0.5, 'hyperparam_interval': 100, 'hyperparam_offset': 0}}}}
               }
    
    param_dict['inference']['vi_exp']['excitation']['args']['decay'] = decay_rate
    print (param_dict)
    '''
    

    param_dict={'inference':{'vi_exp':{'excitation': {'name': 'MixtureGaussianFilter','args': {'M': 11, "end_time": 36, 'cut_off': 1000.0}}, 
                          'posterior': {'name': 'LogNormalPosterior', 'args': {}},
                          'prior': {'name': 'GaussianLaplacianPrior', 'args': {'dim': 11, 'n_params': 1342, 'C': C_list}}, 
                          'model': {'args': {'n_samples': 1, 'n_weights': 1, 'weight_temp': 1.0}}, 
                          'optimizer': {'name': 'Adam', 'args': {'lr': 0.01}}, 
                          'learner': {'args': {'tol': 1e-04, 'lr_gamma': 0.9999, 'max_iter': 20000, 'hyperparam_momentum': 0.5, 'hyperparam_interval': 100, 'hyperparam_offset': 0}}}}
               }
    #param_dict['inference']['vi_exp']['excitation']['args']['decay'] = decay_rate
    #print (param_dict)
    
    
    

    print('\nINFERENCE')
    print('=========')

    for key, inference_param_dict in param_dict['inference'].items():
        if key.startswith('vi'):
            print(f'\nRun VI ({key:s})')
            print('------')
            # Set random seed (for reproducibility)
            np.random.seed()  # Reset random number generator to avoid dependency on simulation seed
            #vi_seed = np.random.randint(2**32 - 1)
            vi_seed = np.random.randint(2**16 - 1)
            print(f'vi random seed: {vi_seed}')
            # Run inference
            #global intens_sum
            global integral_instesity
            #coeffs_var, adj_var, mu_var, nu, varsigma = learn_vi(events, vi_seed, inference_param_dict)
            coeffs_var, adj_var, mu_var = learn_vi(event[0], vi_seed, inference_param_dict)
            #模型参数
            adj_var = adj_var.ravel()
            mu_var = mu_var.ravel()            
          
          
            result_dict.update({
                key: {
                    'vi_seed': vi_seed,             # VI random seed
                    'coeffs': coeffs_var.tolist(),  # VI parameters
                    'adjacency': adj_var.tolist(),  # VI Estimator
                    'mu':  mu_var.tolist(),
                }
            })
 

    #print('\n\nSave results...')
    
    #print('\ncoeffs:',  coeffs_var.tolist())
    #print( '\nadjacency:', adj_var.tolist())
    #print('\nmu:', mu_var.tolist())
    #print('\nvarsigma:',varsigma)

    with open(os.path.join(exp_dir, output_filename), 'w') as output_file:
        json.dump(result_dict, output_file)

    # Log that the run is finished
    print('\n\nFinished.')
    print('\nEnd time is: ', datetime.datetime.today())


if __name__ == "__main__":

    userlist = ['s002','s032','s036','s047','s052']
    start_line = 0
    end_line = 1
    decay_rate = 0.1
    
    
    
    for _,user in enumerate (userlist):
        out_put_filename = 'C01'+user+'_'+str(start_line)+'_'+str(end_line)+'decay'+str(decay_rate)+'.json'
        run (user=user,start_line=start_line,end_line=end_line,exp_dir='.',output_filename=out_put_filename,decay_rate=decay_rate)      


    


Experiment parameters
        exp_dir = .
output_filename = C01s002_0_1decay0.1.json


Start time is:  2020-10-13 22:02:37.487234
[[array([1.000e-06, 1.491e-01]), array([0.3979, 0.5048]), array([0.5653, 0.6822]), array([0.7865, 0.9282]), array([1.975 , 2.0896]), array([3.5805, 3.6872]), array([4.3395, 4.4411]), array([4.5531, 4.688 ]), array([4.7015, 4.7947]), array([5.053 , 5.1868]), array([5.4039, 5.4781])]]

Number of jumps: 242

per node: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

INFERENCE

Run VI (vi_exp)
------
vi random seed: 6891
M is: 11


RuntimeError: The size of tensor a (1342) must match the size of tensor b (0) at non-singleton dimension 0

In [11]:
event

NameError: name 'event' is not defined