# Poisson Hurdle Model Test
This shows that Stan and statsmodels give very close results on a simulated dataset. It also cross-validates the Stan and statsmodels implementations.

In [1]:
import sys
sys.path.append("..")
from models.poih import PoiH
sys.path.remove("..")

import numpy as np

In [2]:
def gen_pospoi(mu):
    """Draw one positive Poisson(mu) value by rejection sampling.

    ``poisson.rvs(mu)`` is sampled up to 1000 times and the first draw that
    is greater than zero is returned.  If every attempt comes back as zero,
    0 is returned as a fallback.
    """
    max_attempts = 1000
    for _ in range(max_attempts):
        draw = poisson.rvs(mu)
        if draw > 0:
            return draw
    # No positive value within the attempt budget.
    return 0

def ztpoisson(lambda_par):
    """Zero truncated Poisson distribution.

    Draws one sample per rate by inverse-CDF sampling: a uniform variate is
    drawn on ``[P(Y=0), 1)`` and mapped through the Poisson quantile
    function, so the result is never zero.

    Parameters
    ----------
    lambda_par : array_like
        Poisson rate(s); every entry must be strictly positive.  A scalar is
        treated as a one-element array.

    Returns
    -------
    numpy.ndarray
        Integer array of samples, each >= 1, one per entry of ``lambda_par``.

    Raises
    ------
    ValueError
        If any rate is not strictly positive (including NaN).
    """
    rate = np.atleast_1d(np.asarray(lambda_par, dtype=float))
    if not np.all(rate > 0):
        raise ValueError("lambda_par must contain only positive rates")

    # Probability mass at zero for each rate.
    p_zero = poisson.pmf(0, rate)

    # One uniform draw per observation on [P(Y=0), 1); loc/scale broadcast,
    # so a single vectorised call replaces the per-element Python loop.
    u = uniform.rvs(loc=p_zero, scale=1 - p_zero)

    counts = poisson.ppf(u, rate)
    # If the rate is so small that P(Y=0) rounds to 1, the quantile is inf;
    # the zero-truncated distribution then concentrates on 1.
    counts = np.where(np.isfinite(counts), counts, 1.0).astype(int)

    # Guard the measure-zero boundary u == P(Y=0), where the quantile is 0.
    return np.maximum(counts, 1)

In [3]:
from scipy.stats import uniform, binom, poisson, bernoulli, norm
import statsmodels.api as sm

# --- Simulate a Poisson hurdle data set ---------------------------------
np.random.seed(1)                    # fixed seed so the example is reproducible
nobs = 250000                        # sample size

# Covariates: one binary (success probability 0.7) and one standard normal.
x1 = binom.rvs(1, 0.7, size=nobs)
x2 = norm.rvs(loc=0, scale=1.0, size=nobs)

# Full design matrix (constant added by statsmodels, then x1 and x2); the
# hurdle part only uses its first two columns.
X = sm.add_constant(np.column_stack((x1, x2)))
X_infl = X[:, :2]

# True coefficients and the corresponding linear predictors.
beta = [1.0, -0.6, -0.3]             # count (Poisson) part
betal = [1.0, -0.3]                  # hurdle (logit) part
xb = X.dot(beta)
xl = X_infl.dot(betal)

exb = np.exp(xb)                     # Poisson mean
exc = 1.0 / (1.0 + np.exp(-xl))      # logistic probability of a positive count

# Hurdle indicator: 1 -> the observation is positive, 0 -> it stays zero.
p = bernoulli.rvs(exc)

# Rows that cross the hurdle get a positive-Poisson draw (visited in
# ascending row order); all other rows keep the initial zero.
phy = np.zeros(nobs)
for i in np.flatnonzero(p > 0):
    phy[i] = gen_pospoi(exb[i])

In [4]:
# # this is an alternative way to generate poisson hurdle samples
# from scipy.stats import uniform, binom, poisson, bernoulli, norm
# import statsmodels.api as sm

# np.random.seed(1)                    # set seed to replicate example
# nobs= 250000                         # number of obs in model 

# x1 = binom.rvs(1, 0.7, size=nobs)
# x2 = norm.rvs(loc=0, scale=1.0, size=nobs)

# X = sm.add_constant(np.column_stack((x1, x2)))
# X_infl = X

# beta = [1.0, -0.6, -0.3]
# xb = np.dot(X, beta)          # linear predictor

# betal = [1.0, -0.3, -0.5]
# xl = np.dot(X, betal)         # linear predictor

# exb = np.exp(xb)
# exc = 1.0 / (1.0 + np.exp(-xl))

# p = bernoulli.rvs(exc)

# phy=p*ztpoisson(exb)

In [5]:
# Build the hurdle model from the simulated data: `phy` is the response,
# `X` the full design matrix and `X_infl` (its first two columns) is passed
# as `exog_infl`.
# NOTE(review): `model_path` presumably points at the directory holding the
# Stan model files -- confirm against PoiH.
mod = PoiH(phy,X,exog_infl=X_infl,model_path='../models')

In [6]:
# Fit the same model with both back ends. `fit` returns a sequence; only its
# first element (the result dict displayed below) is kept.
res0=mod.fit(method='stan')[0]
res1=mod.fit(method='statsmodels')[0]

True values of the parameters:

In [7]:
# Both are plain lists, so `+` concatenates: the two logit coefficients
# followed by the three Poisson coefficients, for comparison with 'params'.
betal+beta

[1.0, -0.3, 1.0, -0.6, -0.3]

In [8]:
res0

{'params': array([ 1.00406835, -0.30395818,  0.9983285 , -0.60049126, -0.30103965]),
 'llf_logit': -154802.03944282015,
 'llf_poi': -248093.15764090483,
 'llf': -402895.197083725,
 'df': 5,
 'aic': 805800.39416745,
 'cpu_time': 1.2893235683441162,
 'model': 'poih',
 'method': 'stan'}

In [9]:
res1

{'params': array([ 1.00406835, -0.30395818,  0.99833044, -0.60049971, -0.3010394 ]),
 'llf_logit': -154802.03944282015,
 'llf_poi': -248093.15763819497,
 'llf': -402895.1970810151,
 'df': 5,
 'aic': 805800.3941620302,
 'cpu_time': 1.214252233505249,
 'model': 'poih',
 'method': 'statsmodels'}

In [10]:
# Gap in total log-likelihood between the Stan and the statsmodels fit.
res0['llf']-res1['llf']

-2.7099158614873886e-06

## Test initialization (`start_params`)

In [11]:
# Refit each method, this time starting from the other method's estimates.
res2=mod.fit(method='stan',start_params=res1['params'])[0]
res3=mod.fit(method='statsmodels',start_params=res0['params'])[0]

In [12]:
# Change in log-likelihood relative to the first fit of the same method
# (Stan first, then statsmodels).
res2['llf']-res0['llf'],res3['llf']-res1['llf']

(2.720160409808159e-06, -2.7099158614873886e-06)

In [13]:
res2

{'params': array([ 1.00406835, -0.30395818,  0.99833043, -0.60049946, -0.3010396 ]),
 'llf_logit': -154802.0394428201,
 'llf_poi': -248093.15763818478,
 'llf': -402895.19708100485,
 'df': 5,
 'aic': 805800.3941620097,
 'cpu_time': 0.5623042583465576,
 'model': 'poih',
 'method': 'stan'}

In [14]:
res3

{'params': array([ 1.00406835, -0.30395818,  0.9983285 , -0.60049126, -0.30103965]),
 'llf_logit': -154802.0394428201,
 'llf_poi': -248093.15764090494,
 'llf': -402895.197083725,
 'df': 5,
 'aic': 805800.39416745,
 'cpu_time': 0.5592727661132812,
 'model': 'poih',
 'method': 'statsmodels'}