# Poisson Model Test
This notebook shows that the Stan and statsmodels fitting methods of the `Poi` model produce the same results on a simulated dataset.

In [1]:
import sys
# Temporarily add the parent directory to the module search path so that the
# project-local `models.poi` module can be imported from this notebook.
sys.path.append("..")
from models.poi import Poi
# Undo the search-path change once the import has succeeded.
sys.path.remove("..")

import numpy as np

In [2]:
from scipy.stats import norm, poisson, binom
import statsmodels.api as sm

# --- Simulate a Poisson regression dataset ---
np.random.seed(18472)   # fixed seed so the simulated draws are reproducible
nobs = 750              # number of simulated observations

# Covariates: one Bernoulli(0.7) indicator and one standard-normal regressor.
x1_2 = binom.rvs(1, 0.7, size=nobs)
x2 = norm.rvs(loc=0, scale=1.0, size=nobs)

# Log-linear mean: intercept 1, slope -1.5 on x1_2 and slope -5.5 on x2.
xb = 1 - 1.5 * x1_2 - 5.5 * x2
exb = np.exp(xb)
py = poisson.rvs(exb)   # Poisson counts drawn with mean exp(xb)

# Design matrix: the two covariates plus the constant column added by statsmodels.
X = sm.add_constant(np.column_stack((x1_2, x2)))

# Bundle the dataset into a dictionary: sample size, predictors, response,
# and the number of coefficients (columns of X).
mydata = {
    'N': nobs,
    'X': X,
    'Y': py,
    'K': X.shape[1],
}

In [3]:
# Build the Poisson model object from the simulated response and design matrix.
# NOTE(review): model_path presumably locates the model files used by Poi — confirm.
mod = Poi(py, X, model_path='../models')

In [4]:
# Fit the same model with each method and keep only the first element of what
# `fit` returns (the result dictionary displayed in the following cells).
res0 = mod.fit(method='stan', return_resid=True)[0]          # Stan
res1 = mod.fit(method='statsmodels', return_resid=True)[0]   # statsmodels

In [5]:
# Display the result dictionary from the Stan fit (params, llf, df, aic, cpu_time, mu, ...).
res0

{'params': array([ 0.99834004, -1.49910058, -5.50045815]),
 'llf': -1448.709392893972,
 'df': 3,
 'aic': 2903.418785787944,
 'cpu_time': 0.004065752029418945,
 'model': 'poi',
 'method': 'stan',
 'mu': array([1.05702524e-01, 4.74128076e-04, 1.34046633e+00, 7.74400034e+00,
        1.60057353e+00, 3.72458578e-04, 1.93004546e-02, 5.32746057e+00,
        1.92187880e+06, 6.12257554e-05, 4.49658430e+00, 6.69341345e+02,
        6.17145084e-04, 1.39431404e-02, 1.54753687e-01, 8.26346561e-02,
        1.34540282e+00, 3.05170880e+00, 1.13573989e-04, 3.10686119e+00,
        4.76113948e-03, 1.56066448e-01, 2.68655748e-01, 5.26465991e+01,
        3.73115886e-02, 2.77324356e+02, 4.89907383e-03, 3.56460853e-01,
        1.01072061e+04, 3.01994785e-02, 7.55338475e-03, 4.79341385e+00,
        1.32148850e+03, 1.38984904e-03, 4.90690362e-04, 3.99945916e-04,
        2.03331124e-02, 3.05646728e-02, 6.23326793e+00, 5.07276545e-04,
        2.79998859e+00, 4.70152865e-03, 2.26820280e+06, 4.36076454e+02,
       

In [6]:
# Display the result dictionary from the statsmodels fit, for comparison with the Stan fit.
res1

{'params': array([ 0.99834132, -1.49910082, -5.5004577 ]),
 'llf': -1448.7093924593232,
 'df': 3,
 'aic': 2903.4187849186465,
 'cpu_time': 0.0019330978393554688,
 'model': 'poi',
 'method': 'statsmodels',
 'mu': array([1.05702650e-01, 4.74129020e-04, 1.34046765e+00, 7.74400688e+00,
        1.60057508e+00, 3.72459194e-04, 1.93004804e-02, 5.32746523e+00,
        1.92187916e+06, 6.12258657e-05, 4.49658830e+00, 6.69341667e+02,
        6.17146080e-04, 1.39431594e-02, 1.54753922e-01, 8.26347564e-02,
        1.34540463e+00, 3.05171161e+00, 1.13574188e-04, 3.10686515e+00,
        4.76114636e-03, 1.56066685e-01, 2.68656049e-01, 5.26466353e+01,
        3.73116496e-02, 2.77324608e+02, 4.89908265e-03, 3.56461244e-01,
        1.01072087e+04, 3.01995176e-02, 7.55339539e-03, 4.79341808e+00,
        1.32148906e+03, 1.38985119e-03, 4.90691337e-04, 3.99946576e-04,
        2.03331394e-02, 3.05647233e-02, 6.23327331e+00, 5.07277552e-04,
        2.79999119e+00, 4.70153546e-03, 2.26820239e+06, 4.36076834e+0

In [7]:
# Consistency check (Stan fit): total absolute gap between the reported 'mu'
# and exp(X · params) recomputed from the fitted coefficients.
np.abs(res0['mu'] - np.exp(X.dot(res0['params']))).sum()

0.0

In [8]:
# Consistency check (statsmodels fit): total absolute gap between the reported
# 'mu' and exp(X · params) recomputed from the fitted coefficients.
np.abs(res1['mu'] - np.exp(X.dot(res1['params']))).sum()

0.0

In [9]:
# Residual degrees of freedom reported by each fit, as a (stan, statsmodels) pair.
tuple(fit['df_resid'] for fit in (res0, res1))

(747, 747.0)

In [10]:
# Difference in log-likelihood between the Stan and statsmodels fits.
res0['llf'] - res1['llf']

-4.3464888221933506e-07