In [1]:
import string

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression

from orbit.estimators.stan_estimator import StanEstimatorMCMC, StanEstimatorMAP, StanEstimatorVI

### Make Data

In [57]:
# Fixed seed so the simulated data -- and therefore `coef` and every estimate
# derived from it -- is identical on each Restart & Run All.
RANDOM_STATE = 42

# coef=True makes make_regression also return the ground-truth coefficients
# of the linear model that generated `y`.
X, y, coef = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    coef=True,
    noise=0.1,
    random_state=RANDOM_STATE,
)

# One upper-case letter per feature column: 'A', 'B', ...
feature_names = list(string.ascii_uppercase[:X.shape[1]])
response_name = 'y'

df = pd.DataFrame(X, columns=feature_names)
df[response_name] = y
df.head()

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,y
0,-1.485952,0.619478,0.035315,1.582989,0.249205,-0.268111,0.295561,0.617178,0.259455,0.641988,117.640877
1,0.783167,0.484613,0.674671,0.37493,-0.316085,-2.612286,-1.966553,-1.275022,-0.113987,-1.130114,-107.292876
2,1.33826,0.961702,0.074892,-2.926051,0.96087,-1.171354,-0.752707,-1.244668,-0.122195,-0.367483,-144.000556
3,0.38717,-0.029594,0.950295,-0.977101,-0.063491,-0.559599,-1.526286,-0.422269,0.129235,0.606209,-111.502357
4,-0.082833,0.500725,0.540926,0.899007,-0.076644,0.552827,-0.726187,-0.517257,0.159257,-1.669069,-3.708838


## Low-level estimator development

In [58]:
# Ground-truth coefficients returned by make_regression(coef=True) in the
# data-generation cell; displayed here as the reference the fitted
# estimates are compared against at the end of the notebook.
coef

array([ 0.        ,  0.        ,  0.3436227 , 45.90076004, 58.76923627,
        0.        , 32.15740561, 33.67823095,  0.        ,  0.        ])

### Data Input

In [73]:
# Data dictionary handed to the estimators' `fit(..., data_input=...)` calls.
# The feature matrix is sliced once and every array entry is passed as a plain
# NumPy array, so the response is treated the same way as the regressors
# instead of going in as a pandas Series.
features = df[feature_names].values
num_obs, num_features = features.shape

data_input = dict(
    NUM_OF_OBS=num_obs,
    RESPONSE=df[response_name].values,
    # The PR_* group is left empty: zero columns and empty prior vectors.
    # NOTE(review): PR_/RR_ presumably name two regressor groups in the Stan
    # model -- confirm against the model definition.
    NUM_OF_PR=0,
    PR_MAT=np.zeros(shape=(num_obs, 0)),
    PR_BETA_PRIOR=np.array([]),
    PR_SIGMA_PRIOR=np.array([]),
    # All features go into the RR_* group, with a prior vector of zeros for
    # RR_BETA_PRIOR and a constant 5.0 for RR_SIGMA_PRIOR.
    NUM_OF_RR=num_features,
    RR_MAT=features,
    RR_BETA_PRIOR=np.zeros(shape=(num_features,)),
    RR_SIGMA_PRIOR=np.full(shape=(num_features,), fill_value=5.),
    # NOTE(review): the meaning of penalty type 0 and of the two scale values
    # is defined by the Stan model, not visible here -- confirm.
    REG_PENALTY_TYPE=0,
    AUTO_RIDGE_SCALE=0.5,
    LASSO_SCALE=0.5,
)


### MCMC

In [74]:
# Fit the 'linear_regression' model with the MCMC estimator.
# NOTE(review): num_sample=500 is presumably the number of draws kept in the
# extract -- confirm against the orbit estimator documentation.
estimator = StanEstimatorMCMC(num_sample=500)

# `stan_extract` is indexed by parameter name in the following cell; only
# 'pr_beta' and 'rr_beta' are listed in model_param_names.
stan_extract = estimator.fit(
    model_name='linear_regression',
    model_param_names=['pr_beta', 'rr_beta'],
    data_input=data_input,
)

In [75]:
# MCMC estimate: average 'rr_beta' over axis 0 of the extract, then round to
# four decimals so it lines up with the other estimates in the comparison.
coef_1 = np.round(np.mean(stan_extract['rr_beta'], axis=0), decimals=4)

### MAP

In [76]:
# Fit the same 'linear_regression' model and inputs with the MAP estimator.
# Note that this rebinds `estimator` and `stan_extract` from the MCMC run.
estimator = StanEstimatorMAP()
stan_extract = estimator.fit(
    data_input=data_input,
    model_name='linear_regression',
    model_param_names=['pr_beta', 'rr_beta'],
)

In [77]:
# MAP estimate: 'rr_beta' is rounded directly, with no averaging step as in
# the MCMC and VI cells.
coef_2 = np.round(stan_extract['rr_beta'], decimals=4)

### VI

In [78]:
# Fit the same 'linear_regression' model and inputs with the VI estimator.
# Note that this rebinds `estimator` and `stan_extract` from the MAP run.
estimator = StanEstimatorVI(num_sample=500)
stan_extract = estimator.fit(
    data_input=data_input,
    model_name='linear_regression',
    model_param_names=['pr_beta', 'rr_beta'],
)

# VI estimate: average 'rr_beta' over axis 0 of the extract, rounded to four
# decimals.
coef_3 = np.round(np.mean(stan_extract['rr_beta'], axis=0), decimals=4)



### Compare Coefficients

In [79]:
# Side-by-side table: one row per feature, one column per source
# (ground truth followed by the three estimators).
coef_table = np.column_stack((coef, coef_1, coef_2, coef_3))
pd.DataFrame(coef_table, columns=['true', 'mcmc', 'map', 'vi'])

Unnamed: 0,true,mcmc,map,vi
0,0.0,-0.004,-0.004,0.0073
1,0.0,-0.0024,-0.0026,0.0037
2,0.343623,0.3437,0.3436,0.3491
3,45.90076,45.9004,45.9005,45.9062
4,58.769236,58.7686,58.7686,58.7666
5,0.0,0.0015,0.0016,0.0106
6,32.157406,32.1528,32.1529,32.1502
7,33.678231,33.6802,33.6802,33.6856
8,0.0,0.0025,0.0027,-0.002
9,0.0,-0.0011,-0.0011,-0.0006
