# Estimations

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# One-time setup: uncomment the next two lines to install CmdStan, then comment them out again.
# import cmdstanpy
# cmdstanpy.install_cmdstan()
from cmdstanpy import CmdStanModel

In [3]:
def display_estimation_results(var_names: list[str], posteriors) -> pd.DataFrame:
	"""Summarise posterior draws of the selected variables in one table.

	Args:
		var_names: Names of the variables to summarise, in the order the
			rows should appear.
		posteriors: Mapping from variable name to its posterior draws. Each
			value may be a NumPy array or any array-like (list, tuple); it
			is converted with ``np.asarray`` before summarising, and all of
			its elements are pooled into a single summary row.

	Returns:
		A DataFrame with one row per requested variable and the columns
		``Name``, ``Posterior Mean``, ``Posterior Std.``, ``2.5% Quantile``
		and ``97.5% Quantile``. The standard deviation is NumPy's default
		(population, ``ddof=0``).

	Raises:
		KeyError: If a name in ``var_names`` is missing from ``posteriors``.
	"""
	columns = ['Name', 'Posterior Mean', 'Posterior Std.', '2.5% Quantile', '97.5% Quantile']
	records = []
	for name in var_names:
		# Coerce once so that plain Python sequences work as well as ndarrays.
		draws = np.asarray(posteriors[name])
		# Bounds of the central 95% interval; they match the column labels above.
		quantile_025, quantile_975 = np.percentile(draws, [2.5, 97.5])
		records.append(
			(name, draws.mean(), draws.std(), quantile_025, quantile_975)
		)
	return pd.DataFrame(records, columns=columns)

### Estimation on a single dataset

In [10]:
# Collect the regular files inside `<cwd>/__jsondata__` as full paths.
# os.listdir() returns entries in arbitrary order, so the paths are sorted:
# positional lookups such as `all_json_datas[1]` then refer to the same file
# on every run and every machine.
wd = os.getcwd()
wd_jsondata = os.path.join(wd, '__jsondata__')
all_json_datas = sorted(
	path
	for path in (os.path.join(wd_jsondata, f) for f in os.listdir(wd_jsondata))
	if os.path.isfile(path)  # skip sub-directories
)
len(all_json_datas)

35

In [5]:
# Build the Stan model from `real_data.stan` in the working directory.
# `output_dir` is the scratch directory handed to the sampler later on.
output_dir = Path('./tmp')
stan_file = os.path.join(wd, 'real_data.stan')
model = CmdStanModel(stan_file=stan_file)

In [6]:
# Sample from the posterior using the data file at index 1 of `all_json_datas`.
fit = model.sample(
	data=all_json_datas[1],
	output_dir=output_dir,
	seed=12345,
	# chain layout and draw counts
	chains=4,
	parallel_chains=4,
	iter_warmup=1000,
	iter_sampling=2000,
	# stricter sampler settings, chosen for a difficult model
	adapt_delta=0.99,
	max_treedepth=12,
	show_console=False,
)

16:18:38 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

16:20:41 - cmdstanpy - INFO - CmdStan done processing.
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
	Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
	Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsh




In [7]:
# Pull the fitted Stan variables out of `fit`; the summary helper looks them up by variable name.
posteriors = fit.stan_variables()

In [8]:
# Tabulate posterior mean, std. and the 2.5% / 97.5% quantiles for the parameters of interest.
display_estimation_results(
	var_names=['c_i', 'c_j', 'sigma', 'lambda', 'mu_0'],
	posteriors=posteriors,
)

Unnamed: 0,Name,Posterior Mean,Posterior Std.,2.5% Quantile,97.5% Quantile
0,c_i,4.469057,0.42379,3.451708,4.982
1,c_j,4.588866,0.347443,3.705345,4.98545
2,sigma,9.067458,0.661098,7.575421,9.965371
3,lambda,5.069414,2.321195,1.116562,9.438949
4,mu_0,-0.012563,5.007155,-9.471783,10.069472


In [9]:
# Empty the sampler's scratch directory: remove regular files, leave anything else in place.
for entry in output_dir.iterdir():
	if not entry.is_file():
		continue
	entry.unlink()