# Estimation

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# import cmdstanpy
# cmdstanpy.install_cmdstan()
from cmdstanpy import CmdStanModel

In [3]:
def display_estimation_results(var_names: list[str], posteriors):
	"""Build a summary table of posterior draws for the requested variables.

	Parameters
	----------
	var_names : list[str]
		Names to look up in ``posteriors``; one table row is produced per
		name, in the order given.
	posteriors
		Mapping from variable name to an array of draws. Each value must
		provide ``.mean()`` and ``.std()`` and be accepted by
		``np.percentile`` (e.g. a ``numpy.ndarray``).

	Returns
	-------
	pandas.DataFrame
		Columns: name, posterior mean, posterior standard deviation, and
		the 2.5% / 97.5% percentiles of the draws.
	"""
	header = ['Name', 'Posterior Mean', 'Posterior Std.', '2.5% Quantile', '97.5% Quantile']

	def summarise(name):
		# All statistics are taken over every element of the draws array.
		draws = posteriors[name]
		mean, std = draws.mean(), draws.std()
		lower, upper = np.percentile(draws, [2.5, 97.5])
		return (name, mean, std, lower, upper)

	return pd.DataFrame([summarise(name) for name in var_names], columns=header)

## Estimations

In [11]:
# Locate the JSON data files stored in ./__jsondata__ under the working directory.
wd = os.getcwd()
wd_jsondata = os.path.join(wd, '__jsondata__')
# os.listdir() returns entries in arbitrary order. Sort the paths so that
# positional indexing into `all_json_datas` selects the same file on every
# machine and every run. Sub-directories are skipped.
all_json_datas = sorted(
	path
	for path in (os.path.join(wd_jsondata, f) for f in os.listdir(wd_jsondata))
	if os.path.isfile(path)
)
all_json_datas

['/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/__jsondata__/contest_2435.json',
 '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/__jsondata__/contest_2445.json']

In [5]:
# Build the Stan model from the program file in the working directory.
# `output_dir` is the folder later handed to the sampler for its output files.
output_dir = Path('./tmp')
stan_file = os.path.join(wd, 'real_data.stan')
model = CmdStanModel(stan_file=stan_file)

12:40:00 - cmdstanpy - INFO - compiling stan file /Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/real_data.stan to exe file /Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/real_data
12:40:09 - cmdstanpy - INFO - compiled model executable: /Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/real_data


In [20]:
# Run the sampler on the second discovered data file (index 1).
fit = model.sample(
	data=all_json_datas[1],
	output_dir=output_dir,
	seed=12345,
	# four chains, all run in parallel
	chains=4,
	parallel_chains=4,
	iter_warmup=1000,
	iter_sampling=2000,
	# stricter sampler settings, for a difficult model
	adapt_delta=0.99,
	max_treedepth=12,
	show_console=False,
)

13:18:30 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

13:19:51 - cmdstanpy - INFO - CmdStan done processing.
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/zhuanglinsheng/Documents/Github/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Consider re-running with show_console=True if the above output is unclear!





In [24]:
# Pull the fitted Stan variables out of the sampling result. The summary
# helper indexes this object by variable name (`posteriors[name]`).
posteriors = fit.stan_variables()

In [25]:
# Tabulate posterior mean, std. and 2.5% / 97.5% quantiles for the selected variables.
display_estimation_results(
	var_names=['c_i', 'c_j', 'sigma', 'lambda', 'mu_0'],
	posteriors=posteriors,
)

Unnamed: 0,Name,Posterior Mean,Posterior Std.,2.5% Quantile,97.5% Quantile
0,c_i,4.76752,0.220283,4.208324,4.99446
1,c_j,4.080302,0.596077,2.778893,4.952996
2,sigma,3.564731,0.518691,2.689159,4.738213
3,lambda,8.402612,1.230009,5.519694,9.933632
4,mu_0,-0.024796,5.04413,-9.679945,9.788884


In [26]:
# Clean-up: delete every regular file directly inside `output_dir`.
# Sub-directories (and anything inside them) are left untouched.
for entry in (p for p in output_dir.iterdir() if p.is_file()):
	entry.unlink()