# Estimations

In [1]:
import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# import cmdstanpy
# cmdstanpy.install_cmdstan()
from cmdstanpy import CmdStanModel

In [3]:
def display_estimation_results(var_names: list[str], posteriors):
	"""Tabulate posterior summary statistics for the requested variables.

	Args:
		var_names: Keys to look up in ``posteriors``; one output row per key,
			in the given order.
		posteriors: Object indexable by variable name whose values expose
			``.mean()`` and ``.std()`` and can be passed to ``np.percentile``
			(e.g. a dict of NumPy arrays of posterior draws).

	Returns:
		A ``pd.DataFrame`` with the columns 'Name', 'Posterior Mean',
		'Posterior Std.', '2.5% Quantile' and '97.5% Quantile'.
	"""
	header = ['Name', 'Posterior Mean', 'Posterior Std.', '2.5% Quantile', '97.5% Quantile']

	def summarise(var):
		# One row: name, mean, std and the bounds of the central 95% interval.
		draws_mean = posteriors[var].mean()
		draws_std = posteriors[var].std()
		lower_bound, upper_bound = np.percentile(posteriors[var], [2.5, 97.5])
		return (var, draws_mean, draws_std, lower_bound, upper_bound)

	rows = [summarise(var) for var in var_names]
	return pd.DataFrame(rows, columns=header)

### Estimations

In [4]:
# Collect every JSON data file below <working directory>/__jsondata__,
# searching sub-directories as well.
wd = os.getcwd()
wd_jsondata = Path(wd) / '__jsondata__'
all_json_datas = [json_path for json_path in wd_jsondata.rglob('*.json')]
len(all_json_datas)  # displayed as the cell result: number of files found

14

In [5]:
# build stan model
# The Stan program is expected in the notebook's working directory (`wd`).
stan_file = os.path.join(wd, 'real_data.stan')
model = CmdStanModel(stan_file=stan_file)
# Directory handed to `model.sample(output_dir=...)` in the fitting cell.
output_dir = Path('./tmp')

In [6]:
# fit the model with data
# For each JSON data file: sample from the model and cache the posterior draws
# as `posteriors_<contest>.pkl` inside `wd_jsondata`. Contests whose pickle
# already exists are not re-fitted; their cached draws are loaded instead.
# After the loop, `posteriors` holds the draws of the last contest handled.
for json_data in all_json_datas:
	contest_name = json_data.stem
	posterior_file = f'posteriors_{contest_name}.pkl'
	posterior_file_path = wd_jsondata.joinpath(posterior_file)
	if posterior_file_path.exists():
		# Load the cached draws so `posteriors` is defined even when every
		# contest is already cached (later cells read it).
		# NOTE: pickle.load can execute arbitrary code; this is acceptable only
		# because these files are written by this notebook itself.
		with open(posterior_file_path, 'rb') as f:
			posteriors = pickle.load(f)
		continue
	print(contest_name, 'begin')
	fit = model.sample(
		data=json_data,
		iter_warmup=1000,
		iter_sampling=2000,
		chains=4,
		parallel_chains=4,
		show_console=False,
		max_treedepth=12,  # for difficult model
		adapt_delta=0.99,  # for difficult model
		output_dir=output_dir,
		seed=12345,
	)
	posteriors = fit.stan_variables()
	# Write to the exact path the cache check above inspects. Writing to the
	# bare file name would land in the current directory, so the check would
	# never find it and every contest would be re-fitted on each run.
	with open(posterior_file_path, 'wb') as f:
		pickle.dump(posteriors, f)
	print(contest_name, 'end')

17:51:29 - cmdstanpy - INFO - CmdStan start processing


contest_2589 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:52:22 - cmdstanpy - INFO - CmdStan done processing.
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
	Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
	Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Consider re-running with show_console=True if the above output is unclear!





17:52:29 - cmdstanpy - INFO - CmdStan start processing


contest_2589 end
contest_2435 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:52:50 - cmdstanpy - INFO - CmdStan done processing.





17:52:53 - cmdstanpy - INFO - CmdStan start processing


contest_2435 end
contest_3526 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:55:15 - cmdstanpy - INFO - CmdStan done processing.
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Consider re-running with show_console=True if the above output is unclear!





17:55:22 - cmdstanpy - INFO - CmdStan start processing


contest_3526 end
contest_2478 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:56:05 - cmdstanpy - INFO - CmdStan done processing.





17:56:11 - cmdstanpy - INFO - CmdStan start processing


contest_2478 end
contest_2454 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:56:51 - cmdstanpy - INFO - CmdStan done processing.





17:56:56 - cmdstanpy - INFO - CmdStan start processing


contest_2454 end
contest_2445 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:57:31 - cmdstanpy - INFO - CmdStan done processing.
Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
	Exception: Exception: normal_lpdf: Random variable is nan, but must be not nan! (in '/Users/linsheng/Documents/GitHub/kaggle-contest-design/metakaggle/model_effort.stan', line 81, column 2, included from
Consider re-running with show_console=True if the above output is unclear!





17:57:35 - cmdstanpy - INFO - CmdStan start processing


contest_2445 end
contest_2464 begin


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                

17:57:52 - cmdstanpy - INFO - CmdStan done processing.



contest_2464 end


In [7]:
# Summarise the draws currently bound to `posteriors`, i.e. those of the last
# contest handled by the fitting loop above.
summary_var_names = ['c_i', 'c_j', 'sigma', 'lambda', 'mu_0']
display_estimation_results(summary_var_names, posteriors)

Unnamed: 0,Name,Posterior Mean,Posterior Std.,2.5% Quantile,97.5% Quantile
0,c_i,0.626243,0.125604,0.503532,0.962788
1,c_j,0.604383,0.107485,0.502876,0.890262
2,sigma,9.441136,0.474248,8.245029,9.980237
3,lambda,0.208448,0.095743,0.070087,0.437911
4,mu_0,-0.084615,5.017722,-9.930651,9.932066


In [8]:
# Remove the regular files directly inside `output_dir`; sub-directories and
# the directory itself are left in place.
for entry in output_dir.iterdir():
	if not entry.is_file():
		continue
	entry.unlink()