In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections as collections
import pandas as pd
import seaborn as sns
import scipy

import pymc3 as pm
import theano
import theano.tensor as tt
import arviz as az

from NUTS_more_pars_helpers import generate_data, llik_td_vectorized, update_Q, theano_llik_td, get_nuts_est

## Parameter recoverability using NUTS with more parameters

In [2]:
# Ground-truth parameter values handed to the recovery helper.
# Negative/positive variants are set to the same value here.
true_alpha_neg, true_alpha_pos = .3, .3
true_exp_neg, true_exp_pos = 1, 1
true_beta = 1.5

# Run the NUTS recovery helper with the true values above.
# `n=60` is interpreted by the helper (presumably the number of trials — confirm
# against NUTS_more_pars_helpers.get_nuts_est).
# NOTE(review): the recorded output of this cell is
# "TypeError: Variables do not support boolean operations."; the error is not
# raised by any statement visible here, so it presumably originates inside the
# helper — confirm and fix there.
est_df = get_nuts_est(
    t_alpha_neg=true_alpha_neg,
    t_alpha_pos=true_alpha_pos,
    t_exp_neg=true_exp_neg,
    t_exp_pos=true_exp_pos,
    t_beta=true_beta,
    n=60,
)

TypeError: Variables do not support boolean operations.

Next steps:

Simulations:

-  Can PyMC (NUTS) recover parameters better than MLE across the whole parameter range?
    -  Conclusion: Not necessarily, but:
        - it keeps all values within bounds
        - it does not fail in any other way compared to the realistic MLE estimates (i.e. those starting from random values instead of the true values used for data generation)
        
- Is recoverability worse for generating processes with more parameters?
    - TODO: Write new/more generalizable ~~data generation~~ and parameter recovery functions for different RL models (with more parameters)
    - Compare parameter recoverability for at least one more RL model with more parameters
    - The goal is not to compare MLE versus NUTS, but to compare simulation results of NUTS estimates between simpler and more complex generating processes
    
Fitting:

- Convert the recoverability function into a fitting function

```
 # NUTS estimate
    # Excerpt of a function body: estimates alpha and beta with NUTS and then
    # evaluates the log-likelihood helper at the posterior means.
    # `machines`, `actions`, `rewards` and `n` come from the enclosing scope,
    # which is not shown in this snippet.

    # Wrap the actions as an int16 Theano shared variable.
    # NOTE(review): `actions_` is not referenced again in this snippet — the
    # un-shared `actions` is what gets passed to theano_llik_td below; confirm
    # which one is intended.
    actions_ = theano.shared(np.asarray(actions, dtype='int16'))
    with pm.Model() as m:
        # Beta(1, 1) prior on alpha.
        alpha = pm.Beta('alpha', 1, 1)
        # HalfNormal prior on beta.
        beta = pm.HalfNormal('beta', 10)
        # Register the value returned by theano_llik_td as a potential term of the model.
        like = pm.Potential('like', theano_llik_td(alpha, beta, machines, actions, rewards, n))
        # Sample with pm.sample()'s default settings.
        tr = pm.sample()
    
    # Summarise the trace: mean and standard deviation of each parameter's samples.
    nuts_alpha_ave = np.mean(tr.alpha)
    nuts_beta_ave = np.mean(tr.beta)
    nuts_alpha_std = np.std(tr.alpha)
    nuts_beta_std = np.std(tr.beta)
    # Evaluate llik_td_vectorized at the posterior means.
    nuts_llik = llik_td_vectorized([nuts_alpha_ave, nuts_beta_ave], *(machines, actions, rewards))
```
- Fit to one subject's data (using the fitting function created above in fit_rl_cv.py)
- Fit to all subjects' data