In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections as collections
import pandas as pd
import seaborn as sns
import scipy

import pymc3 as pm
import theano
import theano.tensor as tt
from theano import tensor as T
import arviz as az

from NUTS_more_pars_helpers import generate_data, llik_td_vectorized, update_Q, theano_llik_td#, get_nuts_est

## Parameter recoverability using NUTS with more pars

In [2]:
true_alpha_neg = .3
true_alpha_pos = .3
true_exp_neg = 1
true_exp_pos = 1
true_beta = 1.5
n=120
#est_df = get_nuts_est(t_alpha_neg = true_alpha_neg, t_alpha_pos = true_alpha_pos, t_exp_neg = true_exp_neg, t_exp_pos = true_exp_pos, t_beta = true_beta, n=60)

In [3]:
machines, actions, rewards, all_Qs = generate_data(alpha_neg= true_alpha_neg, alpha_pos= true_alpha_pos, exp_neg= true_exp_neg, exp_pos= true_exp_pos, beta= true_beta)

In [4]:
llik_td_vectorized([true_alpha_neg, true_alpha_pos, true_exp_neg, true_exp_pos, true_beta], *(machines, actions, rewards))

8.918024819930357

In [5]:
# Transform the variables into appropriate Theano objects
machines_ = theano.shared(np.asarray(machines, dtype='int16'))
actions_ = theano.shared(np.asarray(actions, dtype='int16'))
rewards_ = theano.shared(np.asarray(rewards, dtype='int16'))

# Initialize the Q table
Qs = tt.zeros((4,2), dtype='float64')

In [6]:
tmp = update_Q(machines_[0], actions_[0], rewards_[0], Qs, true_alpha_neg, true_alpha_pos, true_exp_neg, true_exp_pos)
tmp.eval()

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [7]:
tlik = theano_llik_td(true_alpha_neg, true_alpha_pos, true_exp_neg, true_exp_pos, true_beta, machines, actions, rewards)
tlik

Sum{acc_dtype=float64}.0

In [8]:
with pm.Model() as m:
    alpha_neg = pm.Beta('alpha_neg', 1, 1)
    alpha_pos = pm.Beta('alpha_pos', 1, 1)
    exp_neg = pm.Beta('exp_neg', 1, 1)
    exp_pos = pm.Beta('exp_pos', 1, 1)
    beta = pm.HalfNormal('beta', 10)
    like = pm.Potential('like', theano_llik_td(alpha_neg, alpha_pos, exp_neg, exp_pos, beta, machines, actions, rewards))
    tr = pm.sample()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


ValueError: Cannot compute test value: input 0 (like) of Op Sum{acc_dtype=float64}(like) missing default value.  
Backtrace when that variable is created:

  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2855, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
    return runner(coro)
  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3058, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Users/zeynepenkavi/anaconda/envs/py37/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-bd1f57c90ea0>", line 7, in <module>
    like = pm.Potential('like', theano_llik_td(alpha_neg, alpha_pos, exp_neg, exp_pos, beta, machines, actions, rewards))
  File "/Users/zeynepenkavi/Documents/fmri-nbs/notebooks/dev_study/beh/NUTS_more_pars_helpers.py", line 181, in theano_llik_td
    return tt.sum(log_prob_actions[1:])


In [None]:
alpha_neg = tt.scalar("alpha_neg")
alpha_pos = tt.scalar("alpha_pos")
exp_neg = tt.scalar("exp_neg")
exp_pos = tt.scalar("exp_pos")
Qs = tt.zeros((4,2), dtype='float64')

# Compute the Q values for each trial
Qs, updates = theano.scan(
    fn=update_Q,
    sequences=[machines_, actions_, rewards_],
    outputs_info=[Qs],
    non_sequences=[alpha_neg, alpha_pos, exp_neg, exp_pos])

tmp_func = theano.function(inputs = [alpha_neg, alpha_pos, exp_neg, exp_pos], outputs = Qs)
tmp_res = tmp_func(true_alpha_neg, true_alpha_pos, true_exp_neg, true_exp_pos)

In [None]:
int_Qs = tt.zeros((1, 4,2), dtype='float64')

Qs = tt.concatenate((int_Qs, tmp_res), axis=0)

In [None]:
idx = list(zip(range(100),machines))

In [None]:
obs_Qs = [Qs[i] for i in idx]

In [None]:
Qs_ = np.asarray(obs_Qs) * true_beta

In [None]:
Qs_ = np.asarray(Qs_)
log_prob_actions = Qs_ - pm.math.logsumexp(Qs_, axis=1)

In [None]:
Qs_

Next steps:

Simulations:

-  Can pyMC recover parameters better for the whole range?
    -  Conclusion: Not necessarily but   
        - it keeps all values within bounds
        - it does not fail in any other way compared to the realistic MLE estimates (i.e. those starting from random values instead of the true values used for data generation)  
        
- Is recoverability worse for generating processes with more parameters
    - TODO: Write new/more generalizable ~~data generation~~ and par recovery functions for different RL models (with more parameters)
    - Compare parameter recoverability for at least one more RL model with more parameters 
    - Not interested in comparing MLE versus NUTS but compare simulation results of NUTS estimates between more simple and complicated processes
    
Fitting:

- Convert the recoverability function to fitting function

```
 # NUTS estimate
    actions_ = theano.shared(np.asarray(actions, dtype='int16'))
    with pm.Model() as m:
        alpha = pm.Beta('alpha', 1, 1)
        beta = pm.HalfNormal('beta', 10)
        like = pm.Potential('like', theano_llik_td(alpha, beta, machines, actions, rewards, n))
        tr = pm.sample()
    
    nuts_alpha_ave = np.mean(tr.alpha)
    nuts_beta_ave = np.mean(tr.beta)
    nuts_alpha_std = np.std(tr.alpha)
    nuts_beta_std = np.std(tr.beta)
    nuts_llik = llik_td_vectorized([nuts_alpha_ave, nuts_beta_ave], *(machines, actions, rewards))
```
- Fit to one subject's data (using the fitting function created above in fit_rl_cv.py)
- Fit to all subjects' data