In [20]:
#@title Import libraries
import sys
import warnings

import pickle
import time
import torch
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd


# Installs a blanket "ignore" filter with no category or module restriction,
# so warnings of every category are hidden for the rest of the session.
warnings.filterwarnings("ignore")


In [21]:
# RL libraries
# The project modules are imported through the local `resources` package;
# the same directory is additionally appended to the module search path.
sys.path.append('resources')  # add source directory to path
from resources import rnn, rnn_training, bandits, rnn_utils


In [3]:
# Disabled baseline run (n_submodels = 1, ensemble type NONE): the whole cell
# is a single bare triple-quoted string, so none of the code inside executes.
# NOTE(review): `baseline_losses`, which a later cell prints, is only assigned
# inside this string -- re-enable this cell (or define the list elsewhere)
# before running that cell on a fresh kernel.
""" # train model (BASELINE)
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 1
ensemble = rnn_training.ensemble_types.NONE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
    # load trained parameters
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

if train:
  
  start_time = time.time()
  
  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  

  baseline_losses = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      baseline_losses.append(float(loss))


  print(f'Training took {time.time() - start_time:.2f} seconds.')
  

  # save trained parameters  
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



# Synthesize a dataset using the fitted network
environment = bandits.EnvironmentBanditsDrift(0.1)
model.set_device(torch.device('cpu'))
model.to(torch.device('cpu'))
rnn_agent = bandits.AgentNetwork(model, n_actions=2)

# Analysis
session_id = 0

choices = experiment_list_test[session_id].choices
rewards = experiment_list_test[session_id].rewards

list_probs = []
list_qs = []

# get q-values from groundtruth
qs_test, probs_test = bandits.get_update_dynamics(experiment_list_test[session_id], agent)
list_probs.append(np.expand_dims(probs_test, 0))
list_qs.append(np.expand_dims(qs_test, 0))

# get q-values from trained rnn
qs_rnn, probs_rnn = bandits.get_update_dynamics(experiment_list_test[session_id], rnn_agent)
list_probs.append(np.expand_dims(probs_rnn, 0))
list_qs.append(np.expand_dims(qs_rnn, 0))

colors = ['tab:blue', 'tab:orange', 'tab:pink', 'tab:grey']

# concatenate all choice probs and q-values
probs = np.concatenate(list_probs, axis=0)
qs = np.concatenate(list_qs, axis=0)

# normalize q-values
def normalize(qs):
  return (qs - np.min(qs, axis=1, keepdims=True)) / (np.max(qs, axis=1, keepdims=True) - np.min(qs, axis=1, keepdims=True))

qs = normalize(qs)
fig, axs = plt.subplots(4, 1, figsize=(20, 10))

reward_probs = np.stack([experiment_list_test[session_id].timeseries[:, i] for i in range(n_actions)], axis=0)
bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=reward_probs,
    timeseries_name='Reward Probs',
    labels=[f'Arm {a}' for a in range(n_actions)],
    color=['tab:purple', 'tab:cyan'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[0]),
    )

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=probs[:, :, 0],
    timeseries_name='Choice Probs',
    color=colors,
    labels=['Ground Truth', 'RNN'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[1]),
    )

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=qs[:, :, 0],
    timeseries_name='Q-Values',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[2]),
    )

dqs_arms = normalize(-1*np.diff(qs, axis=2))

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=dqs_arms[:, :, 0],
    timeseries_name='dQ/dActions',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[3]),
    )

plt.show()

 """

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6085727; Time: 18.1230s; Convergence value: 3.91e-01
Epoch 2/5 --- Loss: 0.6095850; Time: 14.9941s; Convergence value: 1.78e-01
Epoch 3/5 --- Loss: 0.6080438; Time: 13.7053s; Convergence value: 1.05e-01
Epoch 4/5 --- Loss: 0.6078517; Time: 11.4109s; Convergence value: 6.60e-02
Epoch 5/5 --- Loss: 0.6076877; Time: 13.4029s; Convergence value: 6.19e-04
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5965887; Time: 1.3506s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5965887; Time: 0.7666s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5965887; Time: 0.7329s; Convergence value: nan
Maximum number of training epochs rea

IndexError: tensors used as indices must be long, byte or bool tensors

In [9]:
# Disabled cell: everything below is one bare string literal, so none of the
# wrapped code executes. The wrapped code would show `baseline_losses`, label
# each value with the model name "Baseline" and write them, appended to the
# rows read from losses.csv, back to losses.csv.
# NOTE(review): `DataFrame.append` was removed in pandas 2.0; use
# `pd.concat([df1, df])` if this cell is re-enabled.
""" baseline_losses

df = pd.DataFrame(columns=['model', 'loss'])

df["model"] = ["Baseline"]*len(baseline_losses)
df["loss"] = baseline_losses

df1 = pd.read_csv("losses.csv")

losses = df1.append(df)

losses.to_csv("losses.csv", index=False) """

[0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584,
 0.5941007137298584]

In [None]:
# 4 submodels
#
# End-to-end run for this configuration: simulate bandit data from a
# Q-learning agent, fit a list of RLRNN submodels on it, evaluate the fitted
# model on a separately simulated test set and save the trained parameters.

# train model
train = True  # False: skip fitting and only call fit_model with epochs=0
checkpoint = False  # True: restore model/optimizer state from params_path first
data = False  # False: simulate the datasets below; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.VOTE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf', 'xQr', 'xQc']
control_list = ['ca', 'ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup: ground-truth agent and environment used to simulate both datasets
  environment = bandits.EnvironmentBanditsDrift(
      sigma=sigma,
      n_actions=n_actions,
      non_binary_reward=non_binary_reward,
      correlated_reward=correlated_reward,
  )
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  # the test set uses its own fixed size (200 trials x 1024 sessions)
  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # replaces the default params_path set above
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
  # NOTE(review): this branch defines neither dataset_test, experiment_list_test
  # nor agent, which the validation step below and the analysis cell read.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # Fix: the original only constructed a `Warning(...)` object and discarded it,
  # so the message was never shown. print is used rather than warnings.warn
  # because the import cell installs a global "ignore" warnings filter.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this branch yields a single module, while the optimizer list
  # below iterates over `model`; confirm this works before setting use_lstm=True.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  # one independently initialised RLRNN per submodel
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is copied into every submodel/optimizer pair
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
      model[i].load_state_dict(state_dict_model_i)
      optimizer_rnn[i].load_state_dict(state_dict_optim_i)
    # Fix: the original assigned this wrapper to `rnn`, which replaced the
    # imported `rnn` module and broke every later `rnn.*` lookup.
    # NOTE(review): the wrapper is not used below (fit_model still receives the
    # list in `model`) -- confirm whether it should be passed on instead.
    ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validation losses of this run (variable name kept exactly as in the original cell)
  n2_loss = []

  # validate model: fit_model is called without an optimizer and under no_grad,
  # and the third return value is recorded as the loss of each of the 10 passes
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n2_loss.append(float(loss))

  # the reported duration covers fitting and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters; a single module/optimizer is stored as one state
  # dict, anything else as a list of per-submodel state dicts
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no fitting requested: fit_model is still called with epochs=0 and its first
  # return value replaces `model`
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6120324; Time: 40.7008s; Convergence value: 3.88e-01
Epoch 2/5 --- Loss: 0.5564060; Time: 40.5172s; Convergence value: 2.07e-01
Epoch 3/5 --- Loss: 0.5558216; Time: 46.0110s; Convergence value: 1.22e-01
Epoch 4/5 --- Loss: 0.5556806; Time: 47.3550s; Convergence value: 7.72e-02
Epoch 5/5 --- Loss: 0.5556332; Time: 46.4392s; Convergence value: 9.46e-03
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...


AttributeError: module 'torch' has no attribute 'concatenate'


Validating the trained hybrid RNN on a test dataset...


UnboundLocalError: local variable 'model' referenced before assignment

In [None]:

# Compare the fitted network with the ground-truth agent on one test session.
# NOTE(review): `environment` is rebound here but not used again in this cell.
environment = bandits.EnvironmentBanditsDrift(0.1)
model.set_device(torch.device('cpu'))
model.to(torch.device('cpu'))
# Fix: use the notebook-wide n_actions (already used for the plots below)
# instead of a hard-coded 2, so both stay consistent.
rnn_agent = bandits.AgentNetwork(model, n_actions=n_actions)

# Analysis
session_id = 0

choices = experiment_list_test[session_id].choices
rewards = experiment_list_test[session_id].rewards

list_probs = []
list_qs = []

# get q-values from groundtruth
qs_test, probs_test = bandits.get_update_dynamics(experiment_list_test[session_id], agent)
list_probs.append(np.expand_dims(probs_test, 0))
list_qs.append(np.expand_dims(qs_test, 0))

# get q-values from trained rnn
qs_rnn, probs_rnn = bandits.get_update_dynamics(experiment_list_test[session_id], rnn_agent)
list_probs.append(np.expand_dims(probs_rnn, 0))
list_qs.append(np.expand_dims(qs_rnn, 0))

colors = ['tab:blue', 'tab:orange', 'tab:pink', 'tab:grey']

# concatenate all choice probs and q-values; index 0 along the first axis is
# the ground-truth agent, index 1 the RNN agent (order of the appends above)
probs = np.concatenate(list_probs, axis=0)
qs = np.concatenate(list_qs, axis=0)


# normalize q-values
def normalize(qs):
  """Min-max scale `qs` to [0, 1] along axis 1.

  Args:
    qs: array with at least two axes; presumably axis 1 indexes trials
      (TODO confirm against bandits.get_update_dynamics).

  Returns:
    Array of the same shape. Slices that are constant along axis 1 have a
    zero range; they are mapped to 0 instead of the NaN (0/0) the plain
    formula would produce.
  """
  lowest = np.min(qs, axis=1, keepdims=True)
  value_range = np.max(qs, axis=1, keepdims=True) - lowest
  # divide by 1 wherever the range is zero; the numerator is 0 there anyway
  safe_range = np.where(value_range == 0, 1, value_range)
  return (qs - lowest) / safe_range


qs = normalize(qs)
fig, axs = plt.subplots(4, 1, figsize=(20, 10))

# row 0: reward probability of every arm over the session
reward_probs = np.stack([experiment_list_test[session_id].timeseries[:, i] for i in range(n_actions)], axis=0)
bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=reward_probs,
    timeseries_name='Reward Probs',
    labels=[f'Arm {a}' for a in range(n_actions)],
    color=['tab:purple', 'tab:cyan'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[0]),
    )

# row 1: choice probabilities (last-axis index 0) of both agents
bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=probs[:, :, 0],
    timeseries_name='Choice Probs',
    color=colors,
    labels=['Ground Truth', 'RNN'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[1]),
    )

# row 2: normalized q-values (last-axis index 0) of both agents
bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=qs[:, :, 0],
    timeseries_name='Q-Values',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[2]),
    )

# row 3: negated difference of the normalized q-values along the last axis,
# normalized again
dqs_arms = normalize(-1*np.diff(qs, axis=2))

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=dqs_arms[:, :, 0],
    timeseries_name='dQ/dActions',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[3]),
    )

plt.show()


In [None]:
# Print the validation losses collected in `baseline_losses`.
# NOTE(review): in the visible notebook this list is only assigned inside the
# disabled baseline cell (wrapped in a string literal), so on a fresh kernel
# this line raises NameError -- confirm where the list is meant to come from.
print(baseline_losses)

[0.588664710521698, 0.5886646509170532, 0.5886647701263428, 0.588664710521698, 0.5886646509170532, 0.588664710521698, 0.588664710521698, 0.588664710521698, 0.5886646509170532, 0.5886646509170532]


In [None]:
# 16 submodels
#
# End-to-end run for this configuration: simulate bandit data from a
# Q-learning agent, fit a list of RLRNN submodels on it, evaluate the fitted
# model on a separately simulated test set and save the trained parameters.

# train model
train = True  # False: skip fitting and only call fit_model with epochs=0
checkpoint = False  # True: restore model/optimizer state from params_path first
data = False  # False: simulate the datasets below; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 16
ensemble = rnn_training.ensemble_types.VOTE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 15
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf', 'xQr', 'xQc']
control_list = ['ca', 'ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup: ground-truth agent and environment used to simulate both datasets
  environment = bandits.EnvironmentBanditsDrift(
      sigma=sigma,
      n_actions=n_actions,
      non_binary_reward=non_binary_reward,
      correlated_reward=correlated_reward,
  )
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  # the test set uses its own fixed size (200 trials x 1024 sessions)
  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # replaces the default params_path set above
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
  # NOTE(review): this branch defines neither dataset_test, experiment_list_test
  # nor agent, which the validation step below and the analysis cell read.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # Fix: the original only constructed a `Warning(...)` object and discarded it,
  # so the message was never shown. print is used rather than warnings.warn
  # because the import cell installs a global "ignore" warnings filter.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this branch yields a single module, while the optimizer list
  # below iterates over `model`; confirm this works before setting use_lstm=True.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  # one independently initialised RLRNN per submodel
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is copied into every submodel/optimizer pair
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
      model[i].load_state_dict(state_dict_model_i)
      optimizer_rnn[i].load_state_dict(state_dict_optim_i)
    # Fix: the original assigned this wrapper to `rnn`, which replaced the
    # imported `rnn` module and broke every later `rnn.*` lookup.
    # NOTE(review): the wrapper is not used below (fit_model still receives the
    # list in `model`) -- confirm whether it should be passed on instead.
    ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validation losses of this 16-submodel run
  n16_loss = []

  # validate model: fit_model is called without an optimizer and under no_grad,
  # and the third return value is recorded as the loss of each of the 10 passes
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n16_loss.append(float(loss))

  # the reported duration covers fitting and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters; a single module/optimizer is stored as one state
  # dict, anything else as a list of per-submodel state dicts
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no fitting requested: fit_model is still called with epochs=0 and its first
  # return value replaces `model`
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



KeyboardInterrupt: 

In [None]:
# 32 submodels
#
# End-to-end run for this configuration: simulate bandit data from a
# Q-learning agent, fit a list of RLRNN submodels on it, evaluate the fitted
# model on a separately simulated test set and save the trained parameters.

# train model
train = True  # False: skip fitting and only call fit_model with epochs=0
checkpoint = False  # True: restore model/optimizer state from params_path first
data = False  # False: simulate the datasets below; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 32
ensemble = rnn_training.ensemble_types.VOTE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 25
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf', 'xQr', 'xQc']
control_list = ['ca', 'ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup: ground-truth agent and environment used to simulate both datasets
  environment = bandits.EnvironmentBanditsDrift(
      sigma=sigma,
      n_actions=n_actions,
      non_binary_reward=non_binary_reward,
      correlated_reward=correlated_reward,
  )
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  # the test set uses its own fixed size (200 trials x 1024 sessions)
  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # replaces the default params_path set above
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
  # NOTE(review): this branch defines neither dataset_test, experiment_list_test
  # nor agent, which the validation step below and the analysis cell read.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # Fix: the original only constructed a `Warning(...)` object and discarded it,
  # so the message was never shown. print is used rather than warnings.warn
  # because the import cell installs a global "ignore" warnings filter.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this branch yields a single module, while the optimizer list
  # below iterates over `model`; confirm this works before setting use_lstm=True.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  # one independently initialised RLRNN per submodel
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is copied into every submodel/optimizer pair
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
      model[i].load_state_dict(state_dict_model_i)
      optimizer_rnn[i].load_state_dict(state_dict_optim_i)
    # Fix: the original assigned this wrapper to `rnn`, which replaced the
    # imported `rnn` module and broke every later `rnn.*` lookup.
    # NOTE(review): the wrapper is not used below (fit_model still receives the
    # list in `model`) -- confirm whether it should be passed on instead.
    ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validation losses of this 32-submodel run
  n32_loss = []

  # validate model: fit_model is called without an optimizer and under no_grad,
  # and the third return value is recorded as the loss of each of the 10 passes
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n32_loss.append(float(loss))

  # the reported duration covers fitting and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters; a single module/optimizer is stored as one state
  # dict, anything else as a list of per-submodel state dicts
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no fitting requested: fit_model is still called with epochs=0 and its first
  # return value replaces `model`
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



In [4]:
""" # 32 submodels, ensemble average

# train model 
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 32
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
    # load trained parameters
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

if train:
  
  start_time = time.time()
  
  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  

  n32_eA_loss = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n32_eA_loss.append(float(loss))


  print(f'Training took {time.time() - start_time:.2f} seconds.')
  

  # save trained parameters  
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )
 """

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6424130; Time: 308.1779s; Convergence value: 3.58e-01
Epoch 2/5 --- Loss: 0.5910122; Time: 362.1660s; Convergence value: 1.91e-01
Epoch 3/5 --- Loss: 0.5868228; Time: 362.2209s; Convergence value: 1.14e-01
Epoch 4/5 --- Loss: 0.5854608; Time: 348.2068s; Convergence value: 7.26e-02
Epoch 5/5 --- Loss: 0.5849690; Time: 321.2144s; Convergence value: 1.00e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6152543; Time: 2.0071s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6152543; Time: 0.7294s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6152543; Time: 0.6976s; Convergence value: nan
Maximum number of training epoch

In [18]:
""" 
# Synthesize a dataset using the fitted network
environment = bandits.EnvironmentBanditsDrift(0.1)
model.set_device(torch.device('cpu'))
model.to(torch.device('cpu'))
rnn_agent = bandits.AgentNetwork(model, n_actions=2)

# Analysis
session_id = 0

choices = experiment_list_test[session_id].choices
rewards = experiment_list_test[session_id].rewards

list_probs = []
list_qs = []

# get q-values from groundtruth
qs_test, probs_test = bandits.get_update_dynamics(experiment_list_test[session_id], agent)
list_probs.append(np.expand_dims(probs_test, 0))
list_qs.append(np.expand_dims(qs_test, 0))

# get q-values from trained rnn
qs_rnn, probs_rnn = bandits.get_update_dynamics(experiment_list_test[session_id], rnn_agent)
list_probs.append(np.expand_dims(probs_rnn, 0))
list_qs.append(np.expand_dims(qs_rnn, 0))

colors = ['tab:blue', 'tab:orange', 'tab:pink', 'tab:grey']

# concatenate all choice probs and q-values
probs = np.concatenate(list_probs, axis=0)
qs = np.concatenate(list_qs, axis=0)

# normalize q-values
def normalize(qs):
  return (qs - np.min(qs, axis=1, keepdims=True)) / (np.max(qs, axis=1, keepdims=True) - np.min(qs, axis=1, keepdims=True))

qs = normalize(qs)
fig, axs = plt.subplots(4, 1, figsize=(20, 10))

reward_probs = np.stack([experiment_list_test[session_id].timeseries[:, i] for i in range(n_actions)], axis=0)
bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=reward_probs,
    timeseries_name='Reward Probs',
    labels=[f'Arm {a}' for a in range(n_actions)],
    color=['tab:purple', 'tab:cyan'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[0]),
    )

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=probs[:, :, 0],
    timeseries_name='Choice Probs',
    color=colors,
    labels=['Ground Truth', 'RNN'],
    binary=not non_binary_reward,
    fig_ax=(fig, axs[1]),
    )

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=qs[:, :, 0],
    timeseries_name='Q-Values',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[2]),
    )

dqs_arms = normalize(-1*np.diff(qs, axis=2))

bandits.plot_session(
    compare=True,
    choices=choices,
    rewards=rewards,
    timeseries=dqs_arms[:, :, 0],
    timeseries_name='dQ/dActions',
    color=colors,
    binary=not non_binary_reward,
    fig_ax=(fig, axs[3]),
    )

plt.show()
 """

IndexError: tensors used as indices must be long, byte or bool tensors

In [17]:
""" import pandas as pd

df = pd.DataFrame(columns=['model', 'loss'])

df["model"] = ["n32_eA_rF_vMedian"]*len(n32_eA_loss)
df["loss"] = n32_eA_loss

df.to_csv("losses.csv", index=False) """



In [10]:
""" # 16 submodels, ensemble average

# train model 
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 16
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
    # load trained parameters
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

if train:
  
  start_time = time.time()
  
  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  

  n16_eA_loss = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n16_eA_loss.append(float(loss))


  print(f'Training took {time.time() - start_time:.2f} seconds.')
  

  # save trained parameters  
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  ) """

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6656887; Time: 149.2490s; Convergence value: 3.34e-01
Epoch 2/5 --- Loss: 0.5774788; Time: 170.3928s; Convergence value: 2.00e-01
Epoch 3/5 --- Loss: 0.5747513; Time: 179.9744s; Convergence value: 1.20e-01
Epoch 4/5 --- Loss: 0.5744476; Time: 167.1248s; Convergence value: 7.62e-02
Epoch 5/5 --- Loss: 0.5742643; Time: 156.3004s; Convergence value: 1.55e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5972804; Time: 1.6211s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5972804; Time: 0.4725s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5972804; Time: 0.5172s; Convergence value: nan
Maximum number of training epoch

In [11]:
""" df = pd.DataFrame(columns=['model', 'loss'])

df["model"] = ["n16_eA_loss"]*len(n16_eA_loss)
df["loss"] = n16_eA_loss


df1 = pd.read_csv("losses.csv")
losses = df1.append(df)

losses.to_csv("losses.csv", index=False) """

In [13]:
""" # 4 submodels, ensemble average

# train model 
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
    # load trained parameters
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

if train:
  
  start_time = time.time()
  
  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  

  n4_eA_loss = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_loss.append(float(loss))


  print(f'Training took {time.time() - start_time:.2f} seconds.')
  

  # save trained parameters  
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  ) """

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.5625863; Time: 40.1743s; Convergence value: 4.37e-01
Epoch 2/5 --- Loss: 0.5621565; Time: 43.2809s; Convergence value: 1.99e-01
Epoch 3/5 --- Loss: 0.5623043; Time: 43.7317s; Convergence value: 1.17e-01
Epoch 4/5 --- Loss: 0.5623129; Time: 38.9261s; Convergence value: 7.30e-02
Epoch 5/5 --- Loss: 0.5622482; Time: 42.4550s; Convergence value: 1.28e-04
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5826510; Time: 1.1511s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5826511; Time: 0.8015s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5826511; Time: 0.4829s; Convergence value: nan
Maximum number of training epochs rea

In [15]:
""" df = pd.DataFrame(columns=['model', 'loss'])

df["model"] = ["n4_eA_rF_vMedian_loss"]*len(n4_eA_loss)
df["loss"] = n4_eA_loss


df1 = pd.read_csv("losses.csv")
losses = df1.append(df)

losses.to_csv("losses.csv", index=False) """



In [16]:
# 4 submodels, ensemble average, MEAN
#
# Experiment cell: builds `n_submodels` RLRNN models, fits them on a simulated
# bandit dataset (or a pickled one when `data` is True), records the loss on a
# simulated test dataset in `n4_eA_mean_loss`, and saves the model/optimizer
# state dicts to `params_path`.

# run switches
train = True        # fit the model; if False the model is only passed through fit_model with epochs=0
checkpoint = False  # load model/optimizer state from params_path before fitting
data = False        # load the training set from path_data instead of simulating it

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # simulate training and test data with the ground-truth Q-learning agent
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # derive the parameter file name from the ground-truth configuration
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # SECURITY: pickle.load can execute arbitrary code; only use a trusted path_data.
  # NOTE(review): this branch does not define dataset_test, which the validation
  # step below reads when train is True -- confirm how it should be provided.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # Bug fix: the original `Warning(...)` only built an exception object and
  # discarded it, so nothing was ever reported. print() is used because the
  # notebook's import cell silences the warnings module.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per sub-model
# NOTE(review): with use_lstm=True `model` is a single module rather than a
# list, so this comprehension (and the zip() calls below) would iterate over
# the module itself -- confirm before enabling use_lstm.
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is copied into every sub-model/optimizer
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
      model[i].load_state_dict(state_dict_model_i)
      optimizer_rnn[i].load_state_dict(state_dict_optim_i)
    # Bug fix: the original assigned this to `rnn`, replacing the imported
    # module with an instance and breaking every later `rnn.<attr>` lookup.
    # The object is not passed on below; `model` stays the list of sub-models.
    ensemble_rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # Bug fix: take the time right after fitting so that the reported duration
  # no longer includes the validation passes below.
  training_seconds = time.time() - start_time
  print(f'Training took {training_seconds:.2f} seconds.')

  # losses on the test dataset, one entry per validation pass
  n4_eA_mean_loss = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      # no optimizer is passed here; fit_model is used to obtain the loss
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_mean_loss.append(float(loss))

  # save trained parameters (single module/optimizer or a list of them)
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no training: pass the sub-models through fit_model with zero epochs
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6473184; Time: 38.7940s; Convergence value: 3.53e-01
Epoch 2/5 --- Loss: 0.5104391; Time: 49.3358s; Convergence value: 2.35e-01
Epoch 3/5 --- Loss: 0.5069841; Time: 44.6704s; Convergence value: 1.41e-01
Epoch 4/5 --- Loss: 0.5057298; Time: 41.4925s; Convergence value: 9.06e-02
Epoch 5/5 --- Loss: 0.5052085; Time: 41.6943s; Convergence value: 2.41e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6006864; Time: 1.5447s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6006864; Time: 0.6642s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6006864; Time: 0.6583s; Convergence value: nan
Maximum number of training epochs rea

In [17]:
# Append the validation losses of the "4 submodels, ensemble average, MEAN"
# run (collected in n4_eA_mean_loss by the training cell) to losses.csv.
df = pd.DataFrame({
    "model": ["n4_eA_mean_loss"] * len(n4_eA_mean_loss),
    "loss": n4_eA_mean_loss,
})

try:
    df1 = pd.read_csv("losses.csv")
except FileNotFoundError:
    # No results file yet (e.g. fresh checkout): start it with this run only.
    losses = df
else:
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    # and yields the same rows in the same order.
    losses = pd.concat([df1, df])

losses.to_csv("losses.csv", index=False)

### Sampling with replacement (`sampling_replacement = True`)

In [18]:
""" # 4 submodels, ensemble average, replacement true

# train model 
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = True
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  model = rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
    # load trained parameters
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

if train:
  
  start_time = time.time()
  
  #Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  

  n4_eA_rT_vA_loss = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rT_vA_loss.append(float(loss))


  print(f'Training took {time.time() - start_time:.2f} seconds.')
  

  # save trained parameters  
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  ) """

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.9813210; Time: 44.3188s; Convergence value: 1.87e-02
Epoch 2/5 --- Loss: 0.5939740; Time: 36.5132s; Convergence value: 2.20e-01
Epoch 3/5 --- Loss: 0.6020821; Time: 35.0467s; Convergence value: 1.37e-01
Epoch 4/5 --- Loss: 0.6115658; Time: 44.1253s; Convergence value: 9.46e-02
Epoch 5/5 --- Loss: 0.6077671; Time: 35.1974s; Convergence value: 7.03e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5912160; Time: 1.0227s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5912160; Time: 0.5679s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5912160; Time: 0.5664s; Convergence value: nan
Maximum number of training epochs rea

In [19]:
# n4_eA_rT_vA_loss
# Disabled export cell: the code below is wrapped in a string literal, so it is
# never executed. If run, it would append the validation losses collected in
# `n4_eA_rT_vA_loss` to losses.csv under the label "n4_eA_rT_vA_loss".
# NOTE(review): `DataFrame.append` was removed in pandas 2.0; use `pd.concat`
# if this cell is ever re-enabled.

""" df = pd.DataFrame(columns=['model', 'loss'])

df["model"] = ["n4_eA_rT_vA_loss"]*len(n4_eA_rT_vA_loss)
df["loss"] = n4_eA_rT_vA_loss


df1 = pd.read_csv("losses.csv")
losses = df1.append(df)

losses.to_csv("losses.csv", index=False) """



# until HERE
# NOTE(review): presumably marks the end of the already-exported experiments -- confirm.

In [23]:
# Experiment: 16 submodels, ensemble average, sampling with replacement.
# Simulates bandit data with a `bandits.AgentQ` agent, fits the RNN ensemble on
# it, evaluates the result on a separately simulated test set and saves the
# parameters to `params_path`.
# NOTE(review): the result list is named `n16_eA_rT_vA` ("vA" reads like average
# voting) but voting_type is MEDIAN below -- confirm which one was intended.

# train model
train = True  # True: fit and validate; False: only call fit_model with epochs=0
checkpoint = False  # True: load model/optimizer state from params_path before fitting
data = False  # False: simulate the datasets; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = True
n_submodels = 16
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # NOTE(review): the generated name only depends on the arguments passed here,
  # none of which are ensemble settings, so experiments that share these values
  # save to the same file.
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  # NOTE(review): dataset_test is not created on this path although the
  # validation step below needs it.
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # The original `Warning(...)` only constructed an exception object and threw
  # it away, so nothing was ever reported. A plain print is used because the
  # notebook silences the warnings module globally in its import cell.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this yields a single module, while the optimizer and
  # checkpoint code below iterate over `model`; the LSTM path presumably fails
  # there -- confirm before enabling use_lstm.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is shared by every submodel
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for m, o, state_dict_model_i, state_dict_optim_i in zip(model, optimizer_rnn, state_dict_model, state_dict_optimizer):
      m.load_state_dict(state_dict_model_i)
      o.load_state_dict(state_dict_optim_i)
    # Bound to its own name: the original assigned the result to `rnn`, which
    # replaced the imported `rnn` module and broke every later `rnn.*` access.
    ensemble_rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  # stop the clock here so the reported time covers training only, not validation
  training_time = time.time() - start_time

  n16_eA_rT_vA = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # NOTE(review): the recorded output shows the same loss for every repetition.
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n16_eA_rT_vA.append(float(loss))


  print(f'Training took {training_time:.2f} seconds.')


  # save trained parameters (single state dict, or one per submodel)
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no training: fit_model is only called with epochs=0
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )




Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.7806702; Time: 156.4051s; Convergence value: 2.19e-01
Epoch 2/5 --- Loss: 0.6086048; Time: 195.4610s; Convergence value: 1.94e-01
Epoch 3/5 --- Loss: 0.6179161; Time: 174.1217s; Convergence value: 1.20e-01
Epoch 4/5 --- Loss: 0.6172372; Time: 174.7405s; Convergence value: 7.76e-02
Epoch 5/5 --- Loss: 0.6013812; Time: 169.7018s; Convergence value: 3.62e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5916854; Time: 1.4950s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5916854; Time: 0.9518s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5916854; Time: 0.7560s; Convergence value: nan
Maximum number of training epoch

PermissionError: [Errno 13] Permission denied: 'losses.csv'

In [24]:
# Append the validation losses of the 16-submodel run to losses.csv.
# One row per validation pass, labelled with the experiment name.
df = pd.DataFrame({
    "model": ["n16_eA_rT_vA"] * len(n16_eA_rT_vA),
    "loss": n16_eA_rT_vA,
})

try:
  df1 = pd.read_csv("losses.csv")
  # DataFrame.append was removed in pandas 2.0; concat is the replacement
  losses = pd.concat([df1, df])
except FileNotFoundError:
  # first export: no previous results to extend
  losses = df

losses.to_csv("losses.csv", index=False)

In [25]:
# Experiment: 32 submodels, ensemble average, sampling with replacement, median voting.
# Simulates bandit data with a `bandits.AgentQ` agent, fits the RNN ensemble on
# it, evaluates the result on a separately simulated test set, saves the
# parameters to `params_path` and appends the validation losses to losses.csv.

# train model
train = True  # True: fit and validate; False: only call fit_model with epochs=0
checkpoint = False  # True: load model/optimizer state from params_path before fitting
data = False  # False: simulate the datasets; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = True
n_submodels = 32
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 256
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # NOTE(review): the generated name only depends on the arguments passed here,
  # none of which are ensemble settings, so experiments that share these values
  # save to the same file.
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  # NOTE(review): dataset_test is not created on this path although the
  # validation step below needs it.
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # The original `Warning(...)` only constructed an exception object and threw
  # it away, so nothing was ever reported. A plain print is used because the
  # notebook silences the warnings module globally in its import cell.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this yields a single module, while the optimizer and
  # checkpoint code below iterate over `model`; the LSTM path presumably fails
  # there -- confirm before enabling use_lstm.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is shared by every submodel
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for m, o, state_dict_model_i, state_dict_optim_i in zip(model, optimizer_rnn, state_dict_model, state_dict_optimizer):
      m.load_state_dict(state_dict_model_i)
      o.load_state_dict(state_dict_optim_i)
    # Bound to its own name: the original assigned the result to `rnn`, which
    # replaced the imported `rnn` module and broke every later `rnn.*` access.
    ensemble_rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  # stop the clock here so the reported time covers training only, not validation
  training_time = time.time() - start_time

  n32_eA_rT_vMed = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # NOTE(review): the recorded output shows the same loss for every repetition.
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n32_eA_rT_vMed.append(float(loss))


  print(f'Training took {training_time:.2f} seconds.')


  # save trained parameters (single state dict, or one per submodel)
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no training: fit_model is only called with epochs=0
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )


# Append the validation losses to losses.csv, one row per validation pass.
# NOTE(review): `n32_eA_rT_vMed` only exists after the train branch has run.
df = pd.DataFrame({
    "model": ["n32_eA_rT_vMed"] * len(n32_eA_rT_vMed),
    "loss": n32_eA_rT_vMed,
})

try:
  df1 = pd.read_csv("losses.csv")
  # DataFrame.append was removed in pandas 2.0; concat is the replacement
  losses = pd.concat([df1, df])
except FileNotFoundError:
  # first export: no previous results to extend
  losses = df

losses.to_csv("losses.csv", index=False)

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.7097447; Time: 388.3936s; Convergence value: 2.90e-01
Epoch 2/5 --- Loss: 0.5554522; Time: 376.2014s; Convergence value: 2.16e-01
Epoch 3/5 --- Loss: 0.5579701; Time: 326.1727s; Convergence value: 1.30e-01
Epoch 4/5 --- Loss: 0.5586378; Time: 348.1144s; Convergence value: 8.36e-02
Epoch 5/5 --- Loss: 0.5581956; Time: 332.8312s; Convergence value: 2.66e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6050161; Time: 1.2164s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6050161; Time: 0.6356s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6050161; Time: 0.5270s; Convergence value: nan
Maximum number of training epoch

### Varying the number of training sessions (`n_sessions`)

In [26]:
# Experiment: 4 submodels, ensemble average, sampling without replacement, n_sessions = 32.
# Simulates bandit data with a `bandits.AgentQ` agent, fits the RNN ensemble on
# it, evaluates the result on a separately simulated test set, saves the
# parameters to `params_path` and appends the validation losses to losses.csv.

# train model
train = True  # True: fit and validate; False: only call fit_model with epochs=0
checkpoint = False  # True: load model/optimizer state from params_path before fitting
data = False  # False: simulate the datasets; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 32
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # NOTE(review): the generated name only depends on the arguments passed here,
  # none of which are ensemble or session settings, so experiments that share
  # these values save to the same file.
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  # NOTE(review): dataset_test is not created on this path although the
  # validation step below needs it.
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # The original `Warning(...)` only constructed an exception object and threw
  # it away, so nothing was ever reported. A plain print is used because the
  # notebook silences the warnings module globally in its import cell.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this yields a single module, while the optimizer and
  # checkpoint code below iterate over `model`; the LSTM path presumably fails
  # there -- confirm before enabling use_lstm.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is shared by every submodel
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for m, o, state_dict_model_i, state_dict_optim_i in zip(model, optimizer_rnn, state_dict_model, state_dict_optimizer):
      m.load_state_dict(state_dict_model_i)
      o.load_state_dict(state_dict_optim_i)
    # Bound to its own name: the original assigned the result to `rnn`, which
    # replaced the imported `rnn` module and broke every later `rnn.*` access.
    ensemble_rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  # stop the clock here so the reported time covers training only, not validation
  training_time = time.time() - start_time

  n4_eA_rF_vMed_s32 = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # NOTE(review): the recorded output shows the same loss for every repetition.
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rF_vMed_s32.append(float(loss))


  print(f'Training took {training_time:.2f} seconds.')


  # save trained parameters (single state dict, or one per submodel)
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no training: fit_model is only called with epochs=0
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )


# Append the validation losses to losses.csv, one row per validation pass.
# NOTE(review): `n4_eA_rF_vMed_s32` only exists after the train branch has run.
df = pd.DataFrame({
    "model": ["n4_eA_rF_vMed_s32"] * len(n4_eA_rF_vMed_s32),
    "loss": n4_eA_rF_vMed_s32,
})

try:
  df1 = pd.read_csv("losses.csv")
  # DataFrame.append was removed in pandas 2.0; concat is the replacement
  losses = pd.concat([df1, df])
except FileNotFoundError:
  # first export: no previous results to extend
  losses = df

losses.to_csv("losses.csv", index=False)

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.7676185; Time: 29.5787s; Convergence value: 2.32e-01
Epoch 2/5 --- Loss: 0.6138930; Time: 29.1224s; Convergence value: 1.89e-01
Epoch 3/5 --- Loss: 0.6126850; Time: 28.9014s; Convergence value: 1.14e-01
Epoch 4/5 --- Loss: 0.6008949; Time: 28.5122s; Convergence value: 7.72e-02
Epoch 5/5 --- Loss: 0.5930960; Time: 28.3805s; Convergence value: 3.18e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6029521; Time: 0.7832s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6029521; Time: 0.5164s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6029521; Time: 0.5945s; Convergence value: nan
Maximum number of training epochs rea

In [27]:
# Experiment: 4 submodels, ensemble average, sampling without replacement, n_sessions = 64.
# Simulates bandit data with a `bandits.AgentQ` agent, fits the RNN ensemble on
# it, evaluates the result on a separately simulated test set, saves the
# parameters to `params_path` and appends the validation losses to losses.csv.

# train model
train = True  # True: fit and validate; False: only call fit_model with epochs=0
checkpoint = False  # True: load model/optimizer state from params_path before fitting
data = False  # False: simulate the datasets; True: load dataset_train from path_data

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 64
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)

  # NOTE(review): the generated name only depends on the arguments passed here,
  # none of which are ensemble or session settings, so experiments that share
  # these values save to the same file.
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

else:
  # load data
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  # NOTE(review): dataset_test is not created on this path although the
  # validation step below needs it.
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  # The original `Warning(...)` only constructed an exception object and threw
  # it away, so nothing was ever reported. A plain print is used because the
  # notebook silences the warnings module globally in its import cell.
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
if use_lstm:
  # NOTE(review): this yields a single module, while the optimizer and
  # checkpoint code below iterate over `model`; the LSTM path presumably fails
  # there -- confirm before enabling use_lstm.
  model = rnn.LSTM(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      device=device,
      ).to(device)
else:
  model = [rnn.RLRNN(
      n_actions=n_actions,
      hidden_size=hidden_size,
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

if checkpoint:
  # load trained parameters
  state_dict = torch.load(params_path, map_location=torch.device('cpu'))
  state_dict_model = state_dict['model']
  state_dict_optimizer = state_dict['optimizer']
  if isinstance(state_dict_model, dict):
    # a single saved state is shared by every submodel
    for m, o in zip(model, optimizer_rnn):
      m.load_state_dict(state_dict_model)
      o.load_state_dict(state_dict_optimizer)
  elif isinstance(state_dict_model, list):
    print('Loading ensemble model...')
    for m, o, state_dict_model_i, state_dict_optim_i in zip(model, optimizer_rnn, state_dict_model, state_dict_optimizer):
      m.load_state_dict(state_dict_model_i)
      o.load_state_dict(state_dict_optim_i)
    # Bound to its own name: the original assigned the result to `rnn`, which
    # replaced the imported `rnn` module and broke every later `rnn.*` access.
    ensemble_rnn = rnn.EnsembleRNN(model, voting_type=voting_type)
  print('Loaded parameters.')

if train:

  start_time = time.time()

  # fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )
  # stop the clock here so the reported time covers training only, not validation
  training_time = time.time() - start_time

  n4_eA_rF_vMed_s64 = []

  # validate model
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # NOTE(review): the recorded output shows the same loss for every repetition.
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rF_vMed_s64.append(float(loss))


  print(f'Training took {training_time:.2f} seconds.')


  # save trained parameters (single state dict, or one per submodel)
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)

  print(f'Saved RNN parameters to file {params_path}.')

else:
  # no training: fit_model is only called with epochs=0
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )


# Append the validation losses to losses.csv, one row per validation pass.
# NOTE(review): `n4_eA_rF_vMed_s64` only exists after the train branch has run.
df = pd.DataFrame({
    "model": ["n4_eA_rF_vMed_s64"] * len(n4_eA_rF_vMed_s64),
    "loss": n4_eA_rF_vMed_s64,
})

try:
  df1 = pd.read_csv("losses.csv")
  # DataFrame.append was removed in pandas 2.0; concat is the replacement
  losses = pd.concat([df1, df])
except FileNotFoundError:
  # first export: no previous results to extend
  losses = df

losses.to_csv("losses.csv", index=False)


Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.5211298; Time: 33.8978s; Convergence value: 4.79e-01
Epoch 2/5 --- Loss: 0.5298360; Time: 29.3004s; Convergence value: 2.22e-01
Epoch 3/5 --- Loss: 0.5243396; Time: 34.9633s; Convergence value: 1.33e-01
Epoch 4/5 --- Loss: 0.5216225; Time: 29.3338s; Convergence value: 8.42e-02
Epoch 5/5 --- Loss: 0.5190710; Time: 29.3163s; Convergence value: 4.28e-03
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6378747; Time: 0.7667s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6378747; Time: 0.5252s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6378747; Time: 0.5513s; Convergence value: nan
Maximum number of training epochs rea

In [28]:
# 4 submodels, ensemble average, replacement false, n sessions = 128


# train model 
train = True
checkpoint = False
data = False

path_data = 'data/dataset_train.pkl'
params_path = 'params/params_lstm_b3.pkl'  # overwritten if data is False (adapted to the ground truth model)

# rnn parameters
hidden_size = 4
last_output = False
last_state = False
use_lstm = False

# ensemble parameters
evolution_interval = None
sampling_replacement = False
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # necessary if ensemble==True


# training parameters
epochs = 5
n_steps_per_call = 16  # None for full sequence
batch_size = None  # None for one batch per epoch
learning_rate = 1e-2
convergence_threshold = 1e-6


# ground truth parameters
gen_alpha = .25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.
correlated_update = False  # possible values: True, False


# environment parameters
n_actions = 2
sigma = 0.1
n_trials_per_session = 200
n_sessions = 128
correlated_reward = False
non_binary_reward = False


# tracked variables in the RNN
x_train_list = ['xQf','xQr', 'xQc']
control_list = ['ca','ca[k-1]', 'cr']
sindy_feature_list = x_train_list + control_list

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if not data:
  # setup
  environment = bandits.EnvironmentBanditsDrift(sigma=sigma, n_actions=n_actions, non_binary_reward=non_binary_reward, correlated_reward=correlated_reward)
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)  

  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=n_trials_per_session,
      n_sessions=n_sessions,
      device=device)

  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_trials_per_session=200,
      n_sessions=1024,
      device=device)
  
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )
  
else:
  # load data
  with open(path_data, 'rb') as f:
      dataset_train = pickle.load(f)

if ensemble > -1 and n_submodels == 1:
  Warning('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
# Both branches build a list with one network per submodel. The optimizer
# comprehension below iterates over `model`, so the LSTM branch must yield a
# list too; previously it produced a bare module and `for m in model` failed
# (assuming rnn.LSTM is a regular torch module, which is not iterable).
if use_lstm:
  model = [rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
           for _ in range(n_submodels)]
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

# Optionally restore previously saved parameters into the freshly built
# submodels and their optimizers.
if checkpoint:
    # load trained parameters (tensors are mapped onto the CPU while loading)
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      # a single saved state is loaded into every submodel/optimizer pair
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        # one saved state per submodel
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        # Keep the wrapper under its own name. Assigning it to `rnn` (as before)
        # replaced the imported `rnn` module with an instance, so every later
        # `rnn.RLRNN` / `rnn.EnsembleRNN` lookup would fail.
        ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

# Validation losses of this run. Created before the `if train:` branch so the
# name also exists when train is False; the results table built afterwards
# reads it unconditionally and would otherwise raise a NameError.
n4_eA_rF_vMed_s128 = []

if train:
  
  start_time = time.time()
  
  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validate model
  # NOTE: dataset_test is only created when data is False.
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # ten gradient-free passes over the test set; each pass records one loss value
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rF_vMed_s128.append(float(loss))

  # the reported duration covers training and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters: a single state dict for a module/optimizer,
  # otherwise one state dict per list entry
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  # No training requested: fit_model is called with epochs=0.
  # NOTE(review): presumably this only wraps the submodels according to
  # ensemble_type without updating them - confirm in rnn_training.fit_model.
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



# Table with the validation losses of this run, labelled with the run name.
df = pd.DataFrame({
    "model": ["n4_eA_rF_vMed_s128"] * len(n4_eA_rF_vMed_s128),
    "loss": n4_eA_rF_vMed_s128,
})

# Add the new rows to the running results file. Start from an empty table if
# the file does not exist yet instead of failing in read_csv.
try:
  df1 = pd.read_csv("losses.csv")
except FileNotFoundError:
  df1 = pd.DataFrame(columns=['model', 'loss'])

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
losses = pd.concat([df1, df])

losses.to_csv("losses.csv", index=False)



# run til HERE


Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.5421701; Time: 32.4095s; Convergence value: 4.58e-01
Epoch 2/5 --- Loss: 0.5430398; Time: 30.6045s; Convergence value: 2.09e-01
Epoch 3/5 --- Loss: 0.5407185; Time: 31.0489s; Convergence value: 1.23e-01
Epoch 4/5 --- Loss: 0.5400136; Time: 30.7992s; Convergence value: 7.74e-02
Epoch 5/5 --- Loss: 0.5396372; Time: 32.2932s; Convergence value: 9.82e-04
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5863687; Time: 0.8164s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5863686; Time: 0.5332s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5863686; Time: 0.5687s; Convergence value: nan
Maximum number of training epochs rea

### Trials

In [29]:
# Run configuration: 4 submodels, ensemble average, sampling without
# replacement, 128 training sessions, 50 trials per session


# --- run mode ---
train = True        # fit the model in this run
checkpoint = False  # load previously saved parameters before fitting
data = False        # False: simulate a new dataset; True: load path_data

path_data = 'data/dataset_train.pkl'
# default parameter file; replaced by an auto-generated name when data is False
params_path = 'params/params_lstm_b3.pkl'

# --- rnn parameters ---
hidden_size = 4
use_lstm = False
last_output = False
last_state = False

# --- ensemble parameters ---
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # only needed when an ensemble is used
sampling_replacement = False
evolution_interval = None

# --- training parameters ---
epochs = 5
learning_rate = 0.01
convergence_threshold = 1e-6
batch_size = None  # None for one batch per epoch
n_steps_per_call = 16  # None for full sequence

# --- ground truth parameters ---
gen_alpha = 0.25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.0
correlated_update = False  # possible values: True, False

# --- environment parameters ---
n_actions = 2
sigma = 0.1
n_sessions = 128
n_trials_per_session = 50  # previously 200
correlated_reward = False
non_binary_reward = False

# --- tracked variables in the RNN ---
x_train_list = ['xQf', 'xQr', 'xQc']
control_list = ['ca', 'ca[k-1]', 'cr']
sindy_feature_list = [*x_train_list, *control_list]

# run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Get the training data: load a pickled dataset from disk, or simulate a new
# one with the ground-truth agent.
if data:
  # NOTE: this branch only defines dataset_train. dataset_test is not created
  # here and params_path keeps the value assigned in the configuration.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

else:
  # simulation setup: drifting bandit environment + ground-truth Q agent
  environment = bandits.EnvironmentBanditsDrift(
      sigma=sigma,
      n_actions=n_actions,
      non_binary_reward=non_binary_reward,
      correlated_reward=correlated_reward,
  )
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  # training set, sized by the configured sessions / trials
  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_sessions=n_sessions,
      n_trials_per_session=n_trials_per_session,
      device=device,
  )

  # test set with a fixed size (1024 sessions of 200 trials)
  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_sessions=1024,
      n_trials_per_session=200,
      device=device,
  )

  # derive the parameter file name from the ground-truth settings
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

# An ensemble needs more than one submodel, so fall back to plain training.
# The original `Warning(...)` call only constructed an exception object and
# discarded it, so the message was never shown. print() is used rather than
# warnings.warn() because this notebook silences all warnings at import time
# (warnings.filterwarnings("ignore")).
# NOTE(review): `ensemble > -1` presumably means "any type other than NONE" - confirm.
if ensemble > -1 and n_submodels == 1:
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
# Both branches build a list with one network per submodel. The optimizer
# comprehension below iterates over `model`, so the LSTM branch must yield a
# list too; previously it produced a bare module and `for m in model` failed
# (assuming rnn.LSTM is a regular torch module, which is not iterable).
if use_lstm:
  model = [rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
           for _ in range(n_submodels)]
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

# Optionally restore previously saved parameters into the freshly built
# submodels and their optimizers.
if checkpoint:
    # load trained parameters (tensors are mapped onto the CPU while loading)
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      # a single saved state is loaded into every submodel/optimizer pair
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        # one saved state per submodel
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        # Keep the wrapper under its own name. Assigning it to `rnn` (as before)
        # replaced the imported `rnn` module with an instance, so every later
        # `rnn.RLRNN` / `rnn.EnsembleRNN` lookup would fail.
        ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

# Validation losses of this run. Created before the `if train:` branch so the
# name also exists when train is False; the results table built afterwards
# reads it unconditionally and would otherwise raise a NameError.
n4_eA_rF_vMed_s128_t50 = []

if train:
  
  start_time = time.time()
  
  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validate model
  # NOTE: dataset_test is only created when data is False.
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # ten gradient-free passes over the test set; each pass records one loss value
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rF_vMed_s128_t50.append(float(loss))

  # the reported duration covers training and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters: a single state dict for a module/optimizer,
  # otherwise one state dict per list entry
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  # No training requested: fit_model is called with epochs=0.
  # NOTE(review): presumably this only wraps the submodels according to
  # ensemble_type without updating them - confirm in rnn_training.fit_model.
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



# Table with the validation losses of this run, labelled with the run name.
df = pd.DataFrame({
    "model": ["n4_eA_rF_vMed_s128_t50"] * len(n4_eA_rF_vMed_s128_t50),
    "loss": n4_eA_rF_vMed_s128_t50,
})

# Add the new rows to the running results file. Start from an empty table if
# the file does not exist yet instead of failing in read_csv.
try:
  df1 = pd.read_csv("losses.csv")
except FileNotFoundError:
  df1 = pd.DataFrame(columns=['model', 'loss'])

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
losses = pd.concat([df1, df])

losses.to_csv("losses.csv", index=False)

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 1.0352575; Time: 7.7921s; Convergence value: 3.53e-02
Epoch 2/5 --- Loss: 0.6572930; Time: 7.1495s; Convergence value: 2.22e-01
Epoch 3/5 --- Loss: 0.5730930; Time: 6.8980s; Convergence value: 1.69e-01
Epoch 4/5 --- Loss: 0.5614052; Time: 6.7220s; Convergence value: 1.17e-01
Epoch 5/5 --- Loss: 0.5523785; Time: 6.8675s; Convergence value: 8.80e-02
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.6678745; Time: 0.6247s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6678746; Time: 0.5222s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.6678746; Time: 0.5353s; Convergence value: nan
Maximum number of training epochs reached.

In [30]:
# Run configuration: 4 submodels, ensemble average, sampling without
# replacement, 64 training sessions, 50 trials per session


# --- run mode ---
train = True        # fit the model in this run
checkpoint = False  # load previously saved parameters before fitting
data = False        # False: simulate a new dataset; True: load path_data

path_data = 'data/dataset_train.pkl'
# default parameter file; replaced by an auto-generated name when data is False
params_path = 'params/params_lstm_b3.pkl'

# --- rnn parameters ---
hidden_size = 4
use_lstm = False
last_output = False
last_state = False

# --- ensemble parameters ---
n_submodels = 4
ensemble = rnn_training.ensemble_types.AVERAGE
voting_type = rnn.EnsembleRNN.MEDIAN  # only needed when an ensemble is used
sampling_replacement = False
evolution_interval = None

# --- training parameters ---
epochs = 5
learning_rate = 0.01
convergence_threshold = 1e-6
batch_size = None  # None for one batch per epoch
n_steps_per_call = 16  # None for full sequence

# --- ground truth parameters ---
gen_alpha = 0.25
gen_beta = 3
forget_rate = 0.1  # possible values: 0., 0.1
perseverance_bias = 0.0
correlated_update = False  # possible values: True, False

# --- environment parameters ---
n_actions = 2
sigma = 0.1
n_sessions = 64
n_trials_per_session = 50  # previously 200
correlated_reward = False
non_binary_reward = False

# --- tracked variables in the RNN ---
x_train_list = ['xQf', 'xQr', 'xQc']
control_list = ['ca', 'ca[k-1]', 'cr']
sindy_feature_list = [*x_train_list, *control_list]

# run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Get the training data: load a pickled dataset from disk, or simulate a new
# one with the ground-truth agent.
if data:
  # NOTE: this branch only defines dataset_train. dataset_test is not created
  # here and params_path keeps the value assigned in the configuration.
  with open(path_data, 'rb') as f:
    dataset_train = pickle.load(f)

else:
  # simulation setup: drifting bandit environment + ground-truth Q agent
  environment = bandits.EnvironmentBanditsDrift(
      sigma=sigma,
      n_actions=n_actions,
      non_binary_reward=non_binary_reward,
      correlated_reward=correlated_reward,
  )
  agent = bandits.AgentQ(gen_alpha, gen_beta, n_actions, forget_rate, perseverance_bias, correlated_update)

  # training set, sized by the configured sessions / trials
  dataset_train, experiment_list_train = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_sessions=n_sessions,
      n_trials_per_session=n_trials_per_session,
      device=device,
  )

  # test set with a fixed size (1024 sessions of 200 trials)
  dataset_test, experiment_list_test = bandits.create_dataset(
      agent=agent,
      environment=environment,
      n_sessions=1024,
      n_trials_per_session=200,
      device=device,
  )

  # derive the parameter file name from the ground-truth settings
  params_path = rnn_utils.parameter_file_naming(
      'params/params',
      use_lstm,
      last_output,
      last_state,
      gen_beta,
      forget_rate,
      perseverance_bias,
      correlated_update,
      non_binary_reward,
      verbose=True,
  )

# An ensemble needs more than one submodel, so fall back to plain training.
# The original `Warning(...)` call only constructed an exception object and
# discarded it, so the message was never shown. print() is used rather than
# warnings.warn() because this notebook silences all warnings at import time
# (warnings.filterwarnings("ignore")).
# NOTE(review): `ensemble > -1` presumably means "any type other than NONE" - confirm.
if ensemble > -1 and n_submodels == 1:
  print('Ensemble is actived but n_submodels is set to 1. Deactivating ensemble...')
  ensemble = rnn_training.ensemble_types.NONE

# define model
# Both branches build a list with one network per submodel. The optimizer
# comprehension below iterates over `model`, so the LSTM branch must yield a
# list too; previously it produced a bare module and `for m in model` failed
# (assuming rnn.LSTM is a regular torch module, which is not iterable).
if use_lstm:
  model = [rnn.LSTM(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      device=device,
      ).to(device)
           for _ in range(n_submodels)]
else:
  model = [rnn.RLRNN(
      n_actions=n_actions, 
      hidden_size=hidden_size, 
      init_value=0.5,
      last_output=last_output,
      last_state=last_state,
      device=device,
      list_sindy_signals=sindy_feature_list,
      ).to(device)
           for _ in range(n_submodels)]

# one Adam optimizer per submodel
optimizer_rnn = [torch.optim.Adam(m.parameters(), lr=learning_rate) for m in model]

# Optionally restore previously saved parameters into the freshly built
# submodels and their optimizers.
if checkpoint:
    # load trained parameters (tensors are mapped onto the CPU while loading)
    state_dict = torch.load(params_path, map_location=torch.device('cpu'))
    state_dict_model = state_dict['model']
    state_dict_optimizer = state_dict['optimizer']
    if isinstance(state_dict_model, dict):
      # a single saved state is loaded into every submodel/optimizer pair
      for m, o in zip(model, optimizer_rnn):
        m.load_state_dict(state_dict_model)
        o.load_state_dict(state_dict_optimizer)
    elif isinstance(state_dict_model, list):
        # one saved state per submodel
        print('Loading ensemble model...')
        for i, state_dict_model_i, state_dict_optim_i in zip(range(n_submodels), state_dict_model, state_dict_optimizer):
            model[i].load_state_dict(state_dict_model_i)
            optimizer_rnn[i].load_state_dict(state_dict_optim_i)
        # Keep the wrapper under its own name. Assigning it to `rnn` (as before)
        # replaced the imported `rnn` module with an instance, so every later
        # `rnn.RLRNN` / `rnn.EnsembleRNN` lookup would fail.
        ensemble_model = rnn.EnsembleRNN(model, voting_type=voting_type)
    print('Loaded parameters.')

# Validation losses of this run. Created before the `if train:` branch so the
# name also exists when train is False; the results table built afterwards
# reads it unconditionally and would otherwise raise a NameError.
n4_eA_rF_vMed_s64_t50 = []

if train:
  
  start_time = time.time()
  
  # Fit the hybrid RNN
  print('Training the hybrid RNN...')
  model, optimizer_rnn, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      optimizer=optimizer_rnn,
      convergence_threshold=convergence_threshold,
      epochs=epochs,
      batch_size=batch_size,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      sampling_replacement=sampling_replacement,
      evolution_interval=evolution_interval,
      n_steps_per_call=n_steps_per_call,
  )

  # validate model
  # NOTE: dataset_test is only created when data is False.
  print('\nValidating the trained hybrid RNN on a test dataset...')

  # ten gradient-free passes over the test set; each pass records one loss value
  for _ in range(10):
    with torch.no_grad():
      model, _, loss = rnn_training.fit_model(
          model=model,
          dataset=dataset_test,
          n_steps_per_call=1,
      )
      n4_eA_rF_vMed_s64_t50.append(float(loss))

  # the reported duration covers training and the validation passes above
  print(f'Training took {time.time() - start_time:.2f} seconds.')

  # save trained parameters: a single state dict for a module/optimizer,
  # otherwise one state dict per list entry
  state_dict = {
    'model': model.state_dict() if isinstance(model, torch.nn.Module) else [model_i.state_dict() for model_i in model],
    'optimizer': optimizer_rnn.state_dict() if isinstance(optimizer_rnn, torch.optim.Adam) else [optim_i.state_dict() for optim_i in optimizer_rnn],
  }
  torch.save(state_dict, params_path)
  
  print(f'Saved RNN parameters to file {params_path}.')

else:
  # No training requested: fit_model is called with epochs=0.
  # NOTE(review): presumably this only wraps the submodels according to
  # ensemble_type without updating them - confirm in rnn_training.fit_model.
  model, _, _ = rnn_training.fit_model(
      model=model,
      dataset=dataset_train,
      epochs=0,
      n_submodels=n_submodels,
      ensemble_type=ensemble,
      voting_type=voting_type,
      verbose=True
  )



# Table with the validation losses of this run, labelled with the run name.
df = pd.DataFrame({
    "model": ["n4_eA_rF_vMed_s64_t50"] * len(n4_eA_rF_vMed_s64_t50),
    "loss": n4_eA_rF_vMed_s64_t50,
})

# Add the new rows to the running results file. Start from an empty table if
# the file does not exist yet instead of failing in read_csv.
try:
  df1 = pd.read_csv("losses.csv")
except FileNotFoundError:
  df1 = pd.DataFrame(columns=['model', 'loss'])

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
losses = pd.concat([df1, df])

losses.to_csv("losses.csv", index=False)

Automatically generated name for model parameter file: params/params_rnn_b3_f01.pkl.
Training the hybrid RNN...
Epoch 1/5 --- Loss: 0.6090691; Time: 6.9239s; Convergence value: 3.91e-01
Epoch 2/5 --- Loss: 0.5869692; Time: 6.8027s; Convergence value: 1.90e-01
Epoch 3/5 --- Loss: 0.5752981; Time: 6.6552s; Convergence value: 1.16e-01
Epoch 4/5 --- Loss: 0.5748299; Time: 6.6199s; Convergence value: 7.35e-02
Epoch 5/5 --- Loss: 0.5749742; Time: 6.6964s; Convergence value: 6.46e-03
Maximum number of training epochs reached.
Model did not converge yet.

Validating the trained hybrid RNN on a test dataset...
Epoch 1/1 --- Loss: 0.5737439; Time: 0.6687s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5737439; Time: 0.5791s; Convergence value: nan
Maximum number of training epochs reached.
Model did not converge yet.
Epoch 1/1 --- Loss: 0.5737439; Time: 0.6006s; Convergence value: nan
Maximum number of training epochs reached.