In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import time
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import warnings
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bare expression so the notebook displays the selected device as the cell output.
device

device(type='cpu')

In [None]:
class GRU_RNN_TWO(nn.Module):
    """Two-stage GRU network.

    Stage 0 (``hidden_0`` -> ``out_0`` -> ReLU) maps the input sequence to a
    vector of width ``num_parameters_embedding`` per time step; two linear
    heads (``reg_a`` and ``reg_b``) each regress one scalar from that vector.

    Stage 1 (``hidden_1`` -> ``out_1`` -> softmax) receives the original input
    concatenated with the stage-0 vector and returns a probability
    distribution of width ``output_size`` per time step.

    Args:
        input_size: number of features per time step of the input.
        hidden_size: hidden-state width of both GRUs.
        num_parameters_embedding: width of the stage-0 output.
        output_size: width of the stage-1 (softmax) output.
        dropout: forwarded to ``nn.GRU``. Both GRUs are single-layer and
            ``nn.GRU`` only applies dropout between stacked layers, so a
            non-zero value has no effect here (PyTorch emits a warning).
    """

    def __init__(self,input_size, hidden_size, num_parameters_embedding, output_size, dropout):
        super(GRU_RNN_TWO, self).__init__()

        self.input_size = input_size
        self.num_parameters_embedding = num_parameters_embedding

        # Stage 0: reads the raw input.
        self.hidden_0 = nn.GRU(
                    input_size=input_size,
                    hidden_size=hidden_size,
                    num_layers=1,
                    batch_first=True,
                    dropout=dropout
        )

        # Stage 1: reads the raw input plus the stage-0 embedding.
        self.hidden_1 = nn.GRU(
                    input_size=input_size + num_parameters_embedding,
                    hidden_size=hidden_size,
                    num_layers=1,
                    batch_first=True,
                    dropout=dropout
        )

        self.out_0 = nn.Linear(hidden_size, num_parameters_embedding)
        self.relu = nn.ReLU()

        self.reg_a = nn.Linear(num_parameters_embedding, 1)
        self.reg_b = nn.Linear(num_parameters_embedding, 1)

        self.out_1 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run both stages on ``x``.

        Args:
            x: tensor of shape (batch, seq_len, input_size); the GRUs are
                built with ``batch_first=True``.

        Returns:
            Tuple ``(output_0, output_1, hn_0, hn_1, output_a, output_b)``:
            the ReLU-activated stage-0 output, the stage-1 softmax output,
            the final hidden states of both GRUs, and the two scalar
            regression outputs (last dimension of size 1).
        """
        # parameters estimation
        output_0, hn_0 = self.hidden_0(x)
        output_0 = self.relu(self.out_0(output_0))

        output_a = self.reg_a(output_0)
        output_b = self.reg_b(output_0)

        # Concatenate along the feature axis. This keeps every batch element;
        # indexing with [0] and reshaping would silently drop all but the first.
        input_1 = torch.cat([x, output_0], dim=-1)

        # action prediction
        output_1, hn_1 = self.hidden_1(input_1)
        output_1 = F.softmax(self.out_1(output_1), dim=-1)

        return output_0, output_1, hn_0, hn_1, output_a, output_b

In [None]:
class behavior_dataset(Dataset):
    """Dataset built from the trials of a single agent.

    For trial ``t`` the sample is:
        x[t] = [reward_(t-1), one_hot(action_(t-1))]
        y[t] = [one_hot(action_t), parameters_embedding_t, alpha_t, beta_t]

    The input of the first trial of every block of ``block_size`` trials is
    all zeros, so no information from the previous block leaks into it.

    Args:
        dataframe: rows of one agent with the columns 'action', 'reward',
            'parameters_embedding', 'alpha' and 'beta'.
        block_size: number of consecutive trials per block (default 10).
    """
    def __init__(self, dataframe, block_size=10):
        n_trials = len(dataframe)

        # action one hot transformation
        action = torch.tensor(np.array(dataframe['action']).reshape(n_trials), dtype=torch.long)
        # Always use at least two classes so an agent that only ever picks one
        # action still gets a two-column one-hot encoding.
        n_classes = max(len(action.unique()), 2)
        action_onehot = nn.functional.one_hot(action, n_classes)

        # reward (stored as integers)
        reward = torch.tensor(np.array(dataframe['reward']).reshape(n_trials), dtype=torch.long)

        # concatenating reward and action
        reward_action = torch.cat([reward[:, None], action_onehot], 1)

        # shift by one trial: prepend a zero row and drop the last row
        reward_action_shift = nn.functional.pad(reward_action, [0, 0, 1, 0])[:-1]

        # Zero the input of the first trial of every block. The width is taken
        # from the tensor itself rather than from a notebook-level constant.
        reward_action_shift[::block_size] = 0

        # parameters embedding label: cast to integer first, then stored as float
        parameters_embedding = torch.tensor(
            np.array(dataframe['parameters_embedding']).reshape(n_trials), dtype=torch.long
        ).to(torch.float32)

        p_a = torch.tensor(np.array(dataframe['alpha']).reshape(n_trials), dtype=torch.float32)
        p_b = torch.tensor(np.array(dataframe['beta']).reshape(n_trials), dtype=torch.float32)

        # network input
        x = reward_action_shift

        # network output
        y = torch.cat([action_onehot, parameters_embedding[:, None],
                       p_a[:, None], p_b[:, None],
                      ], 1)

        self.x = x.type(dtype=torch.float32)
        self.y = y.type(dtype=torch.float32)
        self.len = n_trials

    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]

    def __len__(self):
        return self.len
    
class merge_behavior_dataset(Dataset):
    """Concatenate the per-agent datasets into one dataset.

    Args:
        dataset_list: iterable of per-agent datasets; each must support
            integer indexing and return an ``(x, y)`` pair of tensors.
        n_trials: number of trials taken from each agent (indices
            ``0 .. n_trials - 1``).

    Attributes:
        x: stacked inputs of all agents, agent after agent, as float32.
        y: stacked targets of all agents, in the same order, as float32.
    """
    def __init__(self, dataset_list, n_trials):
        inputs, targets = [], []
        for agent_dataset in dataset_list:
            for trial in range(n_trials):
                sample = agent_dataset[trial]
                inputs.append(sample[0])
                targets.append(sample[1])

        self.x = torch.stack(inputs).type(dtype=torch.float32)
        self.y = torch.stack(targets).type(dtype=torch.float32)
        self.len = len(inputs)

    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]

    def __len__(self):
        return self.len

In [None]:
# preprocessing the data

# Training data file. Alternative datasets:
#   '../../data/artificial_trainset_2000_fix.csv'
#   '../../data/artificial_trainset_2000_rapid.csv'
path = '../../data/artificial_trainset_2000.csv'

df = pd.read_csv(path)
# Recode reward value 5 as 1. `.loc` writes to `df` itself; the chained form
# `df.reward[mask] = 1` may write to a temporary copy instead.
df.loc[df['reward'] == 5, 'reward'] = 1


# define constants
num_of_action = df['action'].nunique()
num_of_trials = df['trial_num'].nunique()
num_of_agents = df['agent_index'].nunique()
num_parameters_embedding = df['parameters_embedding'].nunique()

# network input and output dimension
Input_size = 1 + num_of_action
Output_size = num_of_action + num_parameters_embedding

# train / validation split by agent; validation gets every agent not in train,
# so the two counts always add up to num_of_agents
n_agent_train = int(0.8*num_of_agents)
n_agent_val = num_of_agents - n_agent_train

# Seed of the agent shuffle, fixed so the split is the same on every run.
SPLIT_SEED = 0

# One dataset per agent. Rows are sliced positionally, which relies on the file
# storing each agent's num_of_trials rows contiguously.
agent_datasets = []
for i in range(num_of_agents):
    s = i*num_of_trials
    e = (i+1)*num_of_trials
    cur_df = df.iloc[s:e].reset_index(drop=True)
    agent_datasets.append((i, behavior_dataset(cur_df)))

random.Random(SPLIT_SEED).shuffle(agent_datasets)

# Fill an object array element by element: np.array() on the pairs would try to
# iterate into the datasets (they define __len__/__getitem__) and build a
# ragged array, which recent numpy versions reject.
all_data = np.empty((num_of_agents, 2), dtype=object)
for row, (agent_id, agent_dataset) in enumerate(agent_datasets):
    all_data[row, 0] = agent_id
    all_data[row, 1] = agent_dataset

train_dataset = all_data[:n_agent_train,1]
train_dataset = merge_behavior_dataset(train_dataset,num_of_trials)

val_dataset = all_data[n_agent_train:,1]
val_dataset = merge_behavior_dataset(val_dataset,num_of_trials)
# original indices of the agents held out for validation
val_aaa = np.array([all_data[i,0] for i in range(n_agent_train,num_of_agents)])


In [None]:
# Summary of the data split. Sizes are read from the datasets themselves so
# they always match the split that was actually made.
print('num_of_trials:',num_of_trials)
print('num_of_agents:',num_of_agents)
print('num_parameters_embedding:',num_parameters_embedding)
print('train_size:', len(train_dataset))
print('train_size:', train_dataset[:][0].shape)
print('val_size:', len(val_dataset))
print('val_size:', val_dataset[:][0].shape)
print('val_agents',val_aaa)

In [None]:
def multi_loss(y_hat_action, y_hat_parameters, y_hat_a, y_hat_b, y_true,
               weights=(10, 1, 300, 10)):
    """Weighted sum of the four training losses.

    ``y_true`` is read column-wise, with ``n`` the number of columns of
    ``y_hat_action``: columns ``0..n-1`` are the action targets, column ``n``
    the parameter-embedding class index, column ``n+1`` the target for
    ``y_hat_a`` and column ``n+2`` the target for ``y_hat_b``.

    Args:
        y_hat_action: (N, n) predicted action probabilities (values in [0, 1]).
        y_hat_parameters: (N, C) scores for the parameter-embedding classes.
        y_hat_a: (N,) first regression output.
        y_hat_b: (N,) second regression output.
        y_true: (N, n + 3) targets laid out as described above.
        weights: multipliers for the (action BCE, parameter cross-entropy,
            a MSE, b MSE) terms. Defaults to (10, 1, 300, 10).

    Returns:
        Tuple ``(total, action, parameters, a, b)``: the weighted total
        followed by the four weighted terms.
    """
    n_actions = y_hat_action.shape[-1]

    # slice the targets
    y_true_action = y_true[:, :n_actions]
    # .long() keeps the tensor on the device it already lives on
    y_true_parameters = y_true[:, n_actions].long()
    y_true_a = y_true[:, n_actions + 1]
    y_true_b = y_true[:, n_actions + 2]

    w_action, w_parameters, w_a, w_b = weights

    loss_action = w_action * F.binary_cross_entropy(y_hat_action, y_true_action)
    loss_parameters = w_parameters * F.cross_entropy(y_hat_parameters, y_true_parameters)
    loss_a = w_a * F.mse_loss(y_hat_a, y_true_a)
    loss_b = w_b * F.mse_loss(y_hat_b, y_true_b)

    # combine losses
    total_loss = loss_action + loss_parameters + loss_a + loss_b

    return total_loss, loss_action, loss_parameters, loss_a, loss_b

In [None]:
def train_model_two(net, train_loader, val_loader ,epochs, number):
    """Train ``net`` with Adam and checkpoint the best epochs.

    Every batch is fed to the network as one sequence (a leading axis of
    size 1 is added). The loss terms come from ``multi_loss`` and the
    validation losses from ``eval_net_two``, which runs after every epoch.

    Two checkpoint files are written to the working directory whenever the
    corresponding summed loss reaches a new minimum:
    ``checkpoint_best_train_{number}_twolayers.pth`` and
    ``checkpoint_best_val_{number}_twolayers.pth``.

    Args:
        net: model whose forward pass returns
            ``(parameters, action, hn_0, hn_1, a, b)``.
        train_loader: iterable of ``(X, y_true)`` batches. It is materialised
            once; the order of the batches is reshuffled every epoch while the
            content of each batch is left untouched.
        val_loader: iterable of ``(X, y_true)`` batches for ``eval_net_two``.
        epochs: number of training epochs.
        number: identifier inserted into the checkpoint file names.

    Returns:
        ``(net, train_loss_array, val_loss_array)``; each array has one row
        per recorded epoch with the columns (action loss, parameter loss).
    """
    n_step = 1  # record losses and checkpoint every n_step epochs
    index = 0   # next free row of the loss arrays

    # Start at infinity so the first recorded epoch always writes a checkpoint,
    # however large the initial loss is.
    min_loss_t = float('inf')
    min_loss_v = float('inf')

    # arrays to track loss
    n_records = epochs // n_step
    train_loss_array = np.zeros(shape=(n_records,2))
    val_loss_array = np.zeros(shape=(n_records,2))

    # move net to the selected device
    net.to(device)

    # Use Adam optimizer
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # start timer
    start_time = time.time()

    # Materialise the batches once; only their order changes between epochs.
    train_batches = list(train_loader)

    # Loop over epochs
    for i in range(epochs):
        # eval_net_two leaves the net in eval mode, so switch back every epoch.
        net.train()

        running_loss_0, running_loss_1 = [], []
        running_loss_a, running_loss_b = [], []

        # Randomize the order of the training batches
        random.shuffle(train_batches)

        # Loop over training batches
        for X, y_true in train_batches:

            X, y_true = X.to(device), y_true.to(device) # move to device
            X = X.unsqueeze(0) # 1 x trials x input_size

            optimizer.zero_grad()  # zero the gradient buffers

            y_hat_parameters, y_hat_action, hn_0, hn_1,  y_hat_a, y_hat_b  = net(X) # forward pass

            # Flatten to (trials, width); widths are read from the outputs.
            y_hat_action = y_hat_action.view(-1, y_hat_action.shape[-1])
            y_hat_parameters = y_hat_parameters.view(-1, y_hat_parameters.shape[-1])
            y_hat_a = y_hat_a.view(-1)
            y_hat_b = y_hat_b.view(-1)

            loss, l_0, l_1, loss_a, loss_b = multi_loss(y_hat_action, y_hat_parameters,y_hat_a, y_hat_b ,y_true) # compute loss

            loss.backward() # backprop the loss
            optimizer.step() # update the weights

            running_loss_0.append(l_0.item())
            running_loss_1.append(l_1.item())

            running_loss_a.append(loss_a.item())
            running_loss_b.append(loss_b.item())

        running_loss_0 = np.array(running_loss_0).mean()
        running_loss_1 = np.array(running_loss_1).mean()

        running_loss_reg = (np.array(running_loss_a) + np.array(running_loss_b)).mean()

        lv0, lv1, lva, lvb = eval_net_two(net,val_loader)

        print('loss bce action',running_loss_0)
        print('loss ce param',running_loss_1)

        print('loss mse a',(np.array(running_loss_a).mean()))
        print('loss mse b',(np.array(running_loss_b).mean()))

        # Record the losses every n_step epochs and save the best models
        if i % n_step == n_step-1:

            train_loss_array[index] = running_loss_0, running_loss_1
            tloss = running_loss_0 + running_loss_1 + running_loss_reg

            val_loss_array[index] = lv0, lv1
            vloss = lv0 + lv1 + lva + lvb

            if tloss <= min_loss_t:
                checkpoint = {'epoch':i+1,'model_state':net.state_dict(),'optim_state':optimizer.state_dict(),'loss':tloss}
                torch.save(checkpoint,f'checkpoint_best_train_{number}_twolayers.pth')
                min_loss_t = tloss

            if vloss <= min_loss_v:
                checkpoint = {'epoch':i+1,'model_state':net.state_dict(),'optim_state':optimizer.state_dict(),'loss':vloss}
                torch.save(checkpoint,f'checkpoint_best_val_{number}_twolayers.pth')
                min_loss_v = vloss

            print('Step {}, Train Loss {:0.4f}, Val Loss {:0.4f}, Time {:0.1f}s'.format(i+1, tloss, vloss, time.time() - start_time))

            index += 1

    # As before, the net is handed back in training mode.
    net.train()

    return net, train_loss_array , val_loss_array

In [None]:
def eval_net_two(net,val_loader):
    """Compute the mean weighted validation losses of ``net``.

    Every batch is fed as one sequence (a leading axis of size 1 is added) and
    scored with ``multi_loss``. Gradients are disabled during the pass.

    Note: the network is switched to eval mode and left in it; callers that
    continue training must call ``net.train()`` afterwards.

    Args:
        net: model whose forward pass returns
            ``(parameters, action, hn_0, hn_1, a, b)``.
        val_loader: iterable of ``(X, y_true)`` batches.

    Returns:
        ``(l_0, l_1, loss_a, loss_b)``: the per-batch means of the action,
        parameter, a and b loss terms as returned by ``multi_loss``.
    """
    running_loss_0, running_loss_1 = [], []
    running_loss_a, running_loss_b = [], []
    with torch.no_grad():
        net.eval()
        for X, y_true in val_loader:

            X, y_true = X.to(device), y_true.to(device) # move to device
            X = X.unsqueeze(0) # 1 x trials x input_size
            y_hat_parameters, y_hat_action, hn_0, hn_1,  y_hat_a, y_hat_b  = net(X) # forward pass

            # Flatten to (trials, width); widths are read from the outputs
            # rather than from notebook-level constants.
            y_hat_action = y_hat_action.view(-1, y_hat_action.shape[-1])
            y_hat_parameters = y_hat_parameters.view(-1, y_hat_parameters.shape[-1])
            y_hat_a = y_hat_a.view(-1)
            y_hat_b = y_hat_b.view(-1)

            loss, l_0, l_1, loss_a, loss_b = multi_loss(y_hat_action, y_hat_parameters,y_hat_a, y_hat_b ,y_true)
            running_loss_0.append(l_0.item())
            running_loss_1.append(l_1.item())
            running_loss_a.append(loss_a.item())
            running_loss_b.append(loss_b.item())

    l_0 = np.array(running_loss_0).mean()
    l_1 = np.array(running_loss_1).mean()
    loss_a = np.array(running_loss_a).mean()
    loss_b = np.array(running_loss_b).mean()

    return l_0, l_1, loss_a, loss_b
        

In [None]:
# The second stage predicts one probability per action.
Output_size = num_of_action
N_EPOCHS = 100
number = 0        # id of the first run; used in the checkpoint file names
aBatch = [1000]   # batch sizes to try
aHidden = [32]    # hidden sizes to try

# Each configuration gets its own run id so that the checkpoints of one
# configuration are not overwritten by the next one.
run_id = number
for bs in aBatch:
    for hs in aHidden:
        # shuffle=False keeps the trials of each agent in their original order
        train_loader = DataLoader(train_dataset,shuffle=False,batch_size=bs)
        val_loader = DataLoader(val_dataset,shuffle=False,batch_size=bs)
        rnn = GRU_RNN_TWO(
                      input_size=Input_size,
                      hidden_size=hs,
                      num_parameters_embedding=num_parameters_embedding,
                      output_size=Output_size,
                      dropout=0.2
                     )
        rnn, loss_train, loss_val = train_model_two(rnn, train_loader, val_loader, N_EPOCHS, run_id)
        print('Done',bs*hs)
        run_id += 1