In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Our beautiful functions

In [80]:
def history_presentation(sides, outputs):

    """
    sides : np.array of the sides where the stimulus was presented (-1: Left, 1: right)
    outputs : np.array of the outputs (0: incorrect, 1: correct)
    """
    reinforced = sides * outputs
    punished = sides * (1 - outputs)

    return(np.sum(reinforced - punished))


def history_decision(decisions, outputs):

    """
    decisions : np.array of the decisions where the stimulus was presented (-1: Left, 1: right)
    outputs : np.array of the outputs (0: incorrect, 1: correct)
    """
    reinforced = decisions * outputs
    punished = decisions * (1 - outputs)

    return(np.sum(reinforced - punished))


def bias(sides):
    """
    sides : np.array of the sides where the stimulus was presented (-1: Left, 1: right)
    """

    return(np.mean(sides))


In [98]:
class IBLTrialDataset:

    @staticmethod
    def apply_hist_per_subject(x, func, window_size=1):
        """
        Applies function (func) on dataframe (x) in given window (wind) after groupBy per subject.

        x: pd.DataFrame: (N, n_cols [1 or 2]) - two or one-column dataframe with values need to run function
        window_size: int - numer of trials in history

        """
        x = x.values
        result = np.zeros(x.shape[0])
        if x.ndim > 1:  
            padded = np.vstack([np.zeros((window_size, x.shape[-1])), x])  
            for i in range(window_size, padded.shape[0]):
                x1, x2 = padded[i-window_size: i, 0], padded[i-window_size: i, 1]
                result[i-window_size] = func(x1, x2)
        else:
            padded = np.concatenate([np.zeros(window_size), x]) 
            for i in range(window_size, padded.shape[0]):
                x1 = padded[i-window_size: i]
                result[i-window_size] = func(x1)       

        return pd.DataFrame(result)


    def create_dataset(self, df, window_size):
        """
        Creates dataset as model input (X,y)
        
        df: pd.DataFrame - input data frame
        window_size: int - window_size of history trials
        
        output: 
        X: np.array(N, num_features) - transformed dataset of features that can be an input to the model
        y: np.array(N,) - numpy array of choices true choices
        """

        df.sort_values(by=['session_start_time', 'trial_id'], inplace=True) # to make sure we're sorted for the grouping later on
        
        # ground truth stimuli sides
        df['sides'] = df['signed_contrast']
        df.loc[df.sides > 0, 'sides'] = 1
        df.loc[df.sides < 0, 'sides'] = -1

        # the actual mice response to the stimuli 
        # TODO: verify !
        df.loc[df.trial_response_choice == 'CCW', 'trial_response_choice'] = 1
        df.loc[df.trial_response_choice == 'CW', 'trial_response_choice'] = -1
        
        # derive a column for correct answers
        df['correct'] = df.trial_response_choice == df.sides
        
#         # get history presentation per trial 
        df['history_presentation'] = (df.groupby('subject_uuid')['sides', 'correct'].apply(lambda x: self.apply_hist_per_subject(x, history_presentation, window_size)).reset_index()).iloc[:, -1]
        
#         # get history decision per trial 
        df['history_decision'] = (df.groupby('subject_uuid')['trial_response_choice', 'correct'].apply(lambda x: self.apply_hist_per_subject(x, history_decision, window_size)).reset_index()).iloc[:, -1]
        
        # get bias history per trial
        df['bias'] = (df.groupby('subject_uuid')['sides'].apply(lambda x: self.apply_hist_per_subject(x, bias, window_size)).reset_index()).iloc[:, -1]

        return df[['signed_contrast', 'history_presentation', 'history_decision', 'bias']].values, df['trial_response_choice'].values
    

In [103]:
data_df = pd.read_csv('ibl_dataframe.csv')

ds = IBLTrialDataset()
X, y = ds.create_dataset(data_df, window_size = 10)

  df['history_presentation'] = (df.groupby('subject_uuid')['sides', 'correct'].apply(lambda x: self.apply_hist_per_subject(x, history_presentation, window_size)).reset_index()).iloc[:, -1]
  df['history_decision'] = (df.groupby('subject_uuid')['trial_response_choice', 'correct'].apply(lambda x: self.apply_hist_per_subject(x, history_decision, window_size)).reset_index()).iloc[:, -1]


In [104]:
X

array([[-1.  ,  0.  ,  0.  ,  0.  ],
       [ 1.  , -1.  , -1.  , -0.1 ],
       [-1.  ,  0.  ,  0.  ,  0.  ],
       ...,
       [ 0.25, -2.  ,  4.  ,  0.4 ],
       [ 0.25, -2.  ,  4.  ,  0.4 ],
       [ 0.25, -2.  ,  4.  ,  0.4 ]])