In [None]:
import sys

sys.path.insert(
    1, "..\\utilities\\"
)  # adds utilities folder to path so we can import modules from it, won't be needed after packaging

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import loading_utils as load
import datetime

participant_list=[200,201,202,204,205,206,207,209,210,211,212,213]

In [2]:
def resample_by_trial(data_df):
    #take subset of data without transition and adaptation parts
    data_subset = data_df[(data_df['Trial phase']!='Adaptation') & (data_df['Trial phase']!='Transition')]

    #map trial-relevant variables to trial numbers for trial marking after resampling
    trial_list = sorted(data_subset['Trial no'].unique())
    stim_list = [data_subset['Trial type'][data_subset['Trial no']==i].unique()[0] for i in trial_list]
    block_list = [data_subset['Block'][data_subset['Trial no']==i].unique()[0] for i in trial_list]
    test_list = [data_subset['Test'][data_subset['Trial no']==i].unique()[0] for i in trial_list]
    recording_list = [data_subset['Recording id'][data_subset['Trial no']==i].unique()[0] for i in trial_list]
    eye_list = [data_subset['Eye'][data_subset['Trial no']==i].unique()[0] for i in trial_list]
    
    #make datetime index for resampling
    data_subset['Trial time datetime'] = data_subset['Trial time Sec'].apply(lambda x: datetime.timedelta(seconds = x))
    data_subset.set_index('Trial time datetime',inplace=True)
    
    #resample by trial and create a new dataframe
    trials_for_new_df = []
    for i,trial_no in enumerate(trial_list):

        trial=data_subset[['Trial time Sec','Stim eye - Size Mm']][data_subset['Trial no']==trial_no].copy()
        trial.loc[datetime.timedelta(seconds=-1)] = pd.Series() #add a row at -1s so that every trial has the same time ticks
        
        resampled_trial=trial.resample('20ms').agg({'Stim eye - Size Mm':'mean'})
        
        #remake trial time column in seconds from new index
        resampled_trial['Trial time Sec'] = resampled_trial.index
        resampled_trial['Trial time Sec'] = resampled_trial['Trial time Sec'].apply(lambda x: x.total_seconds())
        
        #mark trial based on mappings
        resampled_trial['Trial no'] = [trial_no]*len(resampled_trial)
        resampled_trial['Trial type'] = [stim_list[i]]*len(resampled_trial)
        resampled_trial['Block'] = [block_list[i]]*len(resampled_trial)
        resampled_trial['Test'] = [test_list[i]]*len(resampled_trial)
        resampled_trial['Recording id'] = [recording_list[i]]*len(resampled_trial)
        resampled_trial['Eye'] = [eye_list[i]]*len(resampled_trial)

        #mark trial phases based on protocol
        resampled_trial['Trial phase'] = ['N/A']*len(resampled_trial)
        resampled_trial.loc[resampled_trial['Trial time Sec']<0,'Trial phase'] = 'pre-stim' 
        resampled_trial.loc[(resampled_trial['Trial time Sec']>=0)&(resampled_trial['Trial time Sec']<=5),'Trial phase'] = 'stim'
        resampled_trial.loc[resampled_trial['Trial time Sec']>5,'Trial phase'] = 'post-stim'
        trials_for_new_df.append(resampled_trial)

    new_df = pd.concat(trials_for_new_df)
    new_df.reset_index(inplace=True)
    return new_df

In [3]:
data_dir = './results/new'

participant_id = 200
filepath=os.path.join(data_dir,str(participant_id)+'_recording_data.csv')
data_df = pd.read_csv(filepath)


In [4]:
resampled_df = resample_by_trial(data_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset['Trial time datetime'] = data_subset['Trial time Sec'].apply(lambda x: datetime.timedelta(seconds = x))


In [5]:
resampled_df

Unnamed: 0,Trial time datetime,Stim eye - Size Mm,Trial time Sec,Trial no,Trial type,Block,Test,Recording id,Eye,Trial phase
0,-1 days +23:59:59,6.85329,-1.00,1.0,s,0,a,0,R,pre-stim
1,-1 days +23:59:59.020000,,-0.98,1.0,s,0,a,0,R,pre-stim
2,-1 days +23:59:59.040000,6.84270,-0.96,1.0,s,0,a,0,R,pre-stim
3,-1 days +23:59:59.060000,6.83031,-0.94,1.0,s,0,a,0,R,pre-stim
4,-1 days +23:59:59.080000,,-0.92,1.0,s,0,a,0,R,pre-stim
...,...,...,...,...,...,...,...,...,...,...
566272,0 days 00:00:19.360000,7.03316,19.36,575.0,lms,10,b,28,R,post-stim
566273,0 days 00:00:19.380000,7.03606,19.38,575.0,lms,10,b,28,R,post-stim
566274,0 days 00:00:19.400000,,19.40,575.0,lms,10,b,28,R,post-stim
566275,0 days 00:00:19.420000,7.04224,19.42,575.0,lms,10,b,28,R,post-stim


The result is a dataframe with trials resampled to 50 Hz, starting at -1 s. Thanks to hard coding the trial start time, each trial will have the same time stamps for samples, enabling calculation of mean etc.