In [1]:
import os
import sys

# Adds the utilities folder to the import path so we can import modules from it;
# won't be needed after packaging.
# The path is built with os.path.join instead of a hard-coded '..\\utilities\\'
# so that it also resolves on non-Windows systems.
sys.path.insert(1, os.path.join('..', 'utilities'))

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import loading_utils as load

In [3]:
# Per-participant statistics of the number of 'Passive' samples per sequence.
# Every file in data_dir whose name ends with 'full_data.csv' is treated as one
# participant; the first three characters of the file name are printed as the id.
data_dir = './results'

# os.listdir returns entries in arbitrary order; sort so the report order is
# the same on every run.
for file in sorted(os.listdir(data_dir)):
    if not file.endswith('full_data.csv'):
        continue
    participant_df = pd.read_csv(os.path.join(data_dir, file))

    # transition_times holds one list per session; each entry is the number of
    # rows marked 'Passive' for one sequence index of that session. Sequence
    # indices without any 'Passive' row contribute a 0.
    transition_times = []
    for _, session_df in participant_df.groupby('Session id', sort=False):
        is_passive = session_df['Experiment state'] == 'Passive'  # built once per session
        sequence_index = session_df['Sequence index']
        transition_times_per_sess = [
            int((is_passive & (sequence_index == i)).sum())
            for i in sequence_index.unique()
        ]
        transition_times.append(transition_times_per_sess)
    del participant_df  # release the frame before the next file is read

    # Flatten in linear time (sum(list_of_lists, []) copies the list on every step).
    transition_times_flat = [t for per_sess in transition_times for t in per_sess]

    print(f'Statistics of passive time in samples for participant {file[0:3]}')
    if not transition_times_flat:
        # np.max / np.min raise ValueError on an empty sequence.
        print('no sessions/sequences found, statistics skipped')
        continue
    max_time = np.max(transition_times_flat)
    min_time = np.min(transition_times_flat)
    mean_time = np.mean(transition_times_flat)
    std_time = np.std(transition_times_flat)
    print(f'max: {max_time}, min: {min_time}, mean: {mean_time}, stdev: {std_time}')

Statistics of transition time in samples for participant 200
max: 11, min: 0, mean: 7.874587458745874, stdev: 2.02560696476002
Statistics of transition time in samples for participant 201
max: 11, min: 0, mean: 7.886446886446887, stdev: 1.966272252136838
Statistics of transition time in samples for participant 202
max: 11, min: 0, mean: 7.917065390749602, stdev: 1.9766126197802663
Statistics of transition time in samples for participant 205
max: 11, min: 0, mean: 7.948811700182816, stdev: 1.8864312609755909
Statistics of transition time in samples for participant 206
max: 11, min: 0, mean: 7.9825174825174825, stdev: 1.8930426422821431
Statistics of transition time in samples for participant 207
max: 12, min: 0, mean: 7.951666666666667, stdev: 1.9431925334928144
Statistics of transition time in samples for participant 209
max: 11, min: 0, mean: 7.978297161936561, stdev: 1.874491745518053
Statistics of transition time in samples for participant 210
max: 11, min: 0, mean: 7.93620689655172

There don't seem to be any outliers in passive time, i.e. no unusually long transitions. For each participant, the passive time ranges from 0 to about 11–12 samples, which (assuming the nominal 33 ms sampling interval) is roughly 0–0.4 s.

In [16]:
# Per-participant statistics of sequence length, measured both as the number of
# 'Active' samples and as the span of 'Sequence time Sec' over those samples.
# In every session the lowest sequence index is skipped (the accompanying text
# refers to it as the adaptation sequence).
data_dir = './results'

# A sequence is reported as an outlier when its 'Active' sample count falls
# outside these bounds.
MAX_SEQUENCE_SAMPLES = 1200
MIN_SEQUENCE_SAMPLES = 500

# os.listdir returns entries in arbitrary order; sort so the report order is
# the same on every run.
for file in sorted(os.listdir(data_dir)):
    if not file.endswith('full_data.csv'):
        continue
    print(f'Statistics of sequence length for participant {file[0:3]}')
    participant_df = pd.read_csv(os.path.join(data_dir, file))

    sequence_times = []      # one list per session: 'Active' sample count per sequence
    sequence_times_sec = []  # one list per session: duration in seconds per sequence
    for session, session_df in participant_df.groupby('Session id', sort=False):
        is_active = session_df['Experiment state'] == 'Active'  # built once per session
        sequence_index = session_df['Sequence index']
        sequence_times_per_sess = []
        sequence_times_sec_per_sess = []
        for sequence_id in sorted(sequence_index.unique())[1:]:
            # Time stamps of the 'Active' rows of this sequence (mask computed once).
            active_time_sec = session_df.loc[is_active & (sequence_index == sequence_id), 'Sequence time Sec']
            n_samples = len(active_time_sec)
            duration_sec = active_time_sec.max() - active_time_sec.min()
            sequence_times_per_sess.append(n_samples)
            sequence_times_sec_per_sess.append(duration_sec)
            if n_samples > MAX_SEQUENCE_SAMPLES or n_samples < MIN_SEQUENCE_SAMPLES:
                # Report the real sequence index; the position in the list only
                # matches it when the indices are contiguous and start at 0.
                print(f'Outlier in session: {session}, sequence: {sequence_id}, outlier time in samples: {n_samples}, outlier time in seconds: {duration_sec}')
        sequence_times.append(sequence_times_per_sess)
        sequence_times_sec.append(sequence_times_sec_per_sess)
    del participant_df  # release the frame before the next file is read

    # Flatten in linear time (sum(list_of_lists, []) copies the list on every step).
    sequence_times_flat = [t for per_sess in sequence_times for t in per_sess]
    sequence_times_sec_flat = [t for per_sess in sequence_times_sec for t in per_sess]
    if not sequence_times_flat:
        # np.max / np.min raise ValueError on an empty sequence.
        print('no sequences found, statistics skipped \n')
        continue

    max_time = np.max(sequence_times_flat)
    min_time = np.min(sequence_times_flat)
    mean_time = np.mean(sequence_times_flat)
    std_time = np.std(sequence_times_flat)
    print(f'max: {max_time}, min: {min_time}, mean: {mean_time}, stdev: {std_time} samples')
    # The 'min' entry shows the duration(s) in seconds of the sequence(s) with the
    # fewest samples (in brackets) next to the overall minimum duration in seconds.
    print(f'max: {np.max(sequence_times_sec_flat)}, min: {np.array(sequence_times_sec_flat)[np.array(sequence_times_flat)==min_time]} or {np.min(sequence_times_sec_flat)} , mean: {np.mean(sequence_times_sec_flat)}, stdev: {np.std(sequence_times_sec_flat)} seconds \n')

Statistics of sequence length for participant 200
Outlier in session: 20, sequence: 4, outlier time in samples: 2593, outlier time in seconds: 44.234
Outlier in session: 23, sequence: 25, outlier time in samples: 3306, outlier time in seconds: 57.401
max: 3306, min: 1001, mean: 1140.7582037996547, stdev: 113.53880704667438 samples
max: 57.401, min: [19.421] or 19.267000000000003 , mean: 19.540231433506044, stdev: 1.8817672778699341 seconds 

Statistics of sequence length for participant 201
max: 1176, min: 953, mean: 1131.8209523809523, stdev: 36.04761849405548 samples
max: 19.468, min: [19.445] or 19.235999999999997 , mean: 19.432224761904763, stdev: 0.01965092888124353 seconds 

Statistics of sequence length for participant 202
max: 1176, min: 1007, mean: 1142.325, stdev: 27.471367670115495 samples
max: 19.467, min: [19.459] or 19.389 , mean: 19.433439999999997, stdev: 0.013328030612209675 seconds 

Statistics of sequence length for participant 205
max: 1176, min: 1058, mean: 1148.38

Looking at the sequence durations (not including the adaptation sequence), there are a few outliers (defined as a sequence duration >1200 samples or <500 samples). The extremely long ones are possibly sequences with long transition times in which the experiment state is nevertheless still marked as active.

Looking at the durations in seconds, it also seems that the sampling interval is not the 33 ms stated in the protocol data, and that it is not uniform. For example, for participant 210, 6031 samples span 430 seconds, which is a sampling frequency of about 14 Hz, whereas 815 samples span 16.5 seconds, which is a sampling frequency of about 50 Hz. I also checked whether the minimum sequence duration in seconds is equal to the duration in seconds of the sequence with the minimum number of samples (the value in brackets vs. the value after "or" in the output). They are not equal, which further confirms that the sampling is not uniform. This can cause issues when filtering the signal and will need to be kept in mind for preprocessing.