### Making filepaths for protocol and data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import loading_utils as load

# Build the path with os.path.join instead of a hand-written "..\S..." literal:
# "\S" is an invalid escape sequence (SyntaxWarning on recent Python versions)
# and the backslash separator only works on Windows.
rootdir = os.path.join("..", "Simulated_folder_2")

# Resolve the input files under rootdir: the protocol csv, the per-sequence
# recording csvs (a list of paths), and the whole-experiment csv.
fp_protocol, fp_recording, fp_whole_exp = load.make_filepaths(rootdir)

# Print the resolved paths as a quick sanity check.
print(fp_protocol, fp_recording, fp_whole_exp)

..\Simulated_folder_2\201_20240305_right_sine_b.csv ['..\\Simulated_folder_2\\Experiment_0001\\Sequence_0001\\data.csv', '..\\Simulated_folder_2\\Experiment_0001\\Sequence_0002\\data.csv', '..\\Simulated_folder_2\\Experiment_0001\\Sequence_0003\\data.csv'] ..\Simulated_folder_2\Experiment_0001\data.csv


### Making protocol dataframes

I split the protocol into two dataframes, one with the descriptive variables and one with the timecourse, since they have different numbers of columns.

In [2]:
# Parse the protocol csv into two frames: the descriptive variables and the timecourse.
protocol_vars_df, protocol_timecourse_df = load.make_protocol_dfs(fp_protocol)

# Last expression of the cell, so the notebook renders the variables table.
protocol_vars_df

Unnamed: 0,Var,Val 1,Val 2
0,LR.exp,civibe_201,
1,Date,09.11.2023,
2,Author(s),Hannah S. Heinrichs,
3,Photoreceptors,CIE tooolbox,
4,Calibration,Source,20230911.0
5,Version,1,0.0
6,,,
7,Sampling time [ms],33,
8,Start delay [s],0,0.0
9,Temperature aquisition interval [tick],20,


In [3]:
# Render the timecourse part of the protocol.
protocol_timecourse_df

Unnamed: 0,NumSample,Label L,LED L1,LED L2,LED L3,LED L4,LED L5,LED L6,L L,M L,...,LED R2,LED R3,LED R4,LED R5,LED R6,L R,M R,S R,g R,Eye
0,0,dark,0,0,0,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,R
1,7273,dark,0,0,0,0,0,0,0,0,...,0.500000,0.500000,0.500000,0.500000,0.500000,50,50,50,50,R
2,0,dark,0,0,0,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,R
3,1,dark,0,0,0,0,0,0,0,0,...,0.500000,0.500000,0.500000,0.500000,0.500000,100,100,100,0,R
4,1,dark,0,0,0,0,0,0,0,0,...,0.515792,0.462014,0.525009,0.537161,0.475807,100,100,100,0,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3847,1,dark,0,0,0,0,0,0,0,0,...,0.451295,0.451295,0.451295,0.451295,0.451295,100,100,100,100,R
3848,1,dark,0,0,0,0,0,0,0,0,...,0.463177,0.463177,0.463177,0.463177,0.463177,100,100,100,100,R
3849,1,dark,0,0,0,0,0,0,0,0,...,0.475311,0.475311,0.475311,0.475311,0.475311,100,100,100,100,R
3850,1,dark,0,0,0,0,0,0,0,0,...,0.487613,0.487613,0.487613,0.487613,0.487613,100,100,100,100,R


### Making experiment dataframe

I pull all data files from sequences and concatenate them into one experiment df. I add a column specifying which trial (or adaptation) the specific row belongs to.

In [5]:
# Build one dataframe from the per-sequence recording files. The protocol path is
# passed in as well (presumably to derive the 'Eye' column; confirm in loading_utils).
experiment_df = load.make_experiment_df(fp_recording, fp_protocol)
experiment_df

Unnamed: 0,Overall time Sec,Sequence time Sec,Experiment state,Sequence index,Sequences count,Excitation index,Excitation label - Left,Excitation label - Right,Left - Is found,Left - Size Mm,...,Right - Size Mm,Right - Area Mm,Right - RadiusA Px,Right - RadiusB Px,Right - PosX Px,Right - PoxY Px,Right - Distance from focus,Right - Leds temp,Trial number,Eye
0,0.003,0.004,Active,1,26,2,dark,baseline,False,,...,9.37452,69.02204,189.94965,227.10638,351.82538,562.87006,64.18844,,Adaptation,R
1,0.009,0.010,Active,1,26,2,dark,baseline,False,,...,,,,,,,,,Adaptation,R
2,0.012,0.012,Active,1,26,2,dark,baseline,True,9.36967,...,,,,,,,,,Adaptation,R
3,0.028,0.029,Active,1,26,2,dark,baseline,False,,...,9.37800,69.07329,190.04164,227.16499,352.04379,563.15216,64.09731,,Adaptation,R
4,0.052,0.053,Active,1,26,2,dark,baseline,True,9.36619,...,,,,,,,,,Adaptation,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1120,280.235,19.370,Active,3,26,310,dark,baseline,True,6.45852,...,,,,,,,,,2,R
1121,280.250,19.385,Active,3,26,310,dark,baseline,False,,...,6.52256,33.41378,135.04973,154.63649,396.13867,478.16284,61.65505,,2,R
1122,280.277,19.411,Active,3,26,310,dark,baseline,True,6.50580,...,,,,,,,,,2,R
1123,280.284,19.419,Active,3,26,310,dark,baseline,False,,...,6.54294,33.62298,135.49684,155.09116,396.39682,478.14185,61.61373,,2,R


In [6]:
def make_whole_exp_df(fp_whole_exp: str, fp_protocol: str) -> pd.DataFrame:
    """
    Function for making a dataframe from the whole test recording
    fp_whole_exp - string, path to the semicolon-delimited csv file with whole test data
    fp_protocol - string, path to csv with protocol data (for eye information);
        the file is not opened, only its file name is inspected
    Returns:
    data_df - DataFrame with data from the whole test recording, with an added column 'Eye' -
        'L' when the protocol file name contains 'left' (case-insensitive), otherwise 'R'
    """

    data_df = pd.read_csv(fp_whole_exp, delimiter=";")

    # Inspect only the file name, not the whole path: a parent directory that
    # happens to contain "left" must not flip the eye. Both "/" and "\" are
    # treated as separators so Windows-style paths work on any platform.
    protocol_name = fp_protocol.replace("\\", "/").rsplit("/", 1)[-1].lower()

    # A scalar is broadcast to every row; no per-row list is needed.
    data_df["Eye"] = "L" if "left" in protocol_name else "R"
    return data_df


# Load the whole-experiment recording so it can be compared with the concatenated sequences.
whole_df = make_whole_exp_df(fp_whole_exp, fp_protocol)
whole_df

Unnamed: 0,Overall time Sec,Sequence time Sec,Experiment state,Sequence index,Sequences count,Excitation index,Excitation label - Left,Excitation label - Right,Left - Is found,Left - Size Mm,...,Right - Is found,Right - Size Mm,Right - Area Mm,Right - RadiusA Px,Right - RadiusB Px,Right - PosX Px,Right - PoxY Px,Right - Distance from focus,Right - Leds temp,Eye
0,0.003,0.004,Active,1,26,2.0,dark,baseline,False,,...,True,9.37452,69.02204,189.94965,227.10638,351.82538,562.87006,64.18844,,R
1,0.009,0.010,Active,1,26,2.0,dark,baseline,False,,...,False,,,,,,,,,R
2,0.012,0.012,Active,1,26,2.0,dark,baseline,True,9.36967,...,False,,,,,,,,,R
3,0.028,0.029,Active,1,26,2.0,dark,baseline,False,,...,True,9.37800,69.07329,190.04164,227.16499,352.04379,563.15216,64.09731,,R
4,0.052,0.053,Active,1,26,2.0,dark,baseline,True,9.36619,...,False,,,,,,,,,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42586,731.013,19.367,Active,26,26,3852.0,dark,baseline,True,6.65892,...,False,,,,,,,,,R
42587,731.029,19.383,Active,26,26,3852.0,dark,baseline,False,,...,True,6.65047,34.73716,137.13200,158.31989,319.60846,490.75089,102.88291,,R
42588,731.046,19.400,Active,26,26,3852.0,dark,baseline,True,6.62445,...,False,,,,,,,,,R
42589,731.063,19.417,Active,26,26,3852.0,dark,baseline,False,,...,True,6.61454,34.36286,136.34479,157.51820,318.95346,490.91568,103.39003,,R


With the new data, the whole-experiment file seems to include all sequences. Next, I compare the concatenated per-sequence data with the whole-test data to check whether any samples are missing.

In [8]:
# Row counts per sequence: whole-test dataframe vs. concatenated sequence dataframe.
for phase_label, seq_idx in [("adaptation phase", 1), ("trial 1", 2), ("trial 2", 3)]:
    whole_rows = whole_df[whole_df["Sequence index"] == seq_idx]
    concat_rows = experiment_df[experiment_df["Sequence index"] == seq_idx]
    print(
        f"Length of {phase_label} in whole vs concatenated:",
        len(whole_rows),
        len(concat_rows),
    )


Length of adaptation phase in whole vs concatenated: 13911 13911
Length of trial 1 in whole vs concatenated: 1181 1172
Length of trial 2 in whole vs concatenated: 1132 1125


In the trials, the whole-test dataframe has slightly more samples than the concatenated one (9 more in trial 1 and 7 more in trial 2). The adaptation phase has the same length in both.

In [9]:
# First and last 'Sequence time Sec' per trial: whole-test vs. concatenated dataframe.
for trial_no, seq_idx in [(1, 2), (2, 3)]:
    whole_times = whole_df[whole_df["Sequence index"] == seq_idx]["Sequence time Sec"]
    concat_times = experiment_df[experiment_df["Sequence index"] == seq_idx]["Sequence time Sec"]
    print(
        f"Start time of trial {trial_no} in whole vs concatenated:",
        whole_times.min(),
        concat_times.min(),
    )
    print(
        f"End time of trial {trial_no} in whole vs concatenated:",
        whole_times.max(),
        concat_times.max(),
    )


Start time of trial 1 in whole vs concatenated: 0.01 0.01
End time of trial 1 in whole vs concatenated: 19.431 19.431
Start time of trial 2 in whole vs concatenated: 0.009 0.009
End time of trial 2 in whole vs concatenated: 19.437 19.437


Despite the difference in sample counts, the start and end times of each trial are identical in both dataframes.