### Making filepaths for protocol and data

In [27]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import loading_utils as load

# Root folder of the (simulated) recording, one level above the notebook.
# Built from components instead of the literal "..\Simulated_folder": that
# literal only worked because "\S" is an unrecognised escape sequence (which
# newer Python versions warn about), and it hard-coded the Windows separator.
rootdir = os.path.join(os.pardir, "Simulated_folder")

# Judging by the printed output, the loader returns the protocol csv path,
# a list of per-sequence data.csv paths, and the whole-experiment data.csv path.
fp_protocol, fp_recording, fp_whole_exp = load.make_filepaths(rootdir)

print(fp_protocol, fp_recording, fp_whole_exp)

..\Simulated_folder\201_20240305_right_sine_b.csv ['..\\Simulated_folder\\Experiment_0001\\Sequence_0001\\data.csv', '..\\Simulated_folder\\Experiment_0001\\Sequence_0002\\data.csv', '..\\Simulated_folder\\Experiment_0001\\Sequence_0003\\data.csv', '..\\Simulated_folder\\Experiment_0001\\Sequence_0004\\data.csv'] ..\Simulated_folder\Experiment_0001\data.csv


### Making protocol dataframes

I split the protocol file into two dataframes: one with the descriptive variables and one with the timecourse, since the two parts have different numbers of columns.

In [21]:
# The loader is given the protocol csv path and its result is unpacked into two
# objects: the table of descriptive variables and the timecourse table.
protocol_vars_df, protocol_timecourse_df = load.make_protocol_dfs(fp_protocol)

# Bare last expression: shown as the cell output.
protocol_vars_df

Unnamed: 0,Var,Val 1,Val 2
0,LR.exp,civibe_201,
1,Date,09.11.2023,
2,Author(s),Hannah S. Heinrichs,
3,Photoreceptors,CIE tooolbox,
4,Calibration,Source,20230911.0
5,Version,1,0.0
6,,,
7,Sampling time [ms],33,
8,Start delay [s],0,0.0
9,Temperature aquisition interval [tick],20,


In [22]:
# Show the timecourse table created in the previous cell as this cell's output.
protocol_timecourse_df

Unnamed: 0,NumSample,Label L,LED L1,LED L2,LED L3,LED L4,LED L5,LED L6,L L,M L,...,LED R2,LED R3,LED R4,LED R5,LED R6,L R,M R,S R,g R,Eye
0,0,dark,0,0,0,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,R
1,7273,dark,0,0,0,0,0,0,0,0,...,0.500000,0.500000,0.500000,0.500000,0.500000,50,50,50,50,R
2,0,dark,0,0,0,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0,0,0,0,R
3,1,dark,0,0,0,0,0,0,0,0,...,0.500000,0.500000,0.500000,0.500000,0.500000,100,100,100,0,R
4,1,dark,0,0,0,0,0,0,0,0,...,0.515792,0.462014,0.525009,0.537161,0.475807,100,100,100,0,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3847,1,dark,0,0,0,0,0,0,0,0,...,0.451295,0.451295,0.451295,0.451295,0.451295,100,100,100,100,R
3848,1,dark,0,0,0,0,0,0,0,0,...,0.463177,0.463177,0.463177,0.463177,0.463177,100,100,100,100,R
3849,1,dark,0,0,0,0,0,0,0,0,...,0.475311,0.475311,0.475311,0.475311,0.475311,100,100,100,100,R
3850,1,dark,0,0,0,0,0,0,0,0,...,0.487613,0.487613,0.487613,0.487613,0.487613,100,100,100,100,R


### Making experiment dataframe

I read the data files of all sequences and concatenate them into a single experiment dataframe, adding a column that specifies which trial (or adaptation) each row belongs to.

In [26]:
# Build the experiment dataframe from the list of per-sequence recording files.
# The protocol path is passed as well - presumably to derive the 'Eye' column;
# confirm in loading_utils.
experiment_df = load.make_experiment_df(fp_recording, fp_protocol)
experiment_df

Unnamed: 0,Overall time Sec,Sequence time Sec,Experiment state,Sequence index,Sequences count,Excitation index,Excitation label - Left,Excitation label - Right,Left - Is found,Left - Size Mm,...,Right - Size Mm,Right - Area Mm,Right - RadiusA Px,Right - RadiusB Px,Right - PosX Px,Right - PoxY Px,Right - Distance from focus,Right - Leds temp,Trial number,Eye
0,0.013,0.013,Active,1,26,2,baseline,dark,False,,...,,,,,,,,22.1284,Adaptation,R
1,0.023,0.024,Active,1,26,2,baseline,dark,True,7.77159,...,7.40274,43.04020,152.75288,176.10225,339.23764,618.92163,108.15797,,Adaptation,R
2,0.038,0.039,Active,1,26,2,baseline,dark,True,7.78543,...,,,,,,,,,Adaptation,R
3,0.045,0.046,Active,1,26,2,baseline,dark,False,,...,7.39352,42.93307,152.29980,176.18649,337.55472,618.94867,109.30186,,Adaptation,R
4,0.074,0.075,Active,1,26,2,baseline,dark,True,7.79610,...,,,,,,,,,Adaptation,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1073,299.857,19.353,Active,4,26,464,baseline,dark,True,5.58578,...,,,,,,,,,3,R
1074,299.878,19.374,Active,4,26,464,baseline,dark,False,,...,5.38350,22.76246,109.74530,129.63231,299.84067,565.17151,114.43202,,3,R
1075,299.893,19.389,Active,4,26,464,baseline,dark,True,5.60921,...,,,,,,,,,3,R
1076,299.917,19.413,Active,4,26,464,baseline,dark,False,,...,5.39565,22.86530,109.95078,129.97461,299.95258,564.57306,114.18256,,3,R


In [25]:
def make_whole_exp_df(fp_whole_exp: str, fp_protocol: str) -> pd.DataFrame:
    """
    Function for making a dataframe from the whole test recording
    fp_whole_exp - string, path to the semicolon-delimited csv file with whole test data
    fp_protocol - string, path to csv with protocol data (for eye information);
        only its file name is inspected, case-sensitively, for the substring "left"
    Returns:
    data_df - DataFrame with data from the whole test recording, with an added column 'Eye' - L or R
    """

    data_df = pd.read_csv(fp_whole_exp, delimiter=";")

    # Inspect the file name only: a parent directory that happens to contain
    # "left" must not turn a right-eye protocol into "L". Both separators are
    # normalised so Windows-style paths are split correctly on any platform.
    protocol_name = fp_protocol.replace("\\", "/").rsplit("/", 1)[-1]

    # A scalar assignment is broadcast to every row, so no per-row list is needed.
    data_df["Eye"] = "L" if "left" in protocol_name else "R"
    return data_df


# Load the whole-test recording with the function defined above; the protocol
# path is only used to fill the 'Eye' column.
whole_df = make_whole_exp_df(fp_whole_exp, fp_protocol)
whole_df

Unnamed: 0,Overall time Sec,Sequence time Sec,Experiment state,Sequence index,Sequences count,Excitation index,Excitation label - Left,Excitation label - Right,Left - Is found,Left - Size Mm,...,Right - Is found,Right - Size Mm,Right - Area Mm,Right - RadiusA Px,Right - RadiusB Px,Right - PosX Px,Right - PoxY Px,Right - Distance from focus,Right - Leds temp,Eye
0,0.002,0.003,Active,1,26,2,dark,baseline,True,9.75677,...,False,,,,,,,,,R
1,0.009,0.009,Active,1,26,2,dark,baseline,False,,...,False,,,,,,,,,R
2,0.013,0.014,Active,1,26,2,dark,baseline,False,,...,True,9.18128,66.20587,181.84750,227.54599,363.23022,647.56787,119.52853,22.18677,R
3,0.025,0.025,Active,1,26,2,dark,baseline,True,9.76284,...,False,,,,,,,,,R
4,0.042,0.042,Active,1,26,2,dark,baseline,False,,...,True,9.18222,66.21942,181.82399,227.62199,363.06558,647.52411,119.55433,,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,11.269,11.270,Active,1,26,2,dark,baseline,True,7.47879,...,False,,,,,,,,,R
681,11.291,11.292,Active,1,26,2,dark,baseline,False,,...,True,7.62195,45.62707,155.88235,182.93871,394.16666,618.08264,81.83270,,R
682,11.309,11.310,Active,1,26,2,dark,baseline,True,7.52094,...,False,,,,,,,,,R
683,11.327,11.327,Active,1,26,2,dark,baseline,False,,...,True,7.65292,45.99862,156.49380,183.70781,394.13870,618.28455,82.03604,,R


Comparing the whole-test dataframe with the dataframe concatenated from the separate sequence files shows that they have different numbers of rows. The concatenated one appears to contain the complete recording (it runs to about 300 seconds), whereas the whole-test dataframe covers only about 11 seconds. Also, in the data I received, the whole-test file seems to concern a different eye – its contents do not overlap with the concatenated data – so it is possible that there is an error in this dataset.

In general, though, even without such errors, I would say that loading the data from the separate files may be the more straightforward method, as it allows the trials to be marked while the data are being loaded. Since the baseline is meant to be calculated from the preceding trial, segmentation should be easier with a column indicating which trial each row belongs to.