## Purpose of the notebook

This is a notebook for resampling data in participant dataframes and saving it to a new, smaller dataframe with just relevant trial data. The function resamples data and returns trials cut to -1:18 seconds to comply with protocol.

In [1]:
import sys

sys.path.insert(
    1, "..\\utilities\\"
)  # adds utilities folder to path so we can import modules from it, won't be needed after packaging

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import loading_utils as load
import datetime

participant_list = [200, 201, 202, 204, 205, 206, 207, 209, 210, 211, 212, 213]

In [None]:
# This is a block for making participant dataframes from raw files in directory data_dir (here Windows path to retinawise mirror folder on drive)
# It saves the participant dfs into directory defined in save_path (about 300 - 400 MB per participant), format 2xx_recording_data.csv
# Uncomment and run if you don't have these dataframes (remember that the folder specified in save_path must exist)
# data_dir = "D:/retinawise_mirror/raw/"
# save_path = './results/'
# for participant_id in participant_list:
#     data_df, protocol_timecourse_df, protocol_vars_df = load.load_participant_data(participant_no=participant_id,
#                                                                                    data_dir=data_dir,
#                                                                                    include_failed=False,
#                                                                                    save=True,
#                                                                                    save_path=save_path)

In [2]:
def resample_by_trial(data_df, sample_freq=50):
    # get time step in ms from sampling frequency provided
    time_step = 1000 / sample_freq

    # take subset of data without transition and adaptation parts
    data_subset = data_df[
        (data_df["Trial phase"] != "Adaptation")
        & (data_df["Trial phase"] != "Transition")
    ]

    # map trial-relevant variables to trial numbers for trial marking after resampling
    trial_list = sorted(data_subset["Trial no"].unique())
    stim_list = [
        data_subset["Trial type"][data_subset["Trial no"] == i].unique()[0]
        for i in trial_list
    ]
    block_list = [
        data_subset["Block"][data_subset["Trial no"] == i].unique()[0]
        for i in trial_list
    ]
    test_list = [
        data_subset["Test"][data_subset["Trial no"] == i].unique()[0]
        for i in trial_list
    ]
    recording_list = [
        data_subset["Recording id"][data_subset["Trial no"] == i].unique()[0]
        for i in trial_list
    ]
    eye_list = [
        data_subset["Eye"][data_subset["Trial no"] == i].unique()[0] for i in trial_list
    ]
    participant = data_subset["Participant id"].unique()[0]

    # make datetime index for resampling
    data_subset["Trial time datetime"] = data_subset["Trial time Sec"].apply(
        lambda x: datetime.timedelta(seconds=x)
    )
    data_subset.set_index("Trial time datetime", inplace=True)

    # resample by trial and create a new dataframe
    trials_for_new_df = []
    for i, trial_no in enumerate(trial_list):

        trial = data_subset[["Trial time Sec", "Stim eye - Size Mm"]][
            data_subset["Trial no"] == trial_no
        ].copy()
        trial.loc[datetime.timedelta(seconds=-1)] = (
            pd.Series()
        )  # add a row at -1s so that every trial has the same time ticks

        resampled_trial = trial.resample(str(time_step) + "ms").agg(
            {"Stim eye - Size Mm": "mean"}
        )
        resampled_trial = resampled_trial[
            datetime.timedelta(seconds=-1) : datetime.timedelta(seconds=18)
        ]
        # remake trial time column in seconds from new index
        resampled_trial["Trial time Sec"] = resampled_trial.index
        resampled_trial["Trial time Sec"] = resampled_trial["Trial time Sec"].apply(
            lambda x: x.total_seconds()
        )

        # mark trial based on mappings
        resampled_trial["Trial no"] = [trial_no] * len(resampled_trial)
        resampled_trial["Trial type"] = [stim_list[i]] * len(resampled_trial)
        resampled_trial["Block"] = [block_list[i]] * len(resampled_trial)
        resampled_trial["Test"] = [test_list[i]] * len(resampled_trial)
        resampled_trial["Recording id"] = [recording_list[i]] * len(resampled_trial)
        resampled_trial["Eye"] = [eye_list[i]] * len(resampled_trial)
        resampled_trial["Participant id"] = [participant] * len(resampled_trial)

        # mark trial phases based on protocol
        resampled_trial["Trial phase"] = ["N/A"] * len(resampled_trial)
        resampled_trial.loc[resampled_trial["Trial time Sec"] < 0, "Trial phase"] = (
            "pre-stim"
        )
        resampled_trial.loc[
            (resampled_trial["Trial time Sec"] >= 0)
            & (resampled_trial["Trial time Sec"] <= 5),
            "Trial phase",
        ] = "stim"
        resampled_trial.loc[resampled_trial["Trial time Sec"] > 5, "Trial phase"] = (
            "post-stim"
        )
        trials_for_new_df.append(resampled_trial)

    new_df = pd.concat(trials_for_new_df)
    new_df.reset_index(inplace=True)
    return new_df

In [3]:
data_dir = "./results/new/"

participant_id = 209
filepath = os.path.join(data_dir, str(participant_id) + "_recording_data.csv")
data_df = pd.read_csv(filepath)

In [4]:
resampled_df = resample_by_trial(data_df, sample_freq=50)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset["Trial time datetime"] = data_subset["Trial time Sec"].apply(


In [5]:
resampled_df

Unnamed: 0,Trial time datetime,Stim eye - Size Mm,Trial time Sec,Trial no,Trial type,Block,Test,Recording id,Eye,Participant id,Trial phase
0,-1 days +23:59:59,4.53878,-1.00,1.0,s,0,a,0,L,209,pre-stim
1,-1 days +23:59:59.020000,,-0.98,1.0,s,0,a,0,L,209,pre-stim
2,-1 days +23:59:59.040000,4.52158,-0.96,1.0,s,0,a,0,L,209,pre-stim
3,-1 days +23:59:59.060000,4.53736,-0.94,1.0,s,0,a,0,L,209,pre-stim
4,-1 days +23:59:59.080000,,-0.92,1.0,s,0,a,0,L,209,pre-stim
...,...,...,...,...,...,...,...,...,...,...,...
546820,0 days 00:00:17.920000,6.71894,17.92,575.0,flux,10,b,23,L,209,post-stim
546821,0 days 00:00:17.940000,,17.94,575.0,flux,10,b,23,L,209,post-stim
546822,0 days 00:00:17.960000,6.65416,17.96,575.0,flux,10,b,23,L,209,post-stim
546823,0 days 00:00:17.980000,6.62814,17.98,575.0,flux,10,b,23,L,209,post-stim


The result is a dataframe with trials resampled to 50 Hz, starting at -1 s and ending at 18 s. Thanks to hard coding the trial start and end time, each trial will have the same time stamps for samples, enabling calculation of mean etc. Resampling is done with all NaN values in stimulated eye removed.

### Resampling with removal

Legacy function for resampling data after removal of NaN values in the stimulated eye at timestamps where the other eye is measured.

In [6]:
def mark_not_measured(data_df):
    data_df["Stim eye - Measured"] = [False] * len(data_df)
    data_df.loc[data_df["Stim eye - Size Mm"].notna(), "Stim eye - Measured"] = True
    data_df.loc[
        (data_df["Right - Size Mm"].isna()) & (data_df["Left - Size Mm"].isna()),
        "Stim eye - Measured",
    ] = "missing"
    return data_df

In [7]:
data_df = mark_not_measured(data_df)
data_df = data_df[data_df["Stim eye - Measured"] != False]
resampled_df = resample_by_trial(data_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_subset["Trial time datetime"] = data_subset["Trial time Sec"].apply(


In [8]:
resampled_df

Unnamed: 0,Trial time datetime,Stim eye - Size Mm,Trial time Sec,Trial no,Trial type,Block,Test,Recording id,Eye,Participant id,Trial phase
0,-1 days +23:59:59,4.53878,-1.00,1.0,s,0,a,0,L,209,pre-stim
1,-1 days +23:59:59.020000,,-0.98,1.0,s,0,a,0,L,209,pre-stim
2,-1 days +23:59:59.040000,4.52158,-0.96,1.0,s,0,a,0,L,209,pre-stim
3,-1 days +23:59:59.060000,4.53736,-0.94,1.0,s,0,a,0,L,209,pre-stim
4,-1 days +23:59:59.080000,,-0.92,1.0,s,0,a,0,L,209,pre-stim
...,...,...,...,...,...,...,...,...,...,...,...
546820,0 days 00:00:17.920000,6.71894,17.92,575.0,flux,10,b,23,L,209,post-stim
546821,0 days 00:00:17.940000,,17.94,575.0,flux,10,b,23,L,209,post-stim
546822,0 days 00:00:17.960000,6.65416,17.96,575.0,flux,10,b,23,L,209,post-stim
546823,0 days 00:00:17.980000,6.62814,17.98,575.0,flux,10,b,23,L,209,post-stim
