**Table of contents**<a id='toc0_'></a>    
- [Statistics of block completeness after only percentage based trial rejection](#toc1_)    
    - [Number of blocks accepted per participant](#toc1_1_1_)    
- [Block completeness after percentage and long nan trial removal](#toc2_)    
    - [Number of blocks accepted per participant](#toc2_1_1_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

In [58]:
import sys

# Make the sibling "utilities" folder importable so the utility modules imported
# below can be found. Forward slashes are used because they resolve on Windows as
# well as on POSIX systems, whereas a backslash-separated path only works on Windows.
# This will not be needed once the utilities are packaged.
sys.path.insert(1, "../utilities/")

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import loading_utils as load
import preprocessing_utils as prep

# Participant IDs whose data files are processed in this notebook.
participant_list = [200, 201, 202, 204, 205, 206, 207, 209, 210, 211, 212, 213]

In [1]:
# Directory holding the resampled per-participant CSV files.
data_dir = "./results/resampled/"
# File-name suffix of the 30 Hz resampled data; the full file name is
# <participant id> + data_suffix, e.g. "200_nonan_30_resampled_data.csv".
data_suffix = "_nonan_30_resampled_data.csv"

In [None]:
# This is a block for making participant dataframes from raw files in directory data_dir (here Windows path to retinawise mirror folder on drive)
# It saves the participant dfs into directory defined in save_path (about 300 - 400 MB per participant), format 2xx_recording_data.csv
# Uncomment and run if you don't have these dataframes (remember that the folder specified in save_path must exist)
# data_dir = "D:/retinawise_mirror/raw/"
# save_path = './results/'
# for participant_id in participant_list:
#     data_df, protocol_timecourse_df, protocol_vars_df = load.load_participant_data(participant_no=participant_id,
#                                                                                    data_dir=data_dir,
#                                                                                    include_failed=False,
#                                                                                    save=True,
#                                                                                    save_path=save_path)

In [None]:
# This is a block for resampling participant dataframes to 30 Hz and extracting only trials -1:18 s
# It loads the participant dfs from data_dir (e.g. the save_path from block above)
# And saves into directory save_path with format 2xxdata_suffix
# Uncomment and run if you don't have the resampled dataframes (remember the folder specified in save_path must exist)
# data_dir = "./results/"
# save_path = './results/resampled/'
# data_suffix = "_nonan_30_resampled_data.csv"
# for participant_id in participant_list:
#     data_path = os.path.join(data_dir,str(participant_id)+'_recording_data.csv')
#     data_df = pd.read_csv(data_path)
#     resampled_df = prep.resample_by_trial(data_df,sample_freq=30)
#     save_filepath = os.path.join(save_path,str(participant_id)+data_suffix)
#     resampled_df.to_csv(save_filepath)
#     data_df = []
#     resampled_df = []

In [None]:
def rejection_pipeline(data_df,funcs):
    """Run a sequence of rejection steps on a deep copy of ``data_df``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Input data; it is deep-copied first, so the caller's frame is not
        the object handed to the steps.
    funcs : iterable of callables
        Each callable receives the current frame and returns the next one.
        They are applied in iteration order.

    Returns
    -------
    pandas.DataFrame
        The output of the last step, or the deep copy itself when ``funcs``
        is empty.
    """
    current_df = data_df.copy(deep=True)
    for rejection_step in funcs:
        current_df = rejection_step(current_df)
    return current_df

# <a id='toc1_'></a>[Statistics of block completeness after only percentage based trial rejection](#toc0_)

complete trial: 75% POI 1:6 s, 40% baseline -1:0 s

complete condition in a block: min. 3 accepted trials

complete block: has complete flux and complete other condition

In [4]:
# Rejection steps in the order they are applied by rejection_pipeline:
# trial-level removal first, then condition-level, then block-level.
funcs = [
    prep.remove_trials_below_percentage,
    prep.remove_bad_conditions,
    prep.remove_bad_blocks,
]

In [61]:
# Trial-type (condition) labels and block indices 0-10 to report completeness for.
conditions = ["flux", "l-m", "lms", "mel", "s"]
blocks = np.arange(11)

def make_completeness_stats_df(participant_list,blocks,conditions,funcs):
    """Tabulate remaining trial counts per participant, block and condition.

    For every participant the CSV at ``data_dir`` + id + ``data_suffix`` (both
    module-level settings) is loaded and passed through ``rejection_pipeline``
    with ``funcs``. The unique ``Trial no`` values left in each
    (``Block``, ``Trial type``) group are then counted.

    Parameters
    ----------
    participant_list : iterable
        Participant ids; each is converted with ``str`` to build the file name.
    blocks : iterable
        Block identifiers to report on.
    conditions : iterable
        Trial-type labels to report on.
    funcs : iterable of callables
        Rejection steps handed to ``rejection_pipeline``.

    Returns
    -------
    pandas.DataFrame
        One row per (participant, block, condition) with the columns
        'Participant', 'Block', 'Condition', 'Trial count' and
        'Block available'. 'Trial count' is the number of unique trials left,
        or the string 'less than 3' when the block or the condition within it
        has no rows after rejection. 'Block available' is 'yes' when the block
        still has rows after rejection and 'no' otherwise.
    """
    participant_col, block_col, condition_col = [], [], []
    count_col, available_col = [], []

    for participant_id in participant_list:
        csv_path = os.path.join(data_dir, str(participant_id) + data_suffix)
        cleaned_df = rejection_pipeline(pd.read_csv(csv_path), funcs)

        # Number of unique trial numbers left in each (block, trial type) group.
        trial_counts = (
            cleaned_df[["Block", "Trial type", "Trial no"]]
            .groupby(["Block", "Trial type"])
            .agg("nunique")
            .reset_index()
        )

        for block in blocks:
            # Both lookups depend on the block only, so do them once per block.
            block_acc = 'yes' if block in trial_counts['Block'].values else 'no'
            block_rows = trial_counts[trial_counts['Block'] == block]

            for condition in conditions:
                # Empty when the block is missing or the condition is not in it.
                matching = block_rows['Trial no'][block_rows['Trial type'] == condition].values
                count = matching[0] if matching.size else 'less than 3'

                participant_col.append(participant_id)
                block_col.append(block)
                condition_col.append(condition)
                count_col.append(count)
                available_col.append(block_acc)

    return pd.DataFrame(
        {
            'Participant': participant_col,
            'Block': block_col,
            'Condition': condition_col,
            'Trial count': count_col,
            'Block available': available_col,
        }
    )

In [None]:
# Completeness table for the rejection steps currently held in funcs.
percentage_removal_compl_df = make_completeness_stats_df(
    participant_list,
    blocks,
    conditions,
    funcs,
)

In [None]:
# Save the table to the current working directory (relative path). With the pandas
# defaults the DataFrame index is written as the first, unnamed column.
percentage_removal_compl_df.to_csv('block_completeness_percentage_only.csv')

### <a id='toc1_1_1_'></a>[Number of blocks accepted per participant](#toc0_)

In [56]:
# Count the distinct blocks marked 'Block available' == 'yes' for each participant.
(
    percentage_removal_compl_df
    .loc[percentage_removal_compl_df['Block available'] == 'yes', ['Participant', 'Block']]
    .groupby(['Participant'])
    .nunique()
)

Unnamed: 0_level_0,Block
Participant,Unnamed: 1_level_1
200,11
201,11
202,11
204,11
205,7
206,10
207,8
209,4
210,6
211,11


The groupby above shows how many blocks are accepted for each participant.

# <a id='toc2_'></a>[Block completeness after percentage and long nan trial removal](#toc0_)

Acceptance criteria as above, plus removal of trials containing NaN stretches longer than 625 ms.

In [59]:
# Rejection steps in the order they are applied by rejection_pipeline; compared with
# the percentage-only list, the long-NaN trial removal runs as the second step.
funcs = [
    prep.remove_trials_below_percentage,
    prep.remove_trials_with_long_nans,
    prep.remove_bad_conditions,
    prep.remove_bad_blocks,
]

In [None]:
# Completeness table for the rejection steps currently held in funcs
# (percentage-based and long-NaN trial removal).
percentage_nan_removal_compl_df = make_completeness_stats_df(participant_list,blocks,conditions,funcs)
# Save the table computed in this cell. Previously the percentage-only table
# (percentage_removal_compl_df) was written to this file by mistake.
percentage_nan_removal_compl_df.to_csv('block_completeness_percentage_nans.csv')

### <a id='toc2_1_1_'></a>[Number of blocks accepted per participant](#toc0_)

In [63]:
# Count the distinct blocks marked 'Block available' == 'yes' for each participant.
(
    percentage_nan_removal_compl_df
    .loc[percentage_nan_removal_compl_df['Block available'] == 'yes', ['Participant', 'Block']]
    .groupby(['Participant'])
    .nunique()
)

Unnamed: 0_level_0,Block
Participant,Unnamed: 1_level_1
200,11
201,11
202,11
204,10
205,6
206,10
207,7
209,3
210,3
211,11
