In [None]:
import sys
from pathlib import Path

import h5py as h5
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.append('../../lib')
from local_paths import analysis_dir

# Parameters

In [None]:
#============================================================================
# analysis type and result path
#============================================================================
# Keep exactly one (analysis_name, results_subdir) pair active.
# analysis_name switches which HDF5 keys are read, which peak columns are
# saved, and which peak-time criterion is applied during selection.
# - stim onset-aligned
# analysis_name = 'stim_on'
# results_subdir = 'fix0_self_consistency_boot200-t2hh'

# - OR, fixation onset-aligned
analysis_name = 'fix_on'
results_subdir ='self_consistency_boot200-cp'

# free-form suffix inserted into the names of the CSV files written below
output_sfx = ''

#============================================================================
# selection criteria
#============================================================================
# strict upper bound on both |bootstrap bias| and bootstrap stdev of the
# latency (train split); also reused for the array-level stdev filter.
# Same units as the latency columns -- presumably ms, TODO confirm.
boots_spread = 25
# inclusive lower bound on 'Clearance' and on 'Boots. mean clearance, train'
min_clearance = 100
# inclusive lower bound on 'Boots. frac., train'
min_boots_frac = 0.5

# require stim-on latency to be strictly positive?  Rows with latency <= 0
# are deselected (not clipped).  only relevant if analysis_name == stim_on
clamp_stim_on = True

# Preamble

In [None]:
# Locate the results folder.  The prefix and sub-directory are concatenated
# as plain strings (assumes analysis_dir is a str ending in a path separator
# -- TODO confirm against local_paths).
results_dir = Path(analysis_dir+results_subdir).expanduser()
assert results_dir.is_dir(), f'results directory not found: {results_dir}'

# Whatever is left of the folder name after removing the canonical base name
# becomes a tag in the output file names.  Use .name, not .stem: .stem drops
# everything after the last dot, which would truncate folder names such as
# '...-cp-v1.2'.
if analysis_name == 'fix_on':
    base_name = 'self_consistency_boot200-cp'
else:
    base_name = 'fix0_self_consistency_boot200-t2hh'
res_tag = results_dir.name.replace(base_name, '')
if res_tag:
    res_tag = '-' + res_tag
print('results name tag:', res_tag)

# the positivity requirement only ever applies to stim-onset-aligned latencies
clamp_stim_on = clamp_stim_on and (analysis_name == 'stim_on')

# columns written to the summary CSVs; the peak columns differ by analysis
cols_to_save = ['Latency', 'Boots. stdev.', 'Clearance', 'T step', 'Boots. frac.']
if analysis_name == 'fix_on':
    cols_to_save += ['Peak 0 SC', 'Peak 0 time', 'Peak 1 SC', 'Peak 1 time']
else:
    cols_to_save += ['Peak SC', 'Peak time']

# Load data

In [None]:
# Gather the latency table of every result file whose analysis is flagged as
# complete, then stack them into one frame indexed by (Session, Level, Name).
if analysis_name == 'fix_on':
    done_key = 'progress_report/crossing_point/all_done'
    table_key = 'crossing_point/latency_dataframe'
else:
    done_key = 'progress_report/time_to_half_height/all_done'
    table_key = 'time_to_half_height/return_fixation/latency_dataframe'

frames = []
# sorted() keeps the row order (and therefore the saved CSVs) independent of
# the order in which the filesystem happens to list the files
for fp in sorted(results_dir.glob('*.h5')):
    with h5.File(fp, 'r') as f:
        try:
            all_done = f[done_key][()]
        except KeyError:
            # no progress flag recorded: treat the file as unfinished
            continue
        if not all_done:
            continue

    if analysis_name == 'fix_on':
        df_ = pd.read_hdf(fp, table_key)
    else:
        # finished stim-on files may still lack this table; skip those
        try:
            df_ = pd.read_hdf(fp, table_key)
        except KeyError:
            continue

    frames.append(df_)

if not frames:
    # pd.concat would raise a far less informative ValueError on an empty list
    raise ValueError(f'no completed {analysis_name} results found in {results_dir}')

df = pd.concat(frames).set_index(['Session', 'Level', 'Name'])
assert not df.index.has_duplicates, 'duplicate (Session, Level, Name) entries across result files'
print(df.shape)

# Select from all results

In [None]:
def select_latetency(
        lat_df, analysis_name=analysis_name,
        min_boots_frac=min_boots_frac, boots_spread=boots_spread,
        min_clearance=min_clearance,
        clamp_stim_on=clamp_stim_on):
    """Return a boolean mask of the rows of ``lat_df`` that pass all criteria.

    The keyword defaults are the notebook-level parameters as they were when
    this cell was executed.  ``lat_df`` is only read, never modified.

    Criteria (each one's pass rate is printed, followed by the combined rate):

    - ``Is valid``: ``Latency`` is finite.
    - ``Clearance``: ``Clearance >= min_clearance``.
    - ``Peak time``: for ``analysis_name == 'fix_on'`` the latency lies within
      ``[Peak 0 time, Peak 1 time]``; otherwise ``Latency <= Peak time``.
    - ``Is positive`` (only when ``clamp_stim_on`` is truthy): ``Latency > 0``.
    - ``Boots. frac.``: ``'Boots. frac., train' >= min_boots_frac``.
    - ``Boots. spread``: both ``|'Boots. bias, train'|`` and
      ``'Boots. stdev., train'`` are strictly below ``boots_spread``.
    - ``Boots. clearance``: ``'Boots. mean clearance, train' >= min_clearance``.

    Parameters
    ----------
    lat_df : pandas.DataFrame
        Latency table holding the columns named above.
    analysis_name : str
        ``'fix_on'`` selects the two-peak criterion; any other value selects
        the single-peak criterion.
    min_boots_frac, boots_spread, min_clearance : float
        Thresholds, see the criteria list.
    clamp_stim_on : bool
        Whether to additionally require a strictly positive latency.

    Returns
    -------
    numpy.ndarray
        Boolean array with one element per row of ``lat_df``.
    """
    latency = lat_df['Latency'].values

    criteria = {}
    criteria['Is valid'] = np.isfinite(latency)

    criteria['Clearance'] = lat_df['Clearance'].values >= min_clearance
    if analysis_name == 'fix_on':
        criteria['Peak time'] = (
            (latency >= lat_df['Peak 0 time'].values)
            & (latency <= lat_df['Peak 1 time'].values))
    else:
        criteria['Peak time'] = latency <= lat_df['Peak time'].values

    if clamp_stim_on:
        criteria['Is positive'] = latency > 0

    criteria['Boots. frac.'] = lat_df['Boots. frac., train'].values >= min_boots_frac
    criteria['Boots. spread'] = (
        (np.abs(lat_df['Boots. bias, train'].values) < boots_spread)
        & (lat_df['Boots. stdev., train'].values < boots_spread))
    criteria['Boots. clearance'] = lat_df['Boots. mean clearance, train'].values >= min_clearance

    for k, m in criteria.items():
        print(f'criterion: {k:<20} passed: {m.mean()*100:.1f}% ({m.sum()} of {m.size})')
    # a row is selected only if it passes every individual criterion
    m = np.all(list(criteria.values()), axis=0)
    print(f'criterion: {"All":<20} passed: {m.mean()*100:.1f}% ({m.sum()} of {m.size})')

    return m

In [None]:
# Flag every row of the full table (all levels) against the selection
# criteria; assign() returns a new frame, so df itself stays untouched.
lat_df = df.assign(Selected=select_latetency(df))

In [None]:
# Write the selected rows to a gzip-compressed CSV.  The output folder is
# created first so that a fresh checkout does not fail on a missing directory.
out_path = Path(f'summary/{analysis_name}_latency{res_tag}{output_sfx}.csv.gz')
out_path.parent.mkdir(parents=True, exist_ok=True)
lat_df.loc[lat_df['Selected'], cols_to_save].to_csv(out_path)

# Summarize array-level results

In [None]:
# Array-to-region lookup, keyed by (Subject, Array ID).  The subject code is
# taken as the first two characters of the session name; where a subject/array
# pair is listed for several sessions, the first entry is kept.
arreg = pd.read_csv('../../db/bank_array_regions.csv').astype({'Array ID': str})
arreg = (
    arreg
    .assign(Subject=[session[:2] for session in arreg['Session']])
    .groupby(['Subject', 'Array ID'])
    .first())

In [None]:
# Flatten the index into ordinary columns, keep only array-level rows, and
# re-run the selection on that subset.
flat = df.reset_index()
is_array = flat['Level'].eq('Array')
lat_df = flat.loc[is_array].copy()
lat_df['Selected'] = select_latetency(lat_df)

In [None]:
# Selected array-level rows, annotated with subject code and brain region.
adf = lat_df.loc[lat_df['Selected']].copy()
adf['Subject'] = [session[:2] for session in adf['Session']]
# look each (Subject, Name) pair up in the array/region table
array_keys = list(zip(adf['Subject'], adf['Name']))
adf['Region'] = arreg.loc[array_keys]['Region'].values

In [None]:
print('Array-level latencies, per session')
# one unfilled density polygon per region, each normalised on its own
hist_style = dict(stat='density', element='poly', common_norm=False, fill=False)
sns.histplot(data=adf, x='Latency', hue='Region', **hist_style)

In [None]:
print('Array-level latencies, per session')
# The label previously said "boots frac", but the filter below is on the
# bootstrap stdev column; the text now matches what the code does.
print('After selecting for test boots stdev')
# note here and below, we select based on 'Boots. stdev.' (presumably the
# test-split counterpart of 'Boots. stdev., train' -- confirm) because
# 1) array-level results are never reported but used as default parameters
# 2) as default parameters, the values should be reliable
adf = adf[adf['Boots. stdev.'] < boots_spread]  # select once
sns.histplot(data=adf, x='Latency', hue='Region', stat='density', element='poly', common_norm=False, fill=False);

In [None]:
print('Array-level latencies, median across sessions')
# collapse sessions: one row per (Subject, Name) array
per_array_agg = {
    'Latency': 'median',
    'Region': 'first',
    'Selected': 'mean',
    'Boots. stdev.': 'median',
}
df_ = adf.groupby(['Subject', 'Name']).agg(per_array_agg)
assert df_['Selected'].all()  # sanity check
# keep only arrays whose median bootstrap stdev is below the spread threshold
df_ = df_.loc[df_['Boots. stdev.'] < boots_spread]
sns.histplot(
    data=df_, x='Latency', hue='Region',
    stat='density', element='poly', common_norm=False, fill=False);
df_

In [None]:
print('Array-level latencies, median across arrays')
# per-region median of the per-array summary built in the previous cell
by_region = df_.groupby('Region')
by_region.median()

In [None]:
# save array-level results, median across sessions
gb = adf.groupby(['Subject', 'Name'])
df_ = gb[cols_to_save].median()
df_['Count'] = gb['Latency'].count()  # number of sessions contributing to each array
df_['Region'] = gb['Region'].first()
df_['Level'] = 'Array'
df_ = df_[df_['Boots. stdev.'] < boots_spread]  # select twice
df_ = df_.reset_index().set_index(['Subject', 'Level', 'Name'])
# create the output folder first so a fresh checkout does not fail here
out_path = Path(f'summary/{analysis_name}_latency{res_tag}{output_sfx}-array_level.csv.gz')
out_path.parent.mkdir(parents=True, exist_ok=True)
df_.to_csv(out_path)
df_