# Hierarchical bootsrap

### Set path

In [1]:
PROJECT_PATH = 'C:/Users/micha/projects/oscillation_vs_exponent/'

## Set-up

### Imports

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time

### Settings

In [3]:
N_ITERATIONS = 1000 # number of iterations for bootstrapping
VAR = 'exponent' # variable to analyze

## Main

### load specparam results

In [4]:
# load spectral parameterization results
results = pd.read_csv(f"{PROJECT_PATH}/data/results/spectral_parameters.csv", index_col=0)

# show
print(results.shape)
print(len(results)/8)
results.head()

(5560, 19)
695.0


Unnamed: 0,patient,chan_idx,label,pos_y,pos_x,pos_z,unique_id,material,memory,epoch,offset,knee,exponent,f_rotation,alpha_cf,alpha_pw,alpha_bw,alpha_bp,alpha_adj
0,pat02,0,A01-A02,-65.431,61.9449,3.55955,pat02_0,words,hit,prestim,8.451107,12.903272,3.542673,10.978558,,,,27777.899824,5664.172994
1,pat02,1,A02-A03,-70.93895,57.17765,12.1554,pat02_1,words,hit,prestim,7.595982,12.332651,3.05248,83.836257,10.839096,0.210785,2.0,14445.161672,2948.320508
2,pat02,2,A03-A04,-75.3955,51.3944,20.94335,pat02_2,words,hit,prestim,7.056773,8.787521,2.774045,89.824561,12.850783,0.276397,2.0,15085.969728,4269.780182
3,pat02,3,A04-A05,-78.9195,43.9098,30.13485,pat02_3,words,hit,prestim,7.59508,10.177476,2.940543,59.883041,,,,23672.502537,2726.078761
4,pat02,4,A05-A06,-80.96735,35.21485,38.13475,pat02_4,words,hit,prestim,7.399935,9.126477,2.872949,65.871345,11.565201,0.227912,2.671217,21742.063343,3556.630628


### perform hierarchical bootsrap
level 1: epoch (experimental condition)  
level 2: patient  
level 3: channel

In [5]:
# split results by trial type (words/faces) and take successful trials only (memory==hit)
df_w = results.loc[((results['memory']=='hit')&(results['material']=='words'))]
df_f = results.loc[((results['memory']=='hit')&(results['material']=='faces'))]

# start with one trial type
df = df_w.copy()


In [6]:
# compute true difference between experimental conditions 
true_diff = np.nanmean(df.loc[df['epoch']=='poststim', VAR]) - \
    np.nanmean(df.loc[df['epoch']=='prestim', VAR])
print(f"True difference: {true_diff :0.3f}")

True difference: -0.105


In [7]:
# count number of channels for each patient
patients = results['patient'].unique()
temp = results[(results['memory']=='hit') & (results['material']=='words') & (results['epoch']=='prestim')]
n_channels = dict()
for patient in patients:
    n_channels[patient] = len(temp.loc[temp['patient']==patient])
n_channels

{'pat02': 9,
 'pat04': 82,
 'pat05': 64,
 'pat08': 76,
 'pat10': 21,
 'pat11': 68,
 'pat15': 55,
 'pat16': 13,
 'pat17': 56,
 'pat19': 88,
 'pat20': 37,
 'pat21': 73,
 'pat22': 53}

In [8]:
# perform hierarchical bootstrap on exponent values

# time it 
t_start = time()

# successful trials only
df = results[results['memory']=='hit']
n_samples = len(df)

# get levels
epochs = df['epoch'].unique()
materials = df['material'].unique()
# patients = df['patient'].unique()

# loop through iterations
diff = np.zeros(N_ITERATIONS)
for i_iteration in range(N_ITERATIONS):
    # display progress
    print(f"Iteration {i_iteration+1}/{N_ITERATIONS}")

    # resample level 1: epoch
    epochs = np.random.choice(epochs, size=n_samples)
    # resample level 2: patient
    patients = np.random.choice(patients, size=n_samples)

    # resample level 3: channel, and get data
    value = np.zeros(n_samples)
    for i_sample, (epoch, patient) in enumerate(zip(epochs, patients)):
        # resample level 4: channel
        channel = np.random.choice(n_channels[patient], size=1)[0]

        # get data
        value[i_sample] = df.loc[(df['epoch']==epoch) & (df['patient']==patient) & (df['chan_idx']==channel), VAR].values[0]

    # compute difference between resampled experimental conditions
    diff[i_iteration] = np.nanmean(value[len(value)//2:]) - np.nanmean(value[:len(value)//2])
    # print(diff[i_iteration])

# plot
fig, ax = plt.subplots(figsize=(6,4))
ax.hist(diff, bins=20, color='k')
ax.axvline(true_diff, color='k', linestyle='--')
ax.set_xlabel('exponent difference')
ax.set_ylabel('count')
ax.set_title('Resampled exponent')
plt.show()

# print time
print(f"Resampling completed in {time() - t_start :0.1f} s")

Iteration 1/1000


Iteration 2/1000
Iteration 3/1000
Iteration 4/1000
Iteration 5/1000
Iteration 6/1000
Iteration 7/1000
Iteration 8/1000
Iteration 9/1000
Iteration 10/1000
Iteration 11/1000
Iteration 12/1000
Iteration 13/1000
Iteration 14/1000
Iteration 15/1000
Iteration 16/1000
Iteration 17/1000
Iteration 18/1000
Iteration 19/1000


In [None]:
# # perform hierarchical bootstrap - include material as additional level

# # time it 
# t_start = time()

# # successful trials only
# df = results[results['memory']=='hit']
# n_samples = len(df)

# # get levels
# epochs = df['epoch'].unique()
# materials = df['material'].unique()

# # loop through iterations
# diff = np.zeros(n_iterations)
# for ii in range(n_iterations):
#     # display progress
#     print(f"Iteration {ii+1}/{n_iterations}")

#     # resample level 1: epoch
#     epochs = np.random.choice(epochs, size=n_samples)
#     # resample level 2: material
#     materials = np.random.choice(materials, size=n_samples)
#     # resample level 3: patient
#     patients = np.random.choice(patients, size=n_samples)

#     # resample level 4: channel, and get data
#     value = np.zeros(n_samples)
#     for i_sample, (epoch, material, patient) in enumerate(zip(epochs, materials, patients)):
#         # resample level 4: channel
#         channel = np.random.choice(n_channels[patient], size=1)[0]

#         # get data
#         value[i_sample] = df.loc[(df['epoch']==epoch) & (df['material']==material) \
#             & (df['patient']==patient) & (df['chan_idx']==channel), var].values[0]

#     # compute difference between resampled experimental conditions
#     diff[i_iteration] = np.nanmean(value[len(value)//2:]) - np.nanmean(value[:len(value)//2])
#     print(diff[i_iteration])

# # plot
# fig, ax = plt.subplots(figsize=(6,4))
# ax.hist(diff, bins=20, color='k')
# ax.axvline(true_diff, color='k', linestyle='--')
# ax.set_xlabel('exponent difference')
# ax.set_ylabel('count')
# ax.set_title('Resampled exponent')
# plt.show()

# # print time
# print(f"Resampling completed in {time() - t_start :0.1f} s")