# Metadata Extraction and Analysis

## Imports and data loading

In [1]:
import numpy as np
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
from ieeg.auth import Session
from scipy import signal as sig
import os
from os.path import join as ospj
from os.path import exists as ospe
import pathlib
from tqdm import tqdm

In [2]:
# Extend the module search path so the `pioneer` module can be imported below.
# NOTE(review): this is a hard-coded, user-specific absolute path; the cell only
# works on a machine where that directory exists — consider moving it to config.json.
import sys
sys.path.append('/users/wojemann/iEEG_processing')
from pioneer import Pioneer

In [3]:
# Read the project configuration (credentials locations, data/figure paths, patient list).
with open('config.json','r') as f:
    CONFIG = json.load(f)

# Unpack every configured path in one pass; the key order matches the variable order.
usr, passpath, raw_datapath, pro_datapath, figpath = (
    CONFIG["paths"][path_key]
    for path_key in ("iEEG_USR", "iEEG_PWD", "RAW_DATA", "PROCESSED_DATA", "FIGURES")
)

# One row per patient, ordered by patient ID.
patient_table = pd.DataFrame(CONFIG["patients"]).sort_values('ptID')

# Table read from rid_hup.csv in the raw data directory.
rid_hup = pd.read_csv(ospj(raw_datapath,'rid_hup.csv'))

# Patient IDs as a NumPy array, in the same sorted order as patient_table.
pt_list = patient_table['ptID'].to_numpy()

In [4]:
# Display the patient IDs taken from the ptID-sorted patient table.
pt_list

array(['CHOP005', 'CHOP010', 'CHOP015', 'CHOP024', 'CHOP026', 'CHOP028',
       'CHOP035', 'CHOP036', 'CHOP037', 'CHOP038', 'CHOP041', 'CHOP044',
       'CHOP045', 'CHOP046', 'HUP224', 'HUP225', 'HUP229', 'HUP230',
       'HUP235', 'HUP238', 'HUP246', 'HUP247', 'HUP249', 'HUP250',
       'HUP253', 'HUP257', 'HUP261', 'HUP263', 'HUP266', 'HUP267',
       'HUP273'], dtype=object)

## Creating annotation assignments

In [5]:
# Load the seizure annotation sheet from the raw data directory.
seizures_df = pd.read_csv(ospj(raw_datapath,'stim_seizure_information - LF_seizure_annotation.csv'))

# Drop the excluded patients, then keep only seizures flagged for annotation.
# The explicit .copy() makes the result an independent frame, so adding/modifying
# columns below cannot hit a view of the original table (SettingWithCopyWarning).
seizures_df = seizures_df[~seizures_df.Patient.isin(["HUP235","HUP238","HUP246","HUP261"])]
seizures_df = seizures_df[seizures_df['to_annotate']==1].copy()

# Placeholder column; filled in when annotators are assigned per patient.
seizures_df['annotators'] = ""

# Fill missing approximate onsets from UEO, then EEC, then the free-form onset column.
# Assign the result back instead of calling fillna(..., inplace=True) on the selected
# column: that chained in-place form does not update the frame under pandas
# Copy-on-Write and raises a FutureWarning on pandas 2.2.
seizures_df['approximate_onset'] = (
    seizures_df['approximate_onset']
    .fillna(seizures_df['UEO'])
    .fillna(seizures_df['EEC'])
    .fillna(seizures_df['Other_onset_description'])
)

# Remove the columns that are not needed downstream and renumber the rows.
seizures_df = seizures_df.drop(['to_annotate','Notes','source','EEC onset channels','UEO onset channels','EEC','UEO','Other_onset_description'],axis=1).reset_index(drop=True)
seizures_df.head()

Unnamed: 0,Patient,IEEGname,approximate_onset,end,stim,stim_channels,annotators
0,HUP224,HUP224_phaseII,71156.59,71190.99,1.0,LB1-LB2,
1,HUP224,HUP224_phaseII,339135.86,339234.2,0.0,,
2,HUP224,HUP224_phaseII,491462.51,491541.43,0.0,,
3,HUP224,HUP224_phaseII,519177.95,519258.16,0.0,,
4,HUP225,HUP225_phaseII,159833.84,159913.05,1.0,RC1-RC2,


In [6]:
# Seed NumPy's global RNG so the random annotator assignment is reproducible.
np.random.seed(10)

# Initials of the annotators that seizures can be assigned to.
annotators = ['CK','EC','DZ','JJ','JK']

# Running tally of seizures assigned to each annotator; everyone starts at zero.
annotation_counts = dict.fromkeys(annotators, 0)
def calc_weights(annotation_counts):
    """Turn per-annotator counts into normalized sampling weights.

    Each annotator gets a raw weight of 1/(1+count), so annotators with
    fewer assignments receive a larger weight. The raw weights are then
    divided by their sum.

    Parameters
    ----------
    annotation_counts : dict
        Maps each annotator to the number of seizures assigned so far.

    Returns
    -------
    list of float
        One weight per dictionary entry, in the dictionary's iteration order.
    """
    inverse_loads = []
    for count in annotation_counts.values():
        inverse_loads.append(1/(1+count))
    normalizer = sum(inverse_loads)
    return [load/normalizer for load in inverse_loads]

# Group seizures by the 'Patient' column so that all seizures from one patient
# are reviewed by the same set of annotators.
grouped_seizures = seizures_df.groupby('Patient')

# Per-annotator list of the patient groups assigned to them. The groups are
# concatenated once after the loop instead of re-concatenating on every iteration.
annotator_groups = {}

for patient_id, patient_group in grouped_seizures:
    num_seizures = len(patient_group)

    # Draw 3 distinct annotators for this patient. The weights come from
    # calc_weights, which favors annotators with fewer seizures assigned so far.
    weights = calc_weights(annotation_counts)
    assigned_annotators = np.random.choice(annotators, size=3, replace=False,p=weights)
    annot_str = str(assigned_annotators)

    # Record the assignment on every seizure of this patient. Address the column
    # by label with .loc rather than by position (-1) with a boolean Series in
    # .iloc, so the write does not depend on 'annotators' being the last column.
    seizures_df.loc[seizures_df.Patient == patient_id, 'annotators'] = annot_str

    # Update each annotator's workload and remember the patient's seizures for them.
    for annotator in assigned_annotators:
        annotation_counts[annotator] += num_seizures
        annotator_groups.setdefault(annotator, []).append(patient_group)

# Dictionary of one DataFrame per annotator holding all seizures assigned to them.
# pd.concat always returns a new frame, so no two annotators share the same object.
annotator_dfs = {annotator: pd.concat(groups) for annotator, groups in annotator_groups.items()}
print(annotation_counts)

{'CK': 34, 'EC': 42, 'DZ': 33, 'JJ': 35, 'JK': 36}


In [18]:
# Blank columns added to each annotator's sheet before it is written out.
blank_columns = ["UEO_time","UEO_ch","10sec_ch"]

# Write one CSV per annotator into the processed data directory.
for initials, assigned_df in annotator_dfs.items():
    assigned_df[blank_columns] = ""
    annotator_csv = ospj(pro_datapath,f"stim_seizure_annotations_{initials}.csv")
    assigned_df.to_csv(annotator_csv,index=False)

# Write the full seizure table, including the annotator assignment column.
combined_csv = ospj(pro_datapath,"LF_seizure_annotations_wannotator.csv")
seizures_df.to_csv(combined_csv,index=False)

## Extracting seizure annotations from iEEG

In [5]:
# Pull the annotations for each iEEG recording of the selected patient(s) and save
# the filtered seizure annotations as CSV under the patient's raw data directory.
# NOTE(review): iloc[[-1],:] restricts this run to the last row of patient_table —
# confirm that processing only one patient is intended.
for i,pt in patient_table.iloc[[-1],:].iterrows():
    # All recordings of one patient share a single output directory, so build and
    # create it once per patient. exist_ok=True replaces the exists-then-create check.
    save_path = ospj(raw_datapath,pt.ptID)
    os.makedirs(save_path, exist_ok=True)

    for ieeg_pt in pt.ieeg_ids:
        print(ieeg_pt,save_path)
        # Pioneer is a project-local helper; it is constructed from the iEEG user,
        # the password path and the recording ID.
        wagon = Pioneer(usr,passpath,ieeg_pt)
        wagon.pull_annotations()
        wagon.filter_seizure_annotations()
        wagon.seizure_annotations.to_csv(ospj(save_path,f'seizure_annotations_{ieeg_pt}.csv'))

HUP273_phaseII /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/HUP273
got 100 annotations on call # 1 covering 9805203125 usec to 74014621448 usec
got 65 annotations on call # 2 covering 74014621448 usec to 97808167910 usec
Filtered       9.76% of all annotations
HUP273b_phaseII /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/HUP273


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_annotations['IEEG Time'] = t_sec


got 100 annotations on call # 1 covering 119970703 usec to 76641360152 usec
got 100 annotations on call # 2 covering 77511313277 usec to 171766735765 usec
got 100 annotations on call # 3 covering 172880747484 usec to 288824902427 usec
got 100 annotations on call # 4 covering 290354086021 usec to 389636631408 usec
got 100 annotations on call # 5 covering 391483348205 usec to 540753506744 usec
got 100 annotations on call # 6 covering 548032590729 usec to 641315553780 usec
got 78 annotations on call # 7 covering 644557969795 usec to 756408821085 usec
Filtered       9.45% of all annotations
HUP273c_phaseII /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/HUP273
got 100 annotations on call # 1 covering 6250000 usec to 45592894531 usec
got 100 annotations on call # 2 covering 45600326171 usec to 117556341176 usec
got 100 annotations on call # 3 covering 117570087269 usec to 184395595421 usec
got 100 annotations on call # 4 covering 185933950890 usec to 233291249718 usec
got 100 annota