# Metadata Extraction and Analysis

## Imports and data loading

In [1]:
import numpy as np
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
from ieeg.auth import Session
from scipy import signal as sig
import os
from os.path import join as ospj
from os.path import exists as ospe
import pathlib
from tqdm import tqdm

from utils import *

In [2]:
import sys
sys.path.append('/users/wojemann/iEEG_processing')
from pioneer import Pioneer

In [3]:
# Read the project configuration for the 'CHOP' setting and unpack the eight
# values that load_config returns.
config_path = ospj('/mnt/leif/littlab/users/wojemann/stim-seizures/code','config.json')
(usr, passpath, datapath, prodatapath,
 figpath, patient_table, rid_hup, pt_list) = load_config(config_path,'CHOP')

In [4]:
# Display the patient table returned by load_config
patient_table

Unnamed: 0,ptID,ieeg_ids,lf_stim,hf_stim,typical,mtle,interictal_training
0,CHOP005,"[CHOPCCEP_005, CHOP005]",1,0,,,[]
1,CHOP010,"[CHOPCCEP_010, CHOP010a, CHOP010b, CHOP010c]",1,0,,,[]
2,CHOP024,"[CHOPCCEP_024, CHOP024]",1,0,,,[]
3,CHOP026,"[CHOPCCEP_026, CHOP026]",1,0,,,[]
4,CHOP028,"[CHOPCCEP_028, CHOP028]",1,0,,,[]
5,CHOP035,"[CHOPCCEP_035, CHOP035]",1,0,,,[]
6,CHOP036,"[CHOPCCEP_036, CHOP036]",1,0,,,[]
7,CHOP037,"[CHOPCCEP_037, CHOP037]",1,0,,,[]
8,CHOP038,"[CHOPCCEP_038, CHOP038]",1,0,,,[]
9,CHOP041,"[CHOPCCEP_041, CHOP041]",1,0,,,[]


## Creating annotation assignments

In [5]:
# Load the LF seizure annotation sheet, drop the excluded patients, and keep
# only the rows flagged with to_annotate == 1.
EXCLUDED_PATIENTS = ["HUP235","HUP238","HUP246","HUP261"]
seizures_raw = pd.read_csv(ospj(datapath,'stim_seizure_information - LF_seizure_annotation.csv'))
keep_rows = ~seizures_raw.Patient.isin(EXCLUDED_PATIENTS) & (seizures_raw['to_annotate'] == 1)
# Explicit copy so the column edits below act on an independent frame rather
# than on a slice of the raw table.
seizures_df = seizures_raw.loc[keep_rows].copy()
# 'annotators' is appended here so it remains the last column after the drop below.
seizures_df['annotators'] = ""
# Fill missing onsets from UEO, then EEC, then Other_onset_description.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection, which is chained assignment and is not guaranteed to
# update the frame.
seizures_df['approximate_onset'] = (
    seizures_df['approximate_onset']
    .fillna(seizures_df['UEO'])
    .fillna(seizures_df['EEC'])
    .fillna(seizures_df['Other_onset_description'])
)
seizures_df = seizures_df.drop(['to_annotate','Notes','source','EEC onset channels','UEO onset channels','EEC','UEO','Other_onset_description'],axis=1).reset_index(drop=True)
seizures_df.head()

Unnamed: 0,Patient,IEEGname,approximate_onset,end,stim,stim_channels,annotators
0,HUP224,HUP224_phaseII,71156.59,71190.99,1.0,LB1-LB2,
1,HUP224,HUP224_phaseII,339135.86,339234.2,0.0,,
2,HUP224,HUP224_phaseII,491462.51,491541.43,0.0,,
3,HUP224,HUP224_phaseII,519177.95,519258.16,0.0,,
4,HUP225,HUP225_phaseII,159833.84,159913.05,1.0,RC1-RC2,


In [6]:
# Setup for assigning annotators to the seizures in 'seizures_df'.
# Seed NumPy's global random generator so the random assignment is repeatable.
np.random.seed(10)
# Initials of the annotators available for assignment
annotators = ['CK','EC','DZ','JJ','JK']
# Running tally of seizures assigned to each annotator, starting at zero
annotation_counts = dict.fromkeys(annotators, 0)
def calc_weights(annotation_counts):
    """Return sampling probabilities that favour less-loaded annotators.

    Each entry of `annotation_counts` gets a raw weight of 1/(1+count); the
    raw weights are then divided by their total so they sum to 1. The result
    is a list in the iteration order of `annotation_counts`.
    """
    inverse_loads = []
    for count in annotation_counts.values():
        inverse_loads.append(1/(1+count))
    normalizer = sum(inverse_loads)
    return [load/normalizer for load in inverse_loads]

# Group the seizures by the 'Patient' column so that all seizures from one
# patient are given to the same set of annotators.
grouped_seizures = seizures_df.groupby('Patient')

# Patient groups collected per annotator; concatenated once after the loop
# instead of growing a DataFrame with pd.concat on every iteration.
annotator_groups = {}

# Fill the 'annotators' column of seizures_df and build each annotator's workload
for patient_id, patient_group in grouped_seizures:
    num_seizures = len(patient_group)
    # Randomly draw 3 distinct annotators, weighted towards those with fewer
    # seizures assigned so far.
    weights = calc_weights(annotation_counts)
    assigned_annotators = np.random.choice(annotators, size=3, replace=False, p=weights)
    annot_str = str(assigned_annotators)

    # Label-based assignment: the boolean mask is aligned on the index and the
    # column is addressed by name rather than by its position in the frame.
    seizures_df.loc[seizures_df.Patient == patient_id, 'annotators'] = annot_str

    # Credit every seizure of this patient to each assigned annotator
    for annotator in assigned_annotators:
        annotation_counts[annotator] += num_seizures
        annotator_groups.setdefault(annotator, []).append(patient_group)

# Dictionary of DataFrames, one per annotator that received at least one patient.
# pd.concat always returns a new frame, so later edits to these frames do not
# act on a groupby slice of seizures_df.
annotator_dfs = {
    annotator: pd.concat(groups)
    for annotator, groups in annotator_groups.items()
}
print(annotation_counts)

{'CK': 34, 'EC': 42, 'DZ': 33, 'JJ': 35, 'JK': 36}


In [18]:
# Add blank UEO_time / UEO_ch / 10sec_ch columns to every annotator's sheet and
# write one CSV per annotator, then write the full table with assigned annotators.
blank_fields = {"UEO_time": "", "UEO_ch": "", "10sec_ch": ""}
for key in list(annotator_dfs.keys()):
    # .assign returns a new frame, so the columns are never set on a frame that
    # may be a slice of another DataFrame; the dict entry is updated to match.
    annotator_dfs[key] = annotator_dfs[key].assign(**blank_fields)
    annotator_dfs[key].to_csv(ospj(prodatapath,f"stim_seizure_annotations_{key}.csv"),index=False)
seizures_df.to_csv(ospj(prodatapath,"LF_seizure_annotations_wannotator.csv"),index=False)

## Extracting seizure annotations from iEEG

In [6]:
# For every iEEG dataset of every patient, pull its annotations, filter them
# to seizure annotations, and save them as a CSV in the patient's folder
# under datapath.
for _, pt in patient_table.iterrows():
    for ieeg_pt in pt.ieeg_ids:
        try:
            save_path = ospj(datapath,pt.ptID)
            print(ieeg_pt,save_path)
            os.makedirs(save_path,exist_ok=True)
            wagon = Pioneer(usr,passpath,ieeg_pt)
            wagon.pull_annotations()
            wagon.filter_seizure_annotations()
            wagon.seizure_annotations.to_csv(ospj(save_path,f'seizure_annotations_{ieeg_pt}.csv'))
        except Exception as err:
            # Best-effort: a failing dataset is reported and skipped so the
            # remaining datasets are still processed. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the loop.
            print(f'Skipping {ieeg_pt}: {type(err).__name__}: {err}')
            continue

CHOPCCEP_005 /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/CHOP005
got 97 annotations on call # 1 covering 1635717773 usec to 2527615234 usec
Filtered       9.79% of all annotations
CHOP005 /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/CHOP005
got 100 annotations on call # 1 covering 7981626953 usec to 15118432128 usec
got 100 annotations on call # 2 covering 15119359863 usec to 24459052734 usec
got 100 annotations on call # 3 covering 24459814453 usec to 31742939453 usec
got 100 annotations on call # 4 covering 31744939453 usec to 66248345703 usec
got 100 annotations on call # 5 covering 66817928636 usec to 97824043382 usec
got 64 annotations on call # 6 covering 97824043382 usec to 148402741136 usec
Filtered       7.16% of all annotations
CHOPCCEP_010 /mnt/leif/littlab/users/wojemann/stim-seizures/RAW_DATA/CHOP010
got 100 annotations on call # 1 covering 1931744140 usec to 4013451171 usec
got 100 annotations on call # 2 covering 4523158203 usec to 6077830078 usec
