# Annotation Analysis and Consensus

## Imports

In [2]:
# Standard imports
import numpy as np
import scipy as sc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

# ML Imports
from sklearn.metrics import cohen_kappa_score, f1_score, matthews_corrcoef
import statsmodels.formula.api as smf
from statannotations.Annotator import Annotator



# OS imports
from os.path import join as ospj
from os.path import exists as ospe
import sys

from ieeg.auth import Session
from utils import load_config, clean_labels, remove_scalp_electrodes, cohens_d, get_rpath, surgical_parcellation
sys.path.append('/users/wojemann/iEEG_processing')

import os

In [3]:
# Project configuration: credentials and data/metadata/figure locations are
# all read from a single config.json (path is hard-coded to the lab server).
config_file = ospj('/mnt/leif/littlab/users/wojemann/stim-seizures/code', 'config.json')
(usr, passpath, datapath, prodatapath, metapath,
 figpath, patient_table, rid_hup, pt_list) = load_config(config_file)

In [4]:
# Global matplotlib styling applied to every figure in this notebook.
plt.rcParams.update({
    # Default colormap for image-like plots
    'image.cmap': 'magma',
    # Font sizes
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'axes.titlesize': 16,
    'axes.labelsize': 14,
    # Line weights
    'axes.linewidth': 2,
    'lines.linewidth': 2,
    # Tick lengths (major / minor)
    'xtick.major.size': 5,
    'ytick.major.size': 5,
    'xtick.minor.size': 3,
    'ytick.minor.size': 3,
    # Tick widths (major / minor)
    'xtick.major.width': 2,
    'ytick.major.width': 2,
    'xtick.minor.width': 1,
    'ytick.minor.width': 1,
})

# Three-colour palette: the first two 'Set2' colours in swapped order, then grey.
set2_pair = sns.color_palette('Set2')[:2]
pal = set2_pair[::-1] + ['grey']

## Workspace Preparation

In [5]:
# Read the LF seizure-annotation sheet and discard its 'Notes' and
# 'Summaries' columns.
lf_annotation_csv = ospj(metapath, 'stim_seizure_information - LF_seizure_annotation.csv')
sz_metadata = pd.read_csv(lf_annotation_csv).drop(columns=['Notes', 'Summaries'])
# sz_metadata.replace({np.nan:pd.NA},inplace=True)

In [6]:
def combine_ch_annots(group):
    """Merge all clinician annotations of one seizure into a single record.

    Parameters
    ----------
    group : pandas.DataFrame
        Annotation rows belonging to one seizure (one row per annotating
        clinician). Columns read here: ``Clinician``, ``Patient``,
        ``IEEGname``, ``UEO_ch``, ``10sec_ch``, ``UEO_time``,
        ``approximate_onset``, ``end`` and ``stim``. The frame is not
        modified; a sorted copy is used internally.

    Returns
    -------
    pandas.Series
        ``clinician``  list of annotator identifiers, sorted by clinician.
        ``patient``    patient identifier taken from the last sorted row.
        ``ueo``        one boolean list per clinician, aligned with
                       ``all_chs``, marking the channels listed in ``UEO_ch``.
        ``ueo_time``   one float per clinician (``UEO_time``).
        ``sec``        same layout as ``ueo`` but built from ``10sec_ch``.
        ``all_chs``    cleaned channel labels of the iEEG dataset after
                       ``remove_scalp_electrodes``.
        ``iEEG_ID``, ``approximate_onset``, ``offset``, ``stim``
                       metadata copied from the last sorted row
                       (``IEEGname``, ``approximate_onset``, ``end``, ``stim``).

    Raises
    ------
    IndexError
        If ``group`` has no rows.
    TypeError, ValueError
        If a row's ``UEO_time`` is missing or cannot be converted to float.

    Notes
    -----
    Opens ``woj_ieeglogin.bin`` and an iEEG portal session on every call.
    """
    ret = {"clinician": [],
           "patient": None,  # filled in from the last row below
           "ueo": [],
           "ueo_time": [],
           "sec": []
          }
    # Sorting seizure annotations by clinician. sort_values returns a copy, so
    # the caller's (groupby-owned) frame is left untouched.
    group = group.sort_values(by='Clinician')
    # Unifying and extracting iEEG name (ffill/bfill replace the deprecated
    # fillna(method=...) spelling)
    group['IEEGname'] = group['IEEGname'].ffill().bfill()
    ieeg_name = group['IEEGname'].iloc[0]
    pt = group['Patient'].iloc[0]
    # Get all channels from iEEG portal
    with open('woj_ieeglogin.bin', 'r') as f:
        session = Session('wojemann', f.read())
    ds = session.open_dataset(ieeg_name)
    # Clean channel labels
    raw_labels = clean_labels(ds.get_channel_labels(), pt)
    # Remove scalp electrodes from channels
    all_chs = remove_scalp_electrodes(raw_labels)

    last_row = None
    for _, row in group.iterrows():  # for each annotation in seizure
        # Missing cells become empty lists to ensure formatting consistency.
        # A plain dict is built instead of mutating the row during iteration.
        last_row = {col: _nan_to_empty(val) for col, val in row.items()}
        # Adding annotator data
        ret["clinician"].append(last_row["Clinician"])
        # Clean each annotation's labels once, not once per channel.
        # NOTE(review): assumes clean_labels is deterministic for the same
        # arguments - confirm against utils.
        ueo_labels = clean_labels(last_row["UEO_ch"], pt)
        sec_labels = clean_labels(last_row["10sec_ch"], pt)
        # Adding UEO and spread channel information as masks over all_chs
        ret["ueo"].append([ch in ueo_labels for ch in all_chs])
        ret["sec"].append([ch in sec_labels for ch in all_chs])
        # Adding annotated start time
        ret["ueo_time"].append(float(last_row["UEO_time"]))

    # Adding in additional patient metadata from the original stim seizure
    # metadata; values come from the last annotation row.
    ret["patient"] = last_row["Patient"]
    ret["all_chs"] = all_chs
    ret["iEEG_ID"] = last_row["IEEGname"]
    ret["approximate_onset"] = last_row["approximate_onset"]
    ret["offset"] = last_row["end"]
    ret["stim"] = last_row["stim"]

    return pd.Series(ret)


def _nan_to_empty(value):
    """Return ``[]`` for a missing scalar cell, otherwise ``value`` unchanged."""
    # Lists are passed through untouched: pd.isna on a list is element-wise.
    if not isinstance(value, list) and pd.isna(value):
        return []
    return value

def create_consensus(row):
    """Add consensus channel masks and consensus onset time to one record.

    For each of ``row["ueo"]`` and ``row["sec"]`` (one boolean list per
    annotator) the per-channel votes are counted and two boolean arrays are
    written back onto ``row``:

    * ``<key>_consensus`` - channels marked by at least half of the annotators
      (a tie counts as consensus).
    * ``<key>_any`` - channels marked by at least one annotator.

    ``ueo_time_consensus`` is the median of ``row["ueo_time"]``.

    The row is modified in place and also returned.
    """
    for field in ("ueo", "sec"):
        annotations = row[field]
        # Number of annotators selecting each channel
        votes = np.sum(np.array(annotations).astype(int), axis=0)
        half_of_annotators = len(annotations) / 2
        row[f"{field}_consensus"] = np.array(votes >= half_of_annotators).astype(bool)
        row[f"{field}_any"] = np.array(votes >= 1).astype(bool)
        if field == "ueo":
            onset_times = row[f"{field}_time"]
            row[f"{field}_time_consensus"] = np.median(onset_times)
    return row

## All annotations

### Creating consensus

In [7]:
# Annotator initials, alphabetically sorted
annotators = np.sort(['EC', 'CK', 'DZ', 'JJ', 'JK'])
# Name of the onset-time column
time_labels = ['UEO_time']
# Columns retained from the make-up annotation sheets
makeup_columns = ["Patient", "IEEGname", "approximate_onset", "end", "stim",
                  "stim_channels", "annotators", "UEO_time", "UEO_ch", "10sec_ch"]

# Gather every table each clinician produced: the secondary annotation push,
# the initial demo assessment and, for EC and DZ only, a make-up sheet.
clinician_tables = {}
for clin in annotators:
    annot_table = pd.read_csv(ospj(metapath, f"stim_seizure_annotations_{clin}.csv"))
    demo_table = pd.read_csv(ospj(metapath, f"stim_seizure_information - {clin}_annotation_demo.csv"))
    # The demo sheets call the onset column 'Onset'; unify the name
    demo_table = demo_table.rename(columns={'Onset': 'approximate_onset'})

    clinician_tables[clin] = [annot_table, demo_table]
    if clin in ('EC', 'DZ'):
        new_table = pd.read_csv(ospj(metapath, f"stim_seizure_information - {clin}_makeup.csv"))
        new_table = new_table[makeup_columns]
        clinician_tables[clin].append(new_table)

# Flatten to one list of frames plus a parallel array naming the annotator of
# each frame. Clinicians contribute different numbers of tables, so the keys
# are flattened element by element rather than through a 2-D array.
df_list = [table for tables in clinician_tables.values() for table in tables]
key_list = np.array([name for name, tables in clinician_tables.items() for _ in tables])

# Stack all frames with the annotator as the outer index level, flatten the
# index into columns, and discard rows without UEO channels (blank csv rows).
combined_df = (
    pd.concat(df_list, keys=key_list, names=['Clinician'])
    .reset_index()
    .dropna(axis=0, how='any', subset=['UEO_ch'])
)

# Unique seizure identifier: '<Patient>_<k>', where k is the running count of
# that patient's rows for a given clinician.
# NOTE(review): this presumably relies on every clinician's rows for a patient
# being in the same seizure order after the dropna above - confirm.
seizure_number = combined_df.groupby(['Patient', 'Clinician']).cumcount() + 1
combined_df['Seizure_ID'] = combined_df['Patient'] + '_' + seizure_number.astype(int).astype(str)
# 'level_1' is the per-table row index left behind by reset_index
combined_df = combined_df.drop(columns=['level_1'])

# Comma-separated channel strings -> lists of channel names
combined_df['UEO_ch'] = combined_df['UEO_ch'].str.split(',')
combined_df['10sec_ch'] = combined_df['10sec_ch'].str.split(',')

In [9]:
# One combined record per seizure: every Seizure_ID group is collapsed by
# combine_ch_annots, then Seizure_ID is moved from the index into a column.
seizure_groups = combined_df.groupby('Seizure_ID')
combined_tuning_annots = seizure_groups.apply(combine_ch_annots).reset_index()

In [10]:
# Cache the combined (pre-consensus) annotations on disk.
raw_annotations_pkl = ospj(prodatapath, 'raw_combined_clinician_annotations.pkl')
combined_tuning_annots.to_pickle(raw_annotations_pkl)

In [10]:
# Reload the cached combined annotations (lets the cells above be skipped).
# Pickle files should only be read from trusted locations.
raw_annotations_pkl = ospj(prodatapath, 'raw_combined_clinician_annotations.pkl')
combined_tuning_annots = pd.read_pickle(raw_annotations_pkl)

In [13]:
# Compute consensus masks and onset time row by row, then list the patients
# present in the result (shown as the cell output).
consensus_tuning_annots = combined_tuning_annots.apply(create_consensus, axis='columns')
consensus_tuning_annots['patient'].unique()

array(['CHOP005', 'CHOP010', 'CHOP015', 'CHOP024', 'CHOP026', 'CHOP028',
       'CHOP035', 'CHOP037', 'CHOP038', 'CHOP041', 'CHOP044', 'CHOP045',
       'CHOP046', 'CHOP049', 'HUP224', 'HUP225', 'HUP229', 'HUP230',
       'HUP235', 'HUP238', 'HUP246', 'HUP247', 'HUP249', 'HUP250',
       'HUP253', 'HUP257', 'HUP261', 'HUP263', 'HUP266', 'HUP267',
       'HUP273', 'HUP275', 'HUP288'], dtype=object)

In [13]:
# Persist the consensus annotations.
consensus_pkl = ospj(prodatapath, "threshold_tuning_consensus_v2.pkl")
consensus_tuning_annots.to_pickle(consensus_pkl)