In [None]:
import pandas as pd
import numpy as np
import os

# Build a one-row-per-recording table of signal statistics for every EEG id
# listed in train.csv and save it to 'eeg_extended_summary.csv'.
metadata = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/train.csv')
eeg_path = '/kaggle/input/hms-harmful-brain-activity-classification/train_eegs'

# Sorted, de-duplicated recording ids; one parquet file is read per id.
eegids = np.unique(metadata['eeg_id'])

# (first, second) electrode pairs. Each derived trace is first - second and is
# stored under the column name "first-second".
BIPOLAR_PAIRS = [
    ('Fp1', 'F7'), ('F7', 'T3'), ('T3', 'T5'), ('T5', 'O1'),
    ('Fp2', 'F8'), ('F8', 'T4'), ('T4', 'T6'), ('T6', 'O2'),
    ('Fp1', 'F3'), ('F3', 'C3'), ('C3', 'P3'), ('P3', 'O1'),
    ('Fp2', 'F4'), ('F4', 'C4'), ('C4', 'P4'), ('P4', 'O2'),
    ('Fz', 'Cz'), ('Cz', 'Pz'),
]


def _signal_stats(values, tag):
    """Return scalar min/max/mean/std/range over every sample in ``values``.

    Parameters
    ----------
    values : array-like (Series, DataFrame or ndarray)
        Samples to summarise; multi-dimensional input is flattened first.
    tag : str
        Signal name embedded in the result keys (e.g. 'eeg' or 'ekg').

    Returns
    -------
    dict
        Keys, in order: 'min_<tag>_val', 'max_<tag>_val', 'mean_<tag>',
        'std_<tag>', 'range_<tag>_val'. All statistics ignore NaN samples and
        are NaN when no valid sample exists.
    """
    # Reducing a flattened Series always yields one scalar per statistic,
    # independent of how the installed pandas treats axis=None reductions on
    # a DataFrame, and skips NaN consistently for all four statistics.
    flat = pd.Series(np.asarray(values).ravel())
    lowest = flat.min()
    highest = flat.max()
    return {
        'min_' + tag + '_val': lowest,
        'max_' + tag + '_val': highest,
        'mean_' + tag: flat.mean(),
        # ddof=0 keeps the population standard deviation that np.std computes.
        'std_' + tag: flat.std(ddof=0),
        'range_' + tag + '_val': highest - lowest,
    }


# Collect plain dict records and build the frame once after the loop;
# concatenating onto a growing DataFrame re-copies every earlier row.
eeg_records = []
for eegid in eegids:
    eeg_data = pd.read_parquet(os.path.join(eeg_path, str(eegid) + '.parquet'))

    # Differences between the electrode pairs listed in BIPOLAR_PAIRS.
    eeg_traces = pd.DataFrame({
        first + '-' + second: eeg_data[first] - eeg_data[second]
        for first, second in BIPOLAR_PAIRS
    })

    record = {'eeg_id': eegid}
    record.update(_signal_stats(eeg_traces, 'eeg'))
    # NOTE(review): the last parquet column is taken to be the EKG channel,
    # as the result key names imply -- confirm against the file schema.
    record.update(_signal_stats(eeg_data.iloc[:, -1], 'ekg'))
    # Number of rows with at least one missing value in any column.
    record['nbr_nan_rows'] = int(eeg_data.isna().any(axis=1).sum())
    print(record)
    eeg_records.append(record)

eeg_summary = pd.DataFrame(eeg_records)
eeg_summary.to_csv('eeg_extended_summary.csv')

# **Large EEG ranges (in μV)**

In [None]:
# Recording ids whose overall EEG range exceeds each threshold; the selection
# is made once per threshold and reused for both the count and the listing.
eeg_range_values = eeg_summary['range_eeg_val']
ids_eeg_range_over_2000 = eeg_summary.loc[eeg_range_values > 2000, 'eeg_id'].tolist()
ids_eeg_range_over_10000 = eeg_summary.loc[eeg_range_values > 10000, 'eeg_id'].tolist()

print('EEG range > 2000 μV or 2 mV:', len(ids_eeg_range_over_2000))
print('EEG range > 10000 μV or 10 mV:', len(ids_eeg_range_over_10000))
print(ids_eeg_range_over_10000)

# **EKG with std. dev. = 0**

In [None]:
# Recordings whose EKG trace is constant at 9999: min == max == 9999 also
# implies mean == 9999, which is exactly what the printed label describes.
# Testing min/max rather than the mean avoids missing flat traces whose
# computed mean is off by floating-point accumulation error.
ekg_flat_9999_mask = (eeg_summary['min_ekg_val'] == 9999) & (eeg_summary['max_ekg_val'] == 9999)
ekg_flat_9999_ids = eeg_summary.loc[ekg_flat_9999_mask, 'eeg_id'].tolist()

print('EKG with mean = min = max = 9999 μV:', len(ekg_flat_9999_ids))
print(ekg_flat_9999_ids)

In [None]:
# Recordings whose EKG trace is constant at 0: min == max == 0 also implies
# mean == 0, matching the printed label. A test on the mean alone would also
# select a non-flat trace that happens to average to exactly zero.
ekg_flat_zero_mask = (eeg_summary['min_ekg_val'] == 0) & (eeg_summary['max_ekg_val'] == 0)
ekg_flat_zero_ids = eeg_summary.loc[ekg_flat_zero_mask, 'eeg_id'].tolist()

print('EKG with mean = min = max = 0 μV:', len(ekg_flat_zero_ids))
print(ekg_flat_zero_ids)

# **Large EKG ranges (in μV)**

In [None]:
# Recording ids whose EKG range exceeds 10000; selected once, then counted
# and listed.
ekg_range_exceeds_10000 = eeg_summary['range_ekg_val'] > 10000
ids_ekg_range_over_10000 = eeg_summary.loc[ekg_range_exceeds_10000, 'eeg_id'].tolist()

print('EKG range > 10000 μV or 10 mV:', len(ids_ekg_range_over_10000))
print(ids_ekg_range_over_10000)