In [1]:
import sys
sys.path.append("../src")

from preprocessing import *
from feature_extraction import *

import pandas as pd
import numpy as np
import logging

# Root logging setup: INFO and above, each record rendered as "<timestamp> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
# Logger used by the processing cells for progress and error reporting.
logger = logging.getLogger(__name__)

In [2]:
# Volunteer identifiers "S001" .. "S109" (zero-padded to three digits).
volunteer_ids = ["S%03d" % number for number in range(1, 110)]

# Channel labels processed for every volunteer, grouped here by scalp region.
# The order is significant: result rows are emitted in this order.
channel_ids = (
    "FC5 FC3 FC1 FCz FC2 FC4 FC6 "
    "C5 C3 C1 Cz C2 C4 C6 "
    "CP5 CP3 CP1 CPz CP2 CP4 CP6 "
    "Fp1 Fpz Fp2 "
    "AF7 AF3 AFz AF4 AF8 "
    "F7 F5 F3 F1 Fz F2 F4 F6 F8 "
    "FT7 FT8 T7 T8 T9 T10 TP7 TP8 "
    "P7 P5 P3 P1 Pz P2 P4 P6 P8 "
    "PO7 PO3 POz PO4 PO8 "
    "O1 Oz O2 Iz"
).split()

# Parieto-occipital / occipital subset of channel_ids.
occipital_channels = "PO7 PO3 POz PO4 PO8 O1 Oz O2".split()

# Input location handed to extract_data(), and the directory reserved for results.
root_dir = '../data/files/'
result_dir = '../results/tmp/'


CASE OF UNFILTERED DATA


ETC WITHOUT ORDINAL ANALYSIS

In [3]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")
    
for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    r1, r2 = extract_data(v, root_dir)
    
    labels = r1.ch_names
    label_idx = {label: idx for idx, label in enumerate(labels)}
 
    r1_data = r1.get_data()
    r2_data = r2.get_data()

    for ch in channel_ids:
        idx = label_idx[ch]
        signal1 = r1_data[idx,:]
        signal2 = r2_data[idx,:]
        
        etc_1 = etc_func(signal1)
        etc_2 = etc_func(signal2)

        results.append({
            'volunteer': v,
            'channel': ch,
            'ETC_EyesOpen': etc_1,
            'ETC_EyesClosed': etc_2,
            })
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")
    
logger.info(f"Processing complete. Total results: {len(results)}")
    
data_unfiltered = pd.DataFrame(results)
%store data_unfiltered

2025-07-31 23:33:29,166 - Processing 109 volunteers with 64 channels each
2025-07-31 23:33:29,168 - Processing S001 (1/109)
2025-07-31 23:33:41,206 - Processing S002 (2/109)
2025-07-31 23:33:47,961 - Processing S003 (3/109)
2025-07-31 23:33:52,328 - Processing S004 (4/109)
2025-07-31 23:33:56,184 - Processing S005 (5/109)
2025-07-31 23:34:00,895 - Processing S006 (6/109)
2025-07-31 23:34:08,219 - Processing S007 (7/109)
2025-07-31 23:34:13,961 - Processing S008 (8/109)
2025-07-31 23:34:17,435 - Processing S009 (9/109)
2025-07-31 23:34:25,647 - Processing S010 (10/109)
2025-07-31 23:34:29,141 - Completed 10/109 volunteers
2025-07-31 23:34:29,142 - Processing S011 (11/109)
2025-07-31 23:34:32,777 - Processing S012 (12/109)
2025-07-31 23:34:36,824 - Processing S013 (13/109)
2025-07-31 23:34:42,741 - Processing S014 (14/109)
2025-07-31 23:34:47,699 - Processing S015 (15/109)
2025-07-31 23:34:54,569 - Processing S016 (16/109)
2025-07-31 23:34:59,096 - Processing S017 (17/109)
2025-07-31 23:

Stored 'data_unfiltered' (DataFrame)



ETC WITH ORDINAL ANALYSIS

In [4]:
# Ordinal-analysis settings for the unfiltered run.
# TIME_WINDOW is handed to permute_patterns() and ordinal_patterns() by the next cell.
# DELAY: presumably the lag between samples inside one pattern -- TODO confirm
# against ordinal_patterns.
TIME_WINDOW, DELAY = 3, 1

In [5]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")

for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    
    r1, r2 = extract_data(v, root_dir)
    r1_data = r1.get_data()
    r2_data = r2.get_data()
    
    labels = r1.ch_names
    channel_idx_map = {name: idx for idx, name in enumerate(labels)}

    for ch in channel_ids:   
        try:
            idx = channel_idx_map[ch]
            signal1 = r1_data[idx, :]
            signal2 = r2_data[idx, :]
            
            
            pattern_to_symbol, n = permute_patterns(TIME_WINDOW)
            
            # Compute ordinal patterns
            symbols1 = ordinal_patterns(pattern_to_symbol, signal1, TIME_WINDOW)
            symbols2 = ordinal_patterns(pattern_to_symbol, signal2, TIME_WINDOW)
            
            # Compute ETC
            etc1 = etc_func(symbols1, bins=n)
            etc2 = etc_func(symbols2, bins=n)
            
            results.append({
                'volunteer': v,
                'channel': ch,
                'ETC_EyesOpen': etc1,
                'ETC_EyesClosed': etc2,
            })
            
        except Exception as e:
            logger.error(f"Error processing channel {ch} for {v}: {e}")
            continue
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")

logger.info(f"Processing complete. Total results: {len(results)}")

data_unfiltered_ordinal = pd.DataFrame(results)
%store data_unfiltered_ordinal

2025-07-31 23:41:59,862 - Processing 109 volunteers with 64 channels each
2025-07-31 23:41:59,864 - Processing S001 (1/109)
2025-07-31 23:42:28,892 - Processing S002 (2/109)
2025-07-31 23:43:00,774 - Processing S003 (3/109)
2025-07-31 23:43:29,512 - Processing S004 (4/109)
2025-07-31 23:44:00,416 - Processing S005 (5/109)
2025-07-31 23:44:10,657 - Processing S006 (6/109)
2025-07-31 23:44:38,724 - Processing S007 (7/109)
2025-07-31 23:44:50,449 - Processing S008 (8/109)
2025-07-31 23:45:22,127 - Processing S009 (9/109)
2025-07-31 23:45:51,239 - Processing S010 (10/109)
2025-07-31 23:46:22,676 - Completed 10/109 volunteers
2025-07-31 23:46:22,677 - Processing S011 (11/109)
2025-07-31 23:46:52,894 - Processing S012 (12/109)
2025-07-31 23:47:24,014 - Processing S013 (13/109)
2025-07-31 23:47:53,637 - Processing S014 (14/109)
2025-07-31 23:48:25,542 - Processing S015 (15/109)
2025-07-31 23:48:55,934 - Processing S016 (16/109)
2025-07-31 23:49:26,776 - Processing S017 (17/109)
2025-07-31 23:

Stored 'data_unfiltered_ordinal' (DataFrame)



CASE OF FILTERED DATA


ETC WITHOUT ORDINAL ANALYSIS

In [6]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")
    
for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")  
    r1, r2 = extract_data(v, root_dir)
    r1, r2 = filter_data(r1, r2)

    labels = r1.ch_names
    label_idx = {label: idx for idx, label in enumerate(labels)}

    r1_data = r1.get_data()
    r2_data = r2.get_data()

    for ch in channel_ids:
        idx = label_idx[ch]
        signal1 = r1_data[idx,:]
        signal2 = r2_data[idx,:]
        
        etc_1 = etc_func(signal1)
        etc_2 = etc_func(signal2)

        results.append({
            'volunteer': v,
            'channel': ch,
            'ETC_EyesOpen': etc_1,
            'ETC_EyesClosed': etc_2,
            })
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")
    
logger.info(f"Processing complete. Total results: {len(results)}")
    
data_filtered = pd.DataFrame(results)
%store data_filtered

2025-08-01 00:33:10,495 - Processing 109 volunteers with 64 channels each
2025-08-01 00:33:10,497 - Processing S001 (1/109)
2025-08-01 00:33:14,816 - Processing S002 (2/109)
2025-08-01 00:33:18,588 - Processing S003 (3/109)
2025-08-01 00:33:22,487 - Processing S004 (4/109)
2025-08-01 00:33:26,180 - Processing S005 (5/109)
2025-08-01 00:33:30,127 - Processing S006 (6/109)
2025-08-01 00:33:34,266 - Processing S007 (7/109)
2025-08-01 00:33:38,089 - Processing S008 (8/109)
2025-08-01 00:33:41,753 - Processing S009 (9/109)
2025-08-01 00:33:45,588 - Processing S010 (10/109)
2025-08-01 00:33:49,519 - Completed 10/109 volunteers
2025-08-01 00:33:49,521 - Processing S011 (11/109)
2025-08-01 00:33:53,384 - Processing S012 (12/109)
2025-08-01 00:33:57,432 - Processing S013 (13/109)
2025-08-01 00:34:01,407 - Processing S014 (14/109)
2025-08-01 00:34:05,107 - Processing S015 (15/109)
2025-08-01 00:34:09,136 - Processing S016 (16/109)
2025-08-01 00:34:13,177 - Processing S017 (17/109)
2025-08-01 00:

Stored 'data_filtered' (DataFrame)



ETC WITH ORDINAL ANALYSIS

In [14]:
# Ordinal-analysis settings for the filtered run.
# NOTE: this rebinds TIME_WINDOW and DELAY from the earlier settings cell (3 and 1),
# so re-running the unfiltered ordinal cell after this one uses these values instead.
# BINS is passed to etc_func() as `bins` by the next cell.
TIME_WINDOW, DELAY, BINS = 5, 1, 4

In [15]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")

for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    
    r1, r2 = extract_data(v, root_dir)
    r1, r2 = filter_data(r1, r2)
    r1_data = r1.get_data()
    r2_data = r2.get_data()
    
    labels = r1.ch_names
    channel_idx_map = {name: idx for idx, name in enumerate(labels)}

    for ch in channel_ids:   
        try:
            idx = channel_idx_map[ch]
            signal1 = r1_data[idx, :]
            signal2 = r2_data[idx, :]
            
            
            pattern_to_symbol, n = permute_patterns(TIME_WINDOW)
            
            # Compute ordinal patterns
            symbols1 = ordinal_patterns(pattern_to_symbol, signal1, TIME_WINDOW, DELAY=DELAY)
            symbols2 = ordinal_patterns(pattern_to_symbol, signal2, TIME_WINDOW, DELAY=DELAY)
            
            # Compute ETC
            etc1 = etc_func(symbols1, bins=BINS)
            etc2 = etc_func(symbols2, bins=BINS)
            
            results.append({
                'volunteer': v,
                'channel': ch,
                'ETC_EyesOpen': etc1,
                'ETC_EyesClosed': etc2,
            })
            
        except Exception as e:
            logger.error(f"Error processing channel {ch} for {v}: {e}")
            continue
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")

logger.info(f"Processing complete. Total results: {len(results)}")

data_filtered_ordinal = pd.DataFrame(results)
%store data_filtered_ordinal

2025-08-04 01:53:58,742 - Processing 109 volunteers with 64 channels each
2025-08-04 01:53:58,745 - Processing S001 (1/109)
2025-08-04 01:54:07,583 - Processing S002 (2/109)
2025-08-04 01:54:16,282 - Processing S003 (3/109)
2025-08-04 01:54:24,921 - Processing S004 (4/109)
2025-08-04 01:54:33,523 - Processing S005 (5/109)
2025-08-04 01:54:42,653 - Processing S006 (6/109)
2025-08-04 01:54:51,792 - Processing S007 (7/109)
2025-08-04 01:55:00,418 - Processing S008 (8/109)
2025-08-04 01:55:09,310 - Processing S009 (9/109)
2025-08-04 01:55:18,289 - Processing S010 (10/109)
2025-08-04 01:55:27,214 - Completed 10/109 volunteers
2025-08-04 01:55:27,215 - Processing S011 (11/109)
2025-08-04 01:55:36,071 - Processing S012 (12/109)
2025-08-04 01:55:45,184 - Processing S013 (13/109)
2025-08-04 01:55:54,136 - Processing S014 (14/109)
2025-08-04 01:56:02,850 - Processing S015 (15/109)
2025-08-04 01:56:11,864 - Processing S016 (16/109)
2025-08-04 01:56:21,016 - Processing S017 (17/109)
2025-08-04 01:

Stored 'data_filtered_ordinal' (DataFrame)
