In [2]:
import sys
sys.path.append("../src")

from preprocessing import *
from feature_extraction import *

import pandas as pd
import numpy as np
import logging

# Logging setup: INFO and above, each record rendered as "<timestamp> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

In [3]:
# Subject identifiers S001 … S109 (zero-padded to three digits).
volunteer_ids = ["S{:03d}".format(n) for n in range(1, 110)]

# The 64 channel labels analysed in every cell below, one label family per row.
# The order is significant: result rows are appended in exactly this order.
channel_ids = [
    'FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6',
    'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6',
    'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6',
    'Fp1', 'Fpz', 'Fp2',
    'AF7', 'AF3', 'AFz', 'AF4', 'AF8',
    'F7', 'F5', 'F3', 'F1', 'Fz', 'F2', 'F4', 'F6', 'F8',
    'FT7', 'FT8',
    'T7', 'T8', 'T9', 'T10',
    'TP7', 'TP8',
    'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'P8',
    'PO7', 'PO3', 'POz', 'PO4', 'PO8',
    'O1', 'Oz', 'O2',
    'Iz',
]

# Subset of channel_ids (the PO* and O* labels).
occipital_channels = [
    'PO7', 'PO3', 'POz', 'PO4', 'PO8',
    'O1', 'Oz', 'O2',
]

# Locations relative to the notebook's working directory.
root_dir = '../data/files/'      # handed to extract_data() as the data root
result_dir = '../results/tmp/'


CASE OF UNFILTERED DATA


ETC WITHOUT ORDINAL ANALYSIS

In [4]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")
    
for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    r1, r2 = extract_data(v, root_dir)
    
    labels = r1.ch_names
    label_idx = {label: idx for idx, label in enumerate(labels)}
 
    r1_data = r1.get_data()
    r2_data = r2.get_data()

    for ch in channel_ids:
        idx = label_idx[ch]
        signal1 = r1_data[idx,:]
        signal2 = r2_data[idx,:]
        
        etc_1 = etc_func(signal1)
        etc_2 = etc_func(signal2)

        results.append({
            'volunteer': v,
            'channel': ch,
            'ETC_EyesOpen': etc_1,
            'ETC_EyesClosed': etc_2,
            })
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")
    
logger.info(f"Processing complete. Total results: {len(results)}")
    
data_unfiltered = pd.DataFrame(results)
%store data_unfiltered

2025-07-30 16:00:41,445 - Processing 109 volunteers with 64 channels each
2025-07-30 16:00:41,446 - Processing S001 (1/109)
2025-07-30 16:00:53,073 - Processing S002 (2/109)
2025-07-30 16:00:59,905 - Processing S003 (3/109)
2025-07-30 16:01:04,326 - Processing S004 (4/109)
2025-07-30 16:01:08,192 - Processing S005 (5/109)
2025-07-30 16:01:12,854 - Processing S006 (6/109)
2025-07-30 16:01:20,132 - Processing S007 (7/109)
2025-07-30 16:01:25,819 - Processing S008 (8/109)
2025-07-30 16:01:29,204 - Processing S009 (9/109)
2025-07-30 16:01:37,250 - Processing S010 (10/109)
2025-07-30 16:01:40,680 - Completed 10/109 volunteers
2025-07-30 16:01:40,681 - Processing S011 (11/109)
2025-07-30 16:01:44,275 - Processing S012 (12/109)
2025-07-30 16:01:48,444 - Processing S013 (13/109)
2025-07-30 16:01:54,577 - Processing S014 (14/109)
2025-07-30 16:01:59,729 - Processing S015 (15/109)
2025-07-30 16:02:06,687 - Processing S016 (16/109)
2025-07-30 16:02:11,282 - Processing S017 (17/109)
2025-07-30 16:

Stored 'data_unfiltered' (DataFrame)



ETC WITH ORDINAL ANALYSIS

In [7]:
# Ordinal-pattern parameters for the cell below.
# TIME_WINDOW is passed to permute_patterns() and ordinal_patterns().
# NOTE(review): DELAY is not referenced by the processing cell that follows —
# confirm whether ordinal_patterns() should receive it.
TIME_WINDOW, DELAY = 3, 1

In [None]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")

for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    
    r1, r2 = extract_data(v, root_dir)
    r1_data = r1.get_data()
    r2_data = r2.get_data()
    
    labels = r1.ch_names
    channel_idx_map = {name: idx for idx, name in enumerate(labels)}

    for ch in channel_ids:   
        try:
            idx = channel_idx_map[ch]
            signal1 = r1_data[idx, :]
            signal2 = r2_data[idx, :]
            
            
            pattern_to_symbol, n = permute_patterns(TIME_WINDOW)
            
            # Compute ordinal patterns
            symbols1 = ordinal_patterns(pattern_to_symbol, signal1, TIME_WINDOW)
            symbols2 = ordinal_patterns(pattern_to_symbol, signal2, TIME_WINDOW)
            
            # Compute ETC
            etc1 = etc_func(symbols1, bins=n)
            etc2 = etc_func(symbols2, bins=n)
            
            results.append({
                'volunteer': v,
                'channel': ch,
                'ETC_EyesOpen': etc1,
                'ETC_EyesClosed': etc2,
            })
            
        except Exception as e:
            logger.error(f"Error processing channel {ch} for {v}: {e}")
            continue
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")

logger.info(f"Processing complete. Total results: {len(results)}")

data_unfiltered_ordinal = pd.DataFrame(results)
%store data_unfiltered_ordinal

2025-07-31 19:11:08,590 - Processing 109 volunteers with 64 channels each
2025-07-31 19:11:08,593 - Processing S001 (1/109)
2025-07-31 19:11:37,917 - Processing S002 (2/109)
2025-07-31 19:12:09,943 - Processing S003 (3/109)
2025-07-31 19:12:39,466 - Processing S004 (4/109)



CASE OF FILTERED DATA


ETC WITHOUT ORDINAL ANALYSIS

In [3]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")
    
for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")  
    r1, r2 = extract_data(v, root_dir)
    r1, r2 = filter_data(r1, r2)

    labels = r1.ch_names
    label_idx = {label: idx for idx, label in enumerate(labels)}

    r1_data = r1.get_data()
    r2_data = r2.get_data()

    for ch in channel_ids:
        idx = label_idx[ch]
        signal1 = r1_data[idx,:]
        signal2 = r2_data[idx,:]
        
        etc_1 = etc_func(signal1)
        etc_2 = etc_func(signal2)

        results.append({
            'volunteer': v,
            'channel': ch,
            'ETC_EyesOpen': etc_1,
            'ETC_EyesClosed': etc_2,
            })
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")
    
logger.info(f"Processing complete. Total results: {len(results)}")
    
data_filtered = pd.DataFrame(results)
%store data_filtered

2025-07-29 16:42:36,854 - Processing 109 volunteers with 64 channels each
2025-07-29 16:42:36,856 - Processing S001 (1/109)
2025-07-29 16:42:47,832 - Processing S002 (2/109)
2025-07-29 16:42:51,691 - Processing S003 (3/109)
2025-07-29 16:42:55,549 - Processing S004 (4/109)
2025-07-29 16:42:59,239 - Processing S005 (5/109)
2025-07-29 16:43:03,246 - Processing S006 (6/109)
2025-07-29 16:43:07,395 - Processing S007 (7/109)
2025-07-29 16:43:11,251 - Processing S008 (8/109)
2025-07-29 16:43:14,955 - Processing S009 (9/109)
2025-07-29 16:43:18,824 - Processing S010 (10/109)
2025-07-29 16:43:22,788 - Completed 10/109 volunteers
2025-07-29 16:43:22,789 - Processing S011 (11/109)
2025-07-29 16:43:26,667 - Processing S012 (12/109)
2025-07-29 16:43:30,703 - Processing S013 (13/109)
2025-07-29 16:43:34,696 - Processing S014 (14/109)
2025-07-29 16:43:38,417 - Processing S015 (15/109)
2025-07-29 16:43:42,454 - Processing S016 (16/109)
2025-07-29 16:43:46,534 - Processing S017 (17/109)
2025-07-29 16:

Stored 'data_filtered' (DataFrame)



ETC WITH ORDINAL ANALYSIS

In [5]:
# Ordinal-pattern parameters for the cell below.
# TIME_WINDOW is passed to permute_patterns() and ordinal_patterns().
# NOTE(review): neither DELAY nor BINS is referenced by the processing cell that
# follows — it takes its bin count from permute_patterns(). Presumably
# BINS = 6 is the 3! orderings of a length-3 window; confirm or remove.
TIME_WINDOW, DELAY, BINS = 3, 1, 6

In [6]:
results = []
logger.info(f"Processing {len(volunteer_ids)} volunteers with {len(channel_ids)} channels each")

for i, v in enumerate(volunteer_ids, 1):
    logger.info(f"Processing {v} ({i}/{len(volunteer_ids)})")
    
    r1, r2 = extract_data(v, root_dir)
    r1, r2 = filter_data(r1, r2)
    r1_data = r1.get_data()
    r2_data = r2.get_data()
    
    labels = r1.ch_names
    channel_idx_map = {name: idx for idx, name in enumerate(labels)}

    for ch in channel_ids:   
        try:
            idx = channel_idx_map[ch]
            signal1 = r1_data[idx, :]
            signal2 = r2_data[idx, :]
            
            
            pattern_to_symbol, n = permute_patterns(TIME_WINDOW)
            
            # Compute ordinal patterns
            symbols1 = ordinal_patterns(pattern_to_symbol, signal1, TIME_WINDOW)
            symbols2 = ordinal_patterns(pattern_to_symbol, signal2, TIME_WINDOW)
            
            # Compute ETC
            etc1 = etc_func(symbols1, bins=n)
            etc2 = etc_func(symbols2, bins=n)
            
            results.append({
                'volunteer': v,
                'channel': ch,
                'ETC_EyesOpen': etc1,
                'ETC_EyesClosed': etc2,
            })
            
        except Exception as e:
            logger.error(f"Error processing channel {ch} for {v}: {e}")
            continue
    # Progress update
    if i % 10 == 0:
        logger.info(f"Completed {i}/{len(volunteer_ids)} volunteers")

logger.info(f"Processing complete. Total results: {len(results)}")

data_filtered_ordinal = pd.DataFrame(results)
%store data_filtered_ordinal

2025-07-31 18:31:55,303 - Processing 109 volunteers with 64 channels each
2025-07-31 18:31:55,305 - Processing S001 (1/109)
2025-07-31 18:32:06,791 - Processing S002 (2/109)
2025-07-31 18:32:17,666 - Processing S003 (3/109)
2025-07-31 18:32:28,785 - Processing S004 (4/109)
2025-07-31 18:32:39,374 - Processing S005 (5/109)
2025-07-31 18:32:50,969 - Processing S006 (6/109)
2025-07-31 18:33:03,072 - Processing S007 (7/109)
2025-07-31 18:33:13,931 - Processing S008 (8/109)
2025-07-31 18:33:25,142 - Processing S009 (9/109)
2025-07-31 18:33:36,532 - Processing S010 (10/109)
2025-07-31 18:33:47,975 - Completed 10/109 volunteers
2025-07-31 18:33:47,975 - Processing S011 (11/109)
2025-07-31 18:33:59,217 - Processing S012 (12/109)
2025-07-31 18:34:10,783 - Processing S013 (13/109)
2025-07-31 18:34:22,498 - Processing S014 (14/109)
2025-07-31 18:34:33,746 - Processing S015 (15/109)
2025-07-31 18:34:44,806 - Processing S016 (16/109)
2025-07-31 18:34:56,495 - Processing S017 (17/109)
2025-07-31 18:

Stored 'data_filtered_ordinal' (DataFrame)
