In [1]:
import os
import glob
import numpy as np
import pandas as pd
import mne

from mne.preprocessing import ICA
from mne_icalabel import label_components

from pyriemann.utils.mean import mean_riemann
from pyriemann.utils.base import invsqrtm

import warnings
warnings.filterwarnings("ignore")

RANDOM_STATE = 42

TARGET_SFREQ = 250.0      # Common sampling rate
LOW_FREQ = 0.5
HIGH_FREQ = 50.0


In [2]:
DS1_PATH = r"C:\Users\Usha Sri\OneDrive\Documents\Parkinson_Project\ds004584-download"
DS2_PATH = r"C:\Users\Usha Sri\OneDrive\Documents\Parkinson_Project\PD_Dataset_timing"


In [3]:
def detect_bad_channels(raw, z_thresh=3.5):
    data = raw.get_data(reject_by_annotation="omit")
    channel_var = np.var(data, axis=1)
    z_scores = (channel_var - np.mean(channel_var)) / np.std(channel_var)
    bads = [raw.ch_names[i] for i, z in enumerate(z_scores) if abs(z) > z_thresh]
    return bads


In [4]:
def run_ica_iclabel(raw):
    n_components = min(0.99, raw.info["nchan"] - 1)

    ica = ICA(
        n_components=n_components,
        method="fastica",   # ✅ FIX HERE
        random_state=RANDOM_STATE,
        max_iter="auto"
    )

    ica.fit(raw)

    labels = label_components(raw, ica, method="iclabel")

    artifact_idx = [
        i for i, lbl in enumerate(labels["labels"])
        if lbl not in ["brain"]
    ]

    ica.exclude = artifact_idx
    return ica.apply(raw.copy())


In [5]:
def subject_zscore(raw):
    data = raw.get_data()
    data = (data - data.mean(axis=1, keepdims=True)) / data.std(axis=1, keepdims=True)
    raw._data = data
    return raw


In [6]:
def preprocess_raw(raw):
    # 1. Resampling
    raw.resample(TARGET_SFREQ)

    # 2. Band-pass filtering
    raw.filter(LOW_FREQ, HIGH_FREQ, fir_design="firwin")

    # 3. Common Average Reference
    raw.set_eeg_reference("average")

    # 4. Bad channel detection
    raw.info["bads"] = detect_bad_channels(raw)

    # 5. ICA + ICLabel
    raw = run_ica_iclabel(raw)

    # 6. Interpolate bad channels
    raw.interpolate_bads(reset_bads=True)

    # 7. Subject-level Z-score normalization
    raw = subject_zscore(raw)

    return raw


In [7]:
raws_ds1 = []

subject_dirs = sorted(glob.glob(os.path.join(DS1_PATH, "sub-*")))

for sub in subject_dirs:
    eeg_path = os.path.join(sub, "eeg")
    set_file = glob.glob(os.path.join(eeg_path, "*.set"))[0]

    raw = mne.io.read_raw_eeglab(set_file, preload=True)
    raw = preprocess_raw(raw)

    raws_ds1.append(raw)

print(f"Loaded Dataset-1 subjects: {len(raws_ds1)}")


Reading C:\Users\Usha Sri\OneDrive\Documents\Parkinson_Project\ds004584-download\sub-001\eeg\sub-001_task-Rest_eeg.fdt


Reading 0 ... 140829  =      0.000 ...   281.658 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.50 Hz (-6 dB cutoff frequency: 56.25 Hz)
- Filter length: 1651 samples (6.604 s)

EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Fitting ICA to data using 62 channels (please be patient, this may take a while)
Selecting by explained variance: 38 components
Fitting ICA took 2349.8s.


OSError: [WinError 1455] The paging file is too small for this operation to complete. Error loading "C:\Users\Usha Sri\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\lib\cublas64_12.dll" or one of its dependencies.

In [None]:
raws_ds2 = []

vhdr_files = sorted(glob.glob(os.path.join(DS2_PATH, "*.vhdr")))

for vhdr in vhdr_files:
    raw = mne.io.read_raw_brainvision(vhdr, preload=True)
    raw = preprocess_raw(raw)

    raws_ds2.append(raw)

print(f"Loaded Dataset-2 subjects: {len(raws_ds2)}")


In [None]:
EPOCH_LENGTH = 2.0
OVERLAP = 1.0

def make_epochs(raw):
    return mne.make_fixed_length_epochs(
        raw,
        duration=EPOCH_LENGTH,
        overlap=OVERLAP,
        preload=True
    )

epochs_ds1 = [make_epochs(r) for r in raws_ds1]
epochs_ds2 = [make_epochs(r) for r in raws_ds2]


In [None]:
def epochs_to_covariances(epochs):
    return mne.compute_covariance(
        epochs,
        method="oas"
    ).data

covs_ds1 = np.array([epochs_to_covariances(ep) for ep in epochs_ds1])
covs_ds2 = np.array([epochs_to_covariances(ep) for ep in epochs_ds2])

covs_ds1 = np.vstack(covs_ds1)
covs_ds2 = np.vstack(covs_ds2)


In [None]:
def riemannian_recenter(covmats):
    G = mean_riemann(covmats)
    G_inv_sqrt = invsqrtm(G)
    return np.array([G_inv_sqrt @ C @ G_inv_sqrt for C in covmats])

covs_ds1_rc = riemannian_recenter(covs_ds1)
covs_ds2_rc = riemannian_recenter(covs_ds2)

print("Riemannian re-centering completed")


In [None]:
X_all = np.vstack([covs_ds1_rc, covs_ds2_rc])

y_ds1 = np.zeros(len(covs_ds1_rc))  # e.g., Control
y_ds2 = np.ones(len(covs_ds2_rc))   # e.g., PD

y_all = np.hstack([y_ds1, y_ds2])

print("Final Feature Shape:", X_all.shape)
print("Final Labels Shape:", y_all.shape)
