In [1]:
import numpy as np
import pandas as pd

In [2]:
# --- File locations -------------------------------------------------------
INPUT_CSV = "./adhdata.csv"
OUTPUT_CSV = "processed_adhdata.csv"

# --- Windowing parameters -------------------------------------------------
sample_rate = 128             # Hz
window_seconds = 4            # window duration
overlap = 0.5                 # 50% overlap (fraction of a window shared with the next one)

# Fail fast on a nonsensical overlap: a value >= 1 would give a zero or
# negative step for the sliding-window range() used later on.
if not 0 <= overlap < 1:
    raise ValueError(f"overlap must be in [0, 1), got {overlap}")

samples_per_window = int(sample_rate * window_seconds)
step = int(samples_per_window * (1 - overlap))  # step size for sliding window
if step < 1:
    raise ValueError(
        f"overlap={overlap} leaves a step of {step} samples; it must be at least 1"
    )

# --- Spectral parameters --------------------------------------------------
freqs = np.arange(2.0, 40.5, 0.5)                 # target frequency bins: 2.0 .. 40.0 Hz in 0.5 Hz steps

# np.interp silently clamps to the edge value outside the sampled range, so
# bins above the Nyquist frequency would carry meaningless numbers.
if freqs[-1] > sample_rate / 2:
    raise ValueError(
        f"highest target frequency {freqs[-1]} Hz exceeds Nyquist ({sample_rate / 2} Hz)"
    )

In [3]:
# Load the raw recordings into memory and summarise what was read.
df = pd.read_csv(INPUT_CSV)
print("Dataset shape:", df.shape)
print("Columns:", list(df.columns))

# Distinct values of the ID column; each one is processed separately below.
indices = pd.unique(df['ID'])
print(f"Unique IDs ({len(indices)}): {indices}")

# Accumulators filled by the processing cells further down:
# `results` collects one dict per output row, `window_count` numbers the windows.
results, window_count = [], 0

Dataset shape: (2166383, 21)
Columns: ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'F7', 'F8', 'T7', 'T8', 'P7', 'P8', 'Fz', 'Cz', 'Pz', 'Class', 'ID']
Unique IDs (121): ['v10p' 'v12p' 'v14p' 'v15p' 'v173' 'v18p' 'v19p' 'v1p' 'v20p' 'v21p'
 'v22p' 'v24p' 'v25p' 'v27p' 'v28p' 'v29p' 'v30p' 'v31p' 'v32p' 'v33p'
 'v34p' 'v35p' 'v36p' 'v37p' 'v38p' 'v39p' 'v3p' 'v40p' 'v6p' 'v8p' 'v177'
 'v179' 'v181' 'v183' 'v190' 'v196' 'v198' 'v200' 'v204' 'v206' 'v209'
 'v213' 'v215' 'v219' 'v227' 'v231' 'v234' 'v236' 'v238' 'v244' 'v246'
 'v250' 'v254' 'v263' 'v265' 'v270' 'v274' 'v279' 'v284' 'v286' 'v288'
 'v107' 'v108' 'v109' 'v110' 'v111' 'v112' 'v113' 'v114' 'v115' 'v116'
 'v41p' 'v42p' 'v43p' 'v44p' 'v45p' 'v46p' 'v47p' 'v48p' 'v49p' 'v50p'
 'v51p' 'v52p' 'v53p' 'v54p' 'v55p' 'v56p' 'v57p' 'v58p' 'v59p' 'v60p'
 'v117' 'v118' 'v120' 'v121' 'v123' 'v125' 'v127' 'v129' 'v131' 'v133'
 'v134' 'v138' 'v140' 'v143' 'v147' 'v149' 'v151' 'v297' 'v298' 'v299'
 'v300' 'v302' 'v303' 'v304'

In [4]:
def process_window(window, saved_class, idx, window_count,
                   fs=None, target_freqs=None, out=None):
    """Compute the power spectrum of one window and emit one row per frequency bin.

    For every electrode column of ``window`` the squared magnitude of the
    real FFT is computed and linearly interpolated onto ``target_freqs``.
    One dict per target frequency is produced, holding the keys ``"ID"``,
    ``"Class"``, ``"Window"``, ``"Frequency"`` plus one key per electrode.

    Parameters
    ----------
    window : pandas.DataFrame
        The samples of a single window (one row per sample). Numeric columns
        other than ``"ID"`` and ``"Class"`` are treated as electrodes.
    saved_class : object
        Value stored under ``"Class"`` in every emitted row.
    idx : object
        Value stored under ``"ID"`` in every emitted row.
    window_count : int
        Value stored under ``"Window"`` in every emitted row.
    fs : float, optional
        Sampling rate in Hz. Defaults to the notebook-level ``sample_rate``.
    target_freqs : array-like, optional
        Frequencies to interpolate the spectrum onto. Defaults to the
        notebook-level ``freqs``.
    out : list, optional
        List the rows are appended to. Defaults to the notebook-level
        ``results``.

    Returns
    -------
    list of dict
        The rows generated for this window (the same objects that were
        appended to ``out``).
    """
    # Fall back to the notebook-level configuration only when the caller did
    # not pass a value; the globals are not touched otherwise.
    fs = sample_rate if fs is None else fs
    target_freqs = freqs if target_freqs is None else target_freqs
    out = results if out is None else out

    # Electrodes are the numeric columns. The metadata columns are excluded
    # explicitly: if "Class" or "ID" happened to be numeric they would
    # otherwise be FFT'd and overwrite the "Class"/"ID" keys of each row.
    electrode_columns = (
        window.drop(columns=["ID", "Class"], errors="ignore")
        .select_dtypes(include=[np.number])
        .columns
    )

    n = len(window)
    original_freqs = np.fft.rfftfreq(n, d=1 / fs)

    # Power spectrum per electrode, resampled onto the common frequency grid.
    # Note: np.interp clamps to the edge values outside original_freqs.
    electrode_powers = {}
    for electrode in electrode_columns:
        samples = window[electrode].to_numpy()
        power = np.abs(np.fft.rfft(samples)) ** 2
        electrode_powers[electrode] = np.interp(target_freqs, original_freqs, power)

    # Build rows: one per frequency bin, with one column per electrode.
    rows = []
    for i, f in enumerate(target_freqs):
        row = {
            "ID": idx,
            "Class": saved_class,
            "Window": window_count,
            "Frequency": f
        }
        for electrode in electrode_columns:
            row[electrode] = electrode_powers[electrode][i]
        rows.append(row)

    out.extend(rows)
    return rows

In [5]:
# Start from a clean slate so that re-running this cell does not append a
# second copy of every row to `results` or continue the old window numbering.
# clear() keeps the same list object, which process_window appends to.
results.clear()
window_count = 0

# Group the frame once instead of re-scanning all rows with a boolean mask
# for every ID. sort=False keeps the groups in order of first appearance,
# i.e. the same order as `indices`.
for idx_i, (idx, subset) in enumerate(df.groupby('ID', sort=False)):
    print(f"\nProcessing ID {idx_i+1}/{len(indices)}: {idx}")
    subset = subset.reset_index(drop=True)

    # Every ID must carry exactly one class label; anything else is a data error.
    saved_class = subset['Class'].unique()
    if len(saved_class) != 1:
        raise ValueError(f"ID {idx} has multiple classes: {saved_class}")
    saved_class = saved_class[0]

    start_windows = 0
    n_samples = len(subset)

    # Sliding window with overlap; only full-length windows are used, so a
    # trailing remainder shorter than samples_per_window is dropped.
    for start in range(0, n_samples - samples_per_window + 1, step):
        window = subset.iloc[start:start + samples_per_window]
        process_window(window, saved_class, idx, window_count)

        window_count += 1     # running window number across all IDs
        start_windows += 1    # windows produced for the current ID only
    print(f"  Windows generated for ID {idx}: {start_windows}")


Processing ID 1/121: v10p
  Windows generated for ID v10p: 54

Processing ID 2/121: v12p
  Windows generated for ID v12p: 67

Processing ID 3/121: v14p
  Windows generated for ID v14p: 67

Processing ID 4/121: v15p
  Windows generated for ID v15p: 167

Processing ID 5/121: v173
  Windows generated for ID v173: 93

Processing ID 6/121: v18p
  Windows generated for ID v18p: 96

Processing ID 7/121: v19p
  Windows generated for ID v19p: 89

Processing ID 8/121: v1p
  Windows generated for ID v1p: 46

Processing ID 9/121: v20p
  Windows generated for ID v20p: 137

Processing ID 10/121: v21p
  Windows generated for ID v21p: 63

Processing ID 11/121: v22p
  Windows generated for ID v22p: 46

Processing ID 12/121: v24p
  Windows generated for ID v24p: 63

Processing ID 13/121: v25p
  Windows generated for ID v25p: 37

Processing ID 14/121: v27p
  Windows generated for ID v27p: 111

Processing ID 15/121: v28p
  Windows generated for ID v28p: 106

Processing ID 16/121: v29p
  Windows generated

In [6]:
# Assemble every accumulated row into a single table and write it to disk
# (without the DataFrame index column).
outer_df = pd.DataFrame(data=results)
print("\nFinal shape:", outer_df.shape)
outer_df.to_csv(path_or_buf=OUTPUT_CSV, index=False)
print(f"Saved to {OUTPUT_CSV}")


Final shape: (637483, 23)
Saved to processed_adhdata.csv
