In [1]:
import warnings
from pathlib import Path

import kagglehub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
from scipy.signal import welch
from scipy.stats import skew, kurtosis
from sklearn.decomposition import PCA

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load dataset
path = kagglehub.dataset_download("harunshimanto/epileptic-seizure-recognition")
# Build the CSV location with pathlib so the separator is correct on every OS
# (a hard-coded "\\" only resolves on Windows).
df = pd.read_csv(Path(path) / "Epileptic Seizure Recognition.csv", index_col=0)

# transform target variable to reduce into a binary classification problem
# y \in {1: seizure, 0: no seizure}
# Vectorized comparison: label 1 stays 1, every other label becomes 0.
df["y"] = (df["y"] == 1).astype("int64")
df.head()

Unnamed: 0_level_0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
Unnamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
X21.V1.791,135,190,229,223,192,125,55,-9,-33,-38,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,0
X15.V1.924,386,382,356,331,320,315,307,272,244,232,...,164,150,146,152,157,156,154,143,129,1
X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,-94,...,57,64,48,19,-12,-30,-35,-35,-36,0
X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,-79,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,0
X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,-59,...,4,2,-12,-32,-41,-65,-83,-89,-73,0


In [3]:
# Target vector: the binary label column.
y = df["y"]
# Feature matrix: every remaining column of the frame (one raw signal per row).
X = df.drop(columns="y")

In [4]:
def extract_time_domain_features(row: pd.Series) -> pd.Series:
    """Summarise one signal (one row of samples) with time-domain statistics.

    Parameters
    ----------
    row : pd.Series
        Samples of a single signal; the values are cast to float.

    Returns
    -------
    pd.Series
        Entries ``mean``, ``std``, ``var``, ``min``, ``max``, ``skew``,
        ``kurtosis``, ``rms`` and ``zero_crossings``, in that order.
    """
    samples = row.values.astype(float)

    # A zero crossing is a change of sign bit between two consecutive samples.
    sign_changes = np.diff(np.signbit(samples))

    return pd.Series(
        {
            "mean": samples.mean(),
            "std": samples.std(),
            "var": samples.var(),
            "min": samples.min(),
            "max": samples.max(),
            "skew": skew(samples),
            "kurtosis": kurtosis(samples),
            "rms": np.sqrt(np.square(samples).mean()),
            "zero_crossings": np.count_nonzero(sign_changes),
        }
    )

In [5]:
# Time-domain features, one row per signal in X.
# (No placeholder DataFrame is needed: apply() already returns a new frame.)
features_df = X.apply(extract_time_domain_features, axis=1)
features_df

Unnamed: 0_level_0,mean,std,var,min,max,skew,kurtosis,rms,zero_crossings
Unnamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
X21.V1.791,-16.910112,95.710958,9160.587426,-281.0,229.0,-0.200327,0.067411,97.193309,23.0
X15.V1.924,28.112360,471.835823,222629.043555,-1716.0,513.0,-1.511088,1.341888,472.672559,8.0
X8.V1.1,-44.044944,44.186380,1952.436182,-126.0,80.0,0.494485,-0.240414,62.389048,12.0
X16.V1.60,-68.910112,15.923723,253.564954,-105.0,-22.0,0.367125,0.212160,70.726010,0.0
X20.V1.54,-6.651685,38.693001,1497.148340,-103.0,78.0,-0.462741,-0.250516,39.260581,32.0
...,...,...,...,...,...,...,...,...,...
X22.V1.114,5.157303,38.268536,1464.480874,-79.0,73.0,-0.185539,-0.966425,38.614488,8.0
X19.V1.354,5.674157,163.078548,26594.612928,-388.0,471.0,0.009039,0.523230,163.177232,11.0
X8.V1.28,6.752809,44.164855,1950.534402,-90.0,121.0,0.092115,-0.410145,44.678125,25.0
X10.V1.932,-38.842697,63.428346,4023.155031,-157.0,148.0,0.519187,0.272000,74.376812,9.0


In [6]:
def extract_frequency_domain_features(row, fs=178):
    """Extract Power Spectral Density (PSD) band-power features from one signal.

    Parameters
    ----------
    row : array-like
        Samples of a single signal; converted to a float array.
    fs : int or float, default 178
        Sampling frequency in Hz, forwarded to ``scipy.signal.welch``.

    Returns
    -------
    pd.Series
        Entries ``delta_power``, ``theta_power``, ``alpha_power``,
        ``beta_power`` and ``gamma_power``: the PSD integrated over each band
        with the trapezoidal rule. Band edges are inclusive on both sides.
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use the
    # new name when it exists and fall back on older NumPy versions.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    samples = np.asarray(row, dtype=float)

    # Compute Power Spectral Density using Welch's method, with a single
    # segment spanning the whole signal.
    freqs, psd = welch(samples, fs=fs, nperseg=len(samples))

    # Typical EEG frequency bands (Hz); gamma extends up to the Nyquist frequency.
    bands = {
        "delta_power": (0.5, 4),
        "theta_power": (4, 8),
        "alpha_power": (8, 13),
        "beta_power": (13, 30),
        "gamma_power": (30, fs / 2),
    }

    features = {}
    for name, (fmin, fmax) in bands.items():
        in_band = np.logical_and(freqs >= fmin, freqs <= fmax)
        # Integrate the PSD along the frequency axis inside the band.
        features[name] = integrate(psd[in_band], freqs[in_band])

    return pd.Series(features)

In [7]:
# Frequency-domain (band power) features, one row per signal in X.
features_df_bis = X.apply(extract_frequency_domain_features, axis=1)

# Drop any band-power columns left over from a previous run of this cell, so
# re-running it replaces them instead of appending duplicate columns.
features_df = pd.concat(
    [
        features_df.drop(columns=features_df_bis.columns, errors="ignore"),
        features_df_bis,
    ],
    axis=1,
)
features_df

Unnamed: 0_level_0,mean,std,var,min,max,skew,kurtosis,rms,zero_crossings,delta_power,theta_power,alpha_power,beta_power,gamma_power
Unnamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
X21.V1.791,-16.910112,95.710958,9160.587426,-281.0,229.0,-0.200327,0.067411,97.193309,23.0,2531.504216,154.681371,3184.617042,5194.059337,90.945865
X15.V1.924,28.112360,471.835823,222629.043555,-1716.0,513.0,-1.511088,1.341888,472.672559,8.0,141647.645956,64889.280488,24660.943649,29545.678976,547.575979
X8.V1.1,-44.044944,44.186380,1952.436182,-126.0,80.0,0.494485,-0.240414,62.389048,12.0,288.399389,311.355507,300.520181,486.184900,13.772599
X16.V1.60,-68.910112,15.923723,253.564954,-105.0,-22.0,0.367125,0.212160,70.726010,0.0,69.356387,28.827769,57.394592,73.146142,6.538426
X20.V1.54,-6.651685,38.693001,1497.148340,-103.0,78.0,-0.462741,-0.250516,39.260581,32.0,221.391774,127.496323,146.549196,322.243315,35.546279
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X22.V1.114,5.157303,38.268536,1464.480874,-79.0,73.0,-0.185539,-0.966425,38.614488,8.0,412.564908,453.516933,122.380570,11.106535,4.966532
X19.V1.354,5.674157,163.078548,26594.612928,-388.0,471.0,0.009039,0.523230,163.177232,11.0,1899.417992,10204.530106,4096.579741,2273.298170,37.826039
X8.V1.28,6.752809,44.164855,1950.534402,-90.0,121.0,0.092115,-0.410145,44.678125,25.0,453.110904,75.283150,143.449374,762.980044,131.436605
X10.V1.932,-38.842697,63.428346,4023.155031,-157.0,148.0,0.519187,0.272000,74.376812,9.0,578.777539,471.312377,866.189851,189.770426,17.173990


In [8]:
def _approximate_entropy(series, m, r):
    """Compute the approximate entropy ApEn(m, r) of a 1-D time series.

    Parameters
    ----------
    series : array-like
        The samples; converted to a float array.
    m : int
        Embedding dimension (length of the compared sub-sequences).
    r : float
        Tolerance: two sub-sequences match when their Chebyshev distance is <= r.

    Returns
    -------
    float
        ``phi(m) - phi(m + 1)``.

    Raises
    ------
    ValueError
        If the series has fewer than ``m + 1`` samples.
    """
    series = np.asarray(series, dtype=float)
    n = len(series)
    if n < m + 1:
        raise ValueError(
            f"need at least {m + 1} samples for m={m}, got {n}"
        )

    def _phi(dim):
        n_vectors = n - dim + 1
        # All overlapping windows of length `dim`, built with one fancy-index
        # operation instead of a Python loop of slices.
        windows = series[np.arange(n_vectors)[:, None] + np.arange(dim)]
        # Pairwise Chebyshev (max-abs) distance between windows.
        distances = np.max(np.abs(windows[:, None] - windows[None, :]), axis=2)
        match_fraction = np.sum(distances <= r, axis=0) / n_vectors
        # Avoid log(0) by replacing zeros with a small constant.
        match_fraction[match_fraction == 0] = 1e-10
        return np.sum(np.log(match_fraction)) / n_vectors

    return _phi(m) - _phi(m + 1)


def extract_wavelet_nonlinear_features(signal):
    """Extract Wavelet Transform and Non-Linear (Approximate Entropy) features from a signal.

    Parameters
    ----------
    signal : array-like
        Samples of a single signal (e.g. one row of a DataFrame).

    Returns
    -------
    pd.Series
        ``wavelet_energy_level_<i>``: sum of squared coefficients of the i-th
        array returned by ``pywt.wavedec`` (index 0 is the approximation, the
        following indices are the detail levels from coarsest to finest),
        followed by ``approx_entropy`` (NaN, with a RuntimeWarning, if the
        entropy computation fails).
    """
    samples = np.asarray(signal, dtype=float)
    features = {}

    # Decompose the signal using the Discrete Wavelet Transform.
    # 'db4' is a commonly used wavelet for EEG.
    coeffs = pywt.wavedec(samples, "db4", level=4)
    for i, coeff in enumerate(coeffs):
        features[f"wavelet_energy_level_{i}"] = np.sum(np.square(coeff))

    # === Non-Linear Features: Approximate Entropy ===
    # Set the tolerance parameter based on the standard deviation of the signal.
    r = 0.2 * np.std(signal)
    try:
        features["approx_entropy"] = _approximate_entropy(samples, m=2, r=r)
    except Exception as exc:
        # Best effort: keep the row but make the failure visible instead of
        # silently writing NaN.
        warnings.warn(
            f"approximate entropy could not be computed: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )
        features["approx_entropy"] = np.nan

    return pd.Series(features)

In [9]:
# Wavelet-energy and approximate-entropy features, one row per signal in X.
features_df_bis = X.apply(extract_wavelet_nonlinear_features, axis=1)

# Drop any of these columns left over from a previous run of this cell, so
# re-running it replaces them instead of appending duplicate columns.
features_df = pd.concat(
    [
        features_df.drop(columns=features_df_bis.columns, errors="ignore"),
        features_df_bis,
    ],
    axis=1,
)

features_df

Unnamed: 0_level_0,mean,std,var,min,max,skew,kurtosis,rms,zero_crossings,delta_power,theta_power,alpha_power,beta_power,gamma_power,wavelet_energy_level_0,wavelet_energy_level_1,wavelet_energy_level_2,wavelet_energy_level_3,wavelet_energy_level_4,approx_entropy
Unnamed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
X21.V1.791,-16.910112,95.710958,9160.587426,-281.0,229.0,-0.200327,0.067411,97.193309,23.0,2531.504216,154.681371,3184.617042,5194.059337,90.945865,3.690802e+06,1.535263e+05,7.258087e+05,1.070282e+05,7569.491844,0.674851
X15.V1.924,28.112360,471.835823,222629.043555,-1716.0,513.0,-1.511088,1.341888,472.672559,8.0,141647.645956,64889.280488,24660.943649,29545.678976,547.575979,2.982845e+07,1.164573e+07,6.569807e+06,2.279285e+06,74217.980651,0.321345
X8.V1.1,-44.044944,44.186380,1952.436182,-126.0,80.0,0.494485,-0.240414,62.389048,12.0,288.399389,311.355507,300.520181,486.184900,13.772599,5.954840e+05,9.436542e+04,1.429052e+05,1.860976e+04,1716.899579,0.757659
X16.V1.60,-68.910112,15.923723,253.564954,-105.0,-22.0,0.367125,0.212160,70.726010,0.0,69.356387,28.827769,57.394592,73.146142,6.538426,1.686014e+06,1.825810e+04,1.450674e+04,2.511453e+03,813.688912,0.880430
X20.V1.54,-6.651685,38.693001,1497.148340,-103.0,78.0,-0.462741,-0.250516,39.260581,32.0,221.391774,127.496323,146.549196,322.243315,35.546279,5.950417e+05,5.298987e+04,7.528144e+04,3.773313e+04,2503.114777,0.692749
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
X22.V1.114,5.157303,38.268536,1464.480874,-79.0,73.0,-0.185539,-0.966425,38.614488,8.0,412.564908,453.516933,122.380570,11.106535,4.966532,2.975380e+05,5.223182e+04,1.085729e+04,1.632096e+03,302.940919,0.611491
X19.V1.354,5.674157,163.078548,26594.612928,-388.0,471.0,0.009039,0.523230,163.177232,11.0,1899.417992,10204.530106,4096.579741,2273.298170,37.826039,1.489779e+06,5.494694e+06,2.889903e+05,5.831723e+04,1831.618674,0.451915
X8.V1.28,6.752809,44.164855,1950.534402,-90.0,121.0,0.092115,-0.410145,44.678125,25.0,453.110904,75.283150,143.449374,762.980044,131.436605,2.063650e+05,2.196174e+04,8.364039e+04,4.213575e+04,9743.504069,0.913582
X10.V1.932,-38.842697,63.428346,4023.155031,-157.0,148.0,0.519187,0.272000,74.376812,9.0,578.777539,471.312377,866.189851,189.770426,17.173990,8.424921e+05,1.486457e+05,5.561382e+04,1.275353e+04,2016.948235,0.611124


In [12]:
# Attach the binary target as the last column next to the engineered features,
# then persist the combined table as CSV in the working directory.
transformed_df = pd.concat((features_df, y), axis="columns")
transformed_df.to_csv("transformed_df.csv")