# Introduction

# EEG Data Analysis for Neurological Disorders

## Introduction

This notebook analyzes EEG data to compare brain activity among three groups: 
- **AD (Alzheimer's Disease)**
- **HC (Healthy Control)**
- **FTD (Frontotemporal Dementia)**

I compute the Power Spectral Density (PSD) using the Welch method, visualize frequency distributions, and generate topographic maps to highlight differences between groups. The goal is to identify EEG-based biomarkers for neurological disorders.

<span style="color:blue; font-size:20px;">
If you find this notebook useful, leave an upvote 👍; that motivates me to contribute more! 🚀😊💙
</span>


In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import mne

# Setup

In [None]:
# Root folder of the ds004504 dataset inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/openneuro-ds004504/ds004504"

# Apply seaborn's white-grid style to the figures drawn afterwards.
sns.set_style(style="whitegrid")

# Load subject

In [None]:
def load_subject(subject_id: int, path: str = DATA_PATH) -> mne.io.BaseRaw:
    """Load the eyes-closed EEG recording of one subject.

    Parameters
    ----------
    subject_id : int
        Numeric subject id. It is zero-padded to three digits to build the
        ``sub-XXX`` folder and file names.
    path : str
        Root folder of the dataset (defaults to ``DATA_PATH``).

    Returns
    -------
    mne.io.BaseRaw
        The object returned by ``mne.io.read_raw_eeglab`` for the subject's
        ``.set`` file under ``derivatives``, with ``preload=True``.
    """
    # Build the zero-padded label once; it appears in both the folder and file name.
    subject_label = 'sub-' + str(subject_id).zfill(3)
    set_file = f"{path}/derivatives/{subject_label}/eeg/{subject_label}_task-eyesclosed_eeg.set"
    # verbose='CRITICAL' keeps MNE's reader from logging anything below that level.
    return mne.io.read_raw_eeglab(set_file, preload=True, verbose='CRITICAL')

In [None]:
# Subject 15 serves as the example recording for the single-subject cells below.
raw = load_subject(subject_id=15)

In [None]:
# Leave the object as the cell's last expression so the notebook renders its
# rich representation instead of the plain-text print() output.
raw.info

# Visualize raw

In [None]:
# The trailing ';' stops the value returned by plot() from being echoed as the
# cell's output in addition to the figure already shown by show=True.
raw.plot(n_channels=19, scalings='auto', title='Raw EEG Data Example', show=True);

In [None]:
# plot_psd() is a legacy method in recent MNE releases; compute the spectrum
# explicitly (as the group-comparison cell does) and plot it.
raw.compute_psd(fmin=0.5, fmax=45).plot();

# Histogram of MMSE scores

In [None]:
# Read the participants table from the dataset root, reusing DATA_PATH instead
# of repeating the absolute path.
df_metadata = pd.read_csv(DATA_PATH + "/participants.tsv", sep="\t")

# Histogram (10 bins) of the MMSE column with a kernel-density overlay.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df_metadata['MMSE'], bins=10, kde=True, ax=ax)
ax.set_xlabel("MMSE Score")
ax.set_ylabel("Count")
ax.set_title("Distribution of MMSE Scores")
plt.show()

# Distribution of Band Power across channels

In [None]:
# raw[:] yields the (n_channels, n_samples) data array and the sample times.
data, times = raw[:]
# One row per sample, one column per channel, plus a trailing 'Time' column.
eeg_df = pd.DataFrame(data.T, columns=raw.ch_names).assign(Time=times)

# Compute band power for different frequency bands
def bandpower(data, sf, band, window_sec=4):
    """Integrate the Welch power spectral density of a signal over a band.

    Parameters
    ----------
    data : array-like
        Signal samples. Time runs along the last axis, so a 1-D signal or a
        (..., n_samples) stack of signals are both accepted.
    sf : float
        Sampling frequency in Hz.
    band : sequence of two floats
        Lower and upper band edges in Hz; both edges are included.
    window_sec : float
        Requested Welch segment length in seconds (default 4).

    Returns
    -------
    float or ndarray
        Trapezoidal integral of the PSD between the band edges: a scalar for
        1-D input, otherwise one value per leading index of ``data``.
    """
    from scipy.signal import welch

    data = np.asarray(data)
    low, high = band
    # Never request a segment longer than the signal: welch() would warn and
    # shrink it anyway, so clamp explicitly and keep the output clean.
    nperseg = min(int(window_sec * sf), data.shape[-1])
    freqs, psd = welch(data, sf, nperseg=nperseg)
    idx_band = np.logical_and(freqs >= low, freqs <= high)
    # np.trapz was renamed np.trapezoid in NumPy 2.0; prefer the new name and
    # fall back to the old one on earlier NumPy versions.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return integrate(psd[..., idx_band], freqs[idx_band], axis=-1)

sf = raw.info['sfreq']
bands = {'Delta': (0.5, 4), 'Theta': (4, 8), 'Alpha': (8, 13), 'Beta': (13, 30)}

# For every band, collect the power of each channel in raw.ch_names order.
band_powers = {
    band_name: [bandpower(eeg_df[channel].values, sf, edges) for channel in raw.ch_names]
    for band_name, edges in bands.items()
}

# Rows are channels, columns are frequency bands.
band_powers_df = pd.DataFrame(band_powers, index=raw.ch_names)
print("Band power summary:")
print(band_powers_df)

# Box plot with one box per band, summarising the spread across channels.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=band_powers_df, ax=ax)
ax.set(xlabel="Frequency Band", ylabel="Power", title="Distribution of Band Power Across Channels")
plt.show()

# PSD comparison between 3 classes

In [None]:
# Subject ids belonging to each group. range() excludes its stop value, so the
# last id of every group is stop - 1.
group_ranges = {
    'AD': range(1, 37),     # subjects 1-36 for AD
    'HC': range(37, 66),    # subjects 37-65 for HC
    'FTD': range(66, 89),   # subjects 66-88 for FTD
}

# Average PSD per group, keyed by group name.
group_psd = {}

for group, subject_ids in group_ranges.items():
    subject_spectra = []
    for subject_id in subject_ids:
        # Use a dedicated name so the example recording bound to `raw` earlier
        # in the notebook is not silently replaced by the last subject loaded.
        subject_raw = load_subject(subject_id)
        spectrum = subject_raw.compute_psd(method='welch',
                                           fmin=0.5,
                                           fmax=45,
                                           # FFT length equal to 4 seconds of samples
                                           n_fft=int(4 * subject_raw.info['sfreq']),
                                           verbose=False)
        spectrum_array, freqs = spectrum.get_data(return_freqs=True)
        # Scale by 1e12 -- presumably V^2/Hz -> uV^2/Hz, matching the
        # uV^2/Hz axis label used in the plots.
        spectrum_array = (10**12) * spectrum_array
        subject_spectra.append(spectrum_array)
    # Mean over the subject axis (axis 0) of the stacked per-subject spectra.
    group_psd[group] = np.mean(np.array(subject_spectra), axis=0)

# Overlay one curve per group: the group's average PSD, averaged again over its
# first axis (channels).
fig, ax = plt.subplots(figsize=(15, 6))
for group, avg_psd in group_psd.items():
    mean_psd = np.mean(avg_psd, axis=0)
    # Only frequency bins 10..59 are drawn -- NOTE(review): with fmin=0.5 and a
    # 4-second FFT window this should be roughly 3-15 Hz; confirm.
    ax.plot(freqs[10:60], mean_psd[10:60], '-o', label=group)
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Power Spectrum ($\\mu V^2 / Hz$)')
ax.set_title("Average Power Spectrum Density Comparison Among Groups")
# grid() without arguments toggles the grid, which switches it OFF when the
# active style (seaborn "whitegrid") already enables it; request it explicitly.
ax.grid(True)
ax.legend()
plt.show()

# PSD comparison between channels

In [None]:
# One figure per group, with one curve per channel.
for group, avg_psd in group_psd.items():
    fig, ax = plt.subplots(figsize=(15, 6))
    # avg_psd shape: (n_channels, n_freqs)
    for ch_idx, ch_psd in enumerate(avg_psd):
        # Only frequency bins 10..59 are drawn, as in the group comparison plot.
        ax.plot(freqs[10:60], ch_psd[10:60], '-o', label=raw.info.ch_names[ch_idx])
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Power Spectrum ($\\mu V^2 / Hz$)')
    ax.set_title(f"Average PSD for {group} Group")
    # grid() without arguments toggles the grid, which switches it OFF when the
    # active style (seaborn "whitegrid") already enables it; request it explicitly.
    ax.grid(True)
    # Anchor the legend outside the axes, to the right of the plot.
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()

# Topomap comparison for each group

In [None]:
# Per-channel mean power over frequency bins 10..59, one vector per group.
group_topomap = {
    group: np.mean(avg_psd[:, 10:60], axis=1)
    for group, avg_psd in group_psd.items()
}

# One topographic map per group, side by side, each with its own colorbar.
fig, axes = plt.subplots(nrows=1, ncols=len(group_topomap), figsize=(15, 5))
for (group, topo), ax in zip(group_topomap.items(), axes):
    # Only the first returned object (the image) is needed for the colorbar;
    # per the MNE docs the second one is the contour set, not a colorbar.
    image, _contours = mne.viz.plot_topomap(topo, raw.info, axes=ax, cmap='viridis', show=False)
    ax.set_title(f"{group} Group")
    fig.colorbar(image, ax=ax)
plt.show()

# Conclusion

#### Further analysis may involve feature extraction, classification, and connectivity analysis.  
