unzip the file and load data in colab environment

In [None]:
import zipfile
import os

zip_path = "/content/ADHD.zip"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
  zip_ref.extractall("/content/data")

declaring bands and defining functions needed

In [None]:
import numpy as np
from scipy.signal import welch, butter, filtfilt, iirnotch

bands = {
    "delta": (0.5, 4),
    "theta": (4, 8),
    "alpha": (8, 13),
    "beta":  (13, 30),
    "gamma": (30, 45)
}

def bandpass_filter(data, low, high, fs, order=4):
    nyq = 0.5 * fs
    low = low/nyq
    high = high/nyq
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, data)

def compute_psd(data, fs, band):
    fmin, fmax = bands[band]
    freqs, psd = welch(data, fs=fs, nperseg=fs*2)
    mask = (freqs >= fmin) & (freqs <= fmax)
    return np.mean(psd[mask])

def notch_filter(data, f0, fs, Q=30):
    nyq = 0.5 * fs
    w0 = f0/nyq
    b, a = iirnotch(w0, Q)
    return filtfilt(b, a, data)

Extrat psd data from mat files and make a csv for it

In [None]:
dataframe_list=[]

In [1]:
import os
import scipy.io as sio
import pandas as pd

data_dir = "/content/data/ADHD/ADHD"

fs = 256

for file_name in os.listdir(data_dir):
    if file_name.endswith(".mat"):
        file_path = os.path.join(data_dir, file_name)

        mat_data = sio.loadmat(file_path, squeeze_me=True)

        key = file_name.split(".")[0]
        eeg_data = mat_data[key]

        n_channels = eeg_data.shape[1]
        all_results = []

        for ch in range(n_channels):
            signal = eeg_data[:, ch]

            filtered_signal = bandpass_filter(signal, 0.5, 45, fs, order=4)
            filtered_signal = notch_filter(filtered_signal, 50, fs)

            for band in bands:
                psd_val = compute_psd(filtered_signal, fs, band)
                all_results.append([ch+1, band, psd_val])

        df = pd.DataFrame(all_results, columns=["Channel", "Band", "PSD"])
        df["ADHD"] = 0

        dataframe_list.append(df)

print(f"Processed {len(dataframe_list)} files into dataframe_list.")


FileNotFoundError: [Errno 2] No such file or directory: '/content/data/ADHD/Control'

In [None]:
import os
import scipy.io as sio
import pandas as pd

data_dir = "/content/data/ADHD/Control"

fs = 256

for file_name in os.listdir(data_dir):
    if file_name.endswith(".mat"):
        file_path = os.path.join(data_dir, file_name)

        mat_data = sio.loadmat(file_path, squeeze_me=True)

        key = file_name.split(".")[0]
        eeg_data = mat_data[key]

        n_channels = eeg_data.shape[1]
        all_results = []

        for ch in range(n_channels):
            signal = eeg_data[:, ch]

            filtered_signal = bandpass_filter(signal, 0.5, 45, fs, order=4)
            filtered_signal = notch_filter(filtered_signal, 50, fs)

            for band in bands:
                psd_val = compute_psd(filtered_signal, fs, band)
                all_results.append([ch+1, band, psd_val])

        df = pd.DataFrame(all_results, columns=["Channel", "Band", "PSD"])
        df["ADHD"] = 0

        # Append to list
        dataframe_list.append(df)

print(f"Processed {len(dataframe_list)} files into dataframe_list.")


FileNotFoundError: [Errno 2] No such file or directory: '/content/data/ADHD/Control'

In [None]:
print(dataframe_list)

[    Channel   Band          PSD  ADHD
0         1  delta  5380.886965     1
1         1  theta  2625.230609     1
2         1  alpha   792.356271     1
3         1   beta   223.754579     1
4         1  gamma    62.610601     1
..      ...    ...          ...   ...
90       19  delta  4483.348468     1
91       19  theta  1881.642610     1
92       19  alpha   745.944210     1
93       19   beta   244.068657     1
94       19  gamma    41.982108     1

[95 rows x 4 columns],     Channel   Band          PSD  ADHD
0         1  delta  4221.374354     1
1         1  theta  2787.175677     1
2         1  alpha   734.487043     1
3         1   beta   151.424033     1
4         1  gamma    31.299449     1
..      ...    ...          ...   ...
90       19  delta  1373.861123     1
91       19  theta   906.200915     1
92       19  alpha   559.252000     1
93       19   beta   234.289061     1
94       19  gamma    47.089732     1

[95 rows x 4 columns],     Channel   Band          PSD  ADHD
0

Change into a single table for easy training 121 X 96

In [None]:
import pandas as pd

def subject_to_features(df):
    pivot_df = df.pivot(index="Channel", columns="Band", values="PSD")

    feature_row = pivot_df.to_numpy().flatten()

    channel_names = [f"Ch{ch}" for ch in pivot_df.index]
    band_names = pivot_df.columns
    flattened_column_names = [f"{ch}_{band}" for ch in channel_names for band in band_names]

    feature_df = pd.DataFrame([feature_row], columns=flattened_column_names)

    feature_df["ADHD"] = df["ADHD"].iloc[0]

    return feature_df

In [None]:
feature_matrix = pd.concat([subject_to_features(df) for df in dataframe_list], ignore_index=True)
print(feature_matrix)

       Ch1_alpha     Ch1_beta     Ch1_delta   Ch1_gamma     Ch1_theta  \
0     792.356271   223.754579   5380.886965   62.610601   2625.230609   
1     734.487043   151.424033   4221.374354   31.299449   2787.175677   
2     817.147133   334.681829   3083.811126   36.413846   1628.801800   
3    1182.705262   477.912733  10614.076587   39.120483   4239.898559   
4     848.601456   204.547854   6636.247511   50.990682   2757.950478   
..           ...          ...           ...         ...           ...   
116  3792.600069  1249.838784  27572.087659  388.500163  10406.035146   
117   433.811649   303.732393   5557.004455   66.941320   1025.228302   
118   374.393023   100.982687   3794.086645   49.640397   1289.153531   
119   930.648724   227.167775   3086.246104   46.713752   1909.180176   
120  1152.124235   345.436010   4717.651998   34.225018   3235.882957   

       Ch2_alpha     Ch2_beta     Ch2_delta   Ch2_gamma    Ch2_theta  ...  \
0     652.834278   217.106029   8413.837029  2

importing models and defining features(X) and traget(Y)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

X = feature_matrix.drop(columns=["ADHD"])
y = feature_matrix["ADHD"]

Random forest classifier

In [None]:
rf_clf = RandomForestClassifier(n_estimators=200, random_state=42)
rf_scores = cross_val_score(rf_clf, X, y, cv=5, scoring="accuracy")
print("Random Forest Accuracy:", rf_scores.mean())

Random Forest Accuracy: 0.6463333333333334


Logistic regression classifier

In [None]:
log_reg = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=500, class_weight="balanced"))
])
log_scores = cross_val_score(log_reg, X, y, cv=5, scoring="accuracy")
print("Logistic Regression Accuracy:", log_scores.mean())

Logistic Regression Accuracy: 0.4959999999999999


Support Vector Machine classifier

In [None]:
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", SVC(kernel="rbf", class_weight="balanced"))
])
svm_scores = cross_val_score(svm_clf, X, y, cv=5, scoring="accuracy")
print("SVM Accuracy:", svm_scores.mean())

SVM Accuracy: 0.6456666666666668


5 layer vanilla Feedforward Neural Network

In [None]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

class FFN(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, dropout=0.3):
        super(FFN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.Linear(16, 2)
        )

    def forward(self, x):
        return self.layers(x)


input_dim = X.shape[1]
model = FFN(input_dim)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            preds = torch.argmax(model(X_test), dim=1)
            acc = accuracy_score(y_test.numpy(), preds.numpy())
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Test Acc: {acc:.4f}")

model.eval()
with torch.no_grad():
    y_pred = torch.argmax(model(X_test), dim=1).numpy()
    final_acc = accuracy_score(y_test.numpy(), y_pred)
    print("Final Test Accuracy:", final_acc)

Epoch 10/100, Loss: 0.6914, Test Acc: 0.4800
Epoch 20/100, Loss: 0.6791, Test Acc: 0.4800
Epoch 30/100, Loss: 0.6588, Test Acc: 0.4800
Epoch 40/100, Loss: 0.6494, Test Acc: 0.4000
Epoch 50/100, Loss: 0.6335, Test Acc: 0.5600
Epoch 60/100, Loss: 0.5975, Test Acc: 0.5200
Epoch 70/100, Loss: 0.5569, Test Acc: 0.4800
Epoch 80/100, Loss: 0.5355, Test Acc: 0.5200
Epoch 90/100, Loss: 0.4912, Test Acc: 0.6000
Epoch 100/100, Loss: 0.3888, Test Acc: 0.5600
Final Test Accuracy: 0.56


FFN with residual coefficients

In [None]:
class ResidualFFN(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, dropout=0.3):
        super(ResidualFFN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim//2)
        self.fc4 = nn.Linear(hidden_dim//2, hidden_dim//4)
        self.fc_out = nn.Linear(hidden_dim//4, 2)

        self.proj2 = nn.Linear(hidden_dim, hidden_dim)
        self.proj4 = nn.Linear(hidden_dim // 2, hidden_dim // 4)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out1 = self.relu(self.fc1(x))
        out1 = self.dropout(out1)

        out2 = self.relu(self.fc2(out1))
        out2 = self.dropout(out2)
        if out1.shape[-1] != out2.shape[-1]:
            out2 = out2 + self.proj2(out1)
        else:
            out2 = out2 + out1


        out3 = self.relu(self.fc3(out2))
        out3 = self.dropout(out3)

        out4 = self.relu(self.fc4(out3))
        out4 = self.dropout(out4)
        if out3.shape[-1] != out4.shape[-1]:
             out4 = out4 + self.proj4(out3)
        else:
            out4 = out4 + out3


        out = self.fc_out(out4)
        return out

input_dim = X.shape[1]
model = ResidualFFN(input_dim)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            preds = torch.argmax(model(X_test), dim=1)
            acc = accuracy_score(y_test.numpy(), preds.numpy())
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Test Acc: {acc:.4f}")

model.eval()
with torch.no_grad():
    y_pred = torch.argmax(model(X_test), dim=1).numpy()
    final_acc = accuracy_score(y_test.numpy(), y_pred)
    print("Final Test Accuracy:", final_acc)

Epoch 10/100, Loss: 0.6298, Test Acc: 0.4000
Epoch 20/100, Loss: 0.5579, Test Acc: 0.5600
Epoch 30/100, Loss: 0.4312, Test Acc: 0.7200
Epoch 40/100, Loss: 0.2756, Test Acc: 0.5200
Epoch 50/100, Loss: 0.1541, Test Acc: 0.5600
Epoch 60/100, Loss: 0.1094, Test Acc: 0.5200
Epoch 70/100, Loss: 0.0513, Test Acc: 0.4400
Epoch 80/100, Loss: 0.0244, Test Acc: 0.4800
Epoch 90/100, Loss: 0.0381, Test Acc: 0.4800
Epoch 100/100, Loss: 0.0099, Test Acc: 0.6800
Final Test Accuracy: 0.68


CNN

In [None]:
X = feature_matrix.drop(columns=["ADHD"]).values
y = feature_matrix["ADHD"].values

X_reshaped = X.reshape(-1, 19, 5)

scaler = StandardScaler()
X_reshaped = scaler.fit_transform(X.reshape(-1, X.shape[1])).reshape(-1, 19, 5)

X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped, y, test_size=0.2, stratify=y, random_state=42
)

X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

class EEG_CNN(nn.Module):
    def __init__(self):
        super(EEG_CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3,3), padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=(3,3), padding=1)
        self.pool = nn.MaxPool2d((2,2))
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

        self._to_linear = None
        self._get_conv_output_size(X_train)


        self.fc1 = nn.Linear(self._to_linear, 64)
        self.fc2 = nn.Linear(64, 2)


    def _get_conv_output_size(self, x):
        with torch.no_grad():
            x = self.pool(self.relu(self.conv2(self.relu(self.conv1(x)))))
            self._to_linear = x.view(x.size(0), -1).size(1)


    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = EEG_CNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
epochs = 50

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            preds = torch.argmax(model(X_test), dim=1)
            acc = accuracy_score(y_test.numpy(), preds.numpy())
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Test Acc: {acc:.4f}")

model.eval()
with torch.no_grad():
    y_pred = torch.argmax(model(X_test), dim=1).numpy()
    final_acc = accuracy_score(y_test.numpy(), y_pred)
    print("Final Test Accuracy:", final_acc)

Epoch 10/50, Loss: 0.6209, Test Acc: 0.3600
Epoch 20/50, Loss: 0.5681, Test Acc: 0.5200
Epoch 30/50, Loss: 0.4968, Test Acc: 0.7200
Epoch 40/50, Loss: 0.3743, Test Acc: 0.6000
Epoch 50/50, Loss: 0.3023, Test Acc: 0.5600
Final Test Accuracy: 0.56
