In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Load dataset
file_path = "epileptic_seizure_recognition.csv"  # Update with your file path
df = pd.read_csv(file_path)

# Rename last column to 'label'
df.rename(columns={df.columns[-1]: "label"}, inplace=True)

In [6]:
# Drop the first column if it contains non-numeric values
df_cleaned = df.iloc[:, 1:]

# ✅ Filter dataset to only include classes 1, 2, 3 (Remove classes 4 and 5)
df_filtered = df_cleaned[df_cleaned["label"].isin([1, 2, 3])]

# Separate features and labels
X = df_filtered.iloc[:, :-1].values  # EEG features
y = df_filtered["label"].values      # Labels (1, 2, 3)

In [7]:
# Normalize features (Standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# Encode labels (Convert 1,2,3 → 0,1,2 for XGBoost)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Converts [1,2,3] → [0,1,2]


In [9]:
# Split dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y)

In [10]:
# ✅ Optimized XGBoost model with better hyperparameters
xgb_model = XGBClassifier(
    objective='multi:softmax',  # Multiclass classification
    num_class=3,                # 3 classes (Seizure, Tumor, Healthy)
    eval_metric='mlogloss',      # Multi-class log loss
    learning_rate=0.05,          # Smaller learning rate (stabilizes training)
    max_depth=8,                 # Deeper trees capture complex patterns
    n_estimators=500,            # More trees improve learning
    colsample_bytree=0.8,        # Feature sampling per tree
    subsample=0.8,               # Row sampling for regularization
    scale_pos_weight={0: 1.0, 1: 1.2, 2: 1.2}  # Slight class balancing
)

In [11]:
# Train XGBoost model
xgb_model.fit(X_train, y_train)

Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [12]:
# Make predictions
y_pred = xgb_model.predict(X_test)

In [13]:
# Generate and print classification report
report = classification_report(y_test, y_pred, target_names=["Seizure Activity", "Tumor Area", "Healthy Region"])
print(report)

                  precision    recall  f1-score   support

Seizure Activity       0.95      0.97      0.96       460
      Tumor Area       0.66      0.63      0.64       460
  Healthy Region       0.65      0.68      0.67       460

        accuracy                           0.76      1380
       macro avg       0.76      0.76      0.76      1380
    weighted avg       0.76      0.76      0.76      1380

