 # 1.Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
import joblib

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression

# 2.Load Oxford Parkinson's dataset

In [None]:
data_path = "/kaggle/input/netntyj/parkinsons.data"
data = pd.read_csv(data_path)

# Drop non-numeric columns
X = data.drop(columns=["name", "status"])
y = data["status"]

# Convert all features to float
X = X.astype(float)

# 3.Train/Test Split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

 # 4.Feature Scaling

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5.Train individual models

In [None]:
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
lgb_model = LGBMClassifier(random_state=42)
cat_model = CatBoostClassifier(verbose=0, random_state=42)

xgb_model.fit(X_train_scaled, y_train)
lgb_model.fit(X_train_scaled, y_train)
cat_model.fit(X_train_scaled, y_train)

# 6.Stacking / Meta Model

In [None]:
# Predict probabilities for meta model
xgb_pred = xgb_model.predict_proba(X_train_scaled)[:, 1]
lgb_pred = lgb_model.predict_proba(X_train_scaled)[:, 1]
cat_pred = cat_model.predict_proba(X_train_scaled)[:, 1]

stack_input = np.column_stack((xgb_pred, lgb_pred, cat_pred))

meta_model = LogisticRegression()
meta_model.fit(stack_input, y_train)

# 7.Evaluate

In [None]:
xgb_pred_test = xgb_model.predict_proba(X_test_scaled)[:, 1]
lgb_pred_test = lgb_model.predict_proba(X_test_scaled)[:, 1]
cat_pred_test = cat_model.predict_proba(X_test_scaled)[:, 1]

stack_input_test = np.column_stack((xgb_pred_test, lgb_pred_test, cat_pred_test))
final_pred_test = meta_model.predict(stack_input_test)

accuracy = accuracy_score(y_test, final_pred_test)
roc = roc_auc_score(y_test, meta_model.predict_proba(stack_input_test)[:, 1])

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test ROC-AUC: {roc:.4f}")

[LightGBM] [Info] Number of positive: 118, number of negative: 38
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1127
[LightGBM] [Info] Number of data points in the train set: 156, number of used features: 22
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.756410 -> initscore=1.133098
[LightGBM] [Info] Start training from score 1.133098
Test Accuracy: 0.9231
Test ROC-AUC: 0.9724


# 8.Save The Model

In [9]:
joblib.dump(scaler, "scaler.pkl")
joblib.dump(xgb_model, "xgb_model.pkl")
joblib.dump(lgb_model, "lgb_model.pkl")
joblib.dump(cat_model, "cat_model.pkl")
joblib.dump(meta_model, "meta_model.pkl")

['meta_model.pkl']