In [11]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# 1) Build a synthetic vitals dataset (feature names match the simulator).
# Seeding NumPy's global RNG makes the three draws below reproducible; the
# draw order (hr, bp_sys, spo2) determines the generated values, so keep it.
np.random.seed(42)
n_samples = 1000
hr_values = np.random.randint(55, 120, size=n_samples)      # integers in [55, 119]
bp_sys_values = np.random.randint(95, 180, size=n_samples)  # integers in [95, 179]
spo2_values = np.random.randint(80, 100, size=n_samples)    # integers in [80, 99]
data = pd.DataFrame({
    "hr": hr_values,
    "bp_sys": bp_sys_values,
    "spo2": spo2_values,
})

# Rule-based label: a row is high risk (1) as soon as any single vital
# crosses its cutoff, otherwise it is 0.
hr_out_of_range = ~data["hr"].between(60, 100)  # hr < 60 or hr > 100
bp_elevated = data["bp_sys"].gt(140)
spo2_low = data["spo2"].lt(92)
data["high_risk"] = (hr_out_of_range | bp_elevated | spo2_low).astype(int)

# 2) Separate the predictors from the label and hold out 20% for testing.
feature_columns = ["hr", "bp_sys", "spo2"]
X = data[feature_columns]
y = data["high_risk"]
# Stratifying on y keeps the class ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 3) Fit a gradient-boosted tree classifier on the training split.
gb_params = {
    "n_estimators": 200,
    "learning_rate": 0.05,
    "max_depth": 3,
    "random_state": 42,  # fixed seed for reproducible training
}
model = GradientBoostingClassifier(**gb_params)
model.fit(X_train, y_train)

# 4) Score the held-out split.
# Column 1 of predict_proba is taken as the positive-class (high_risk == 1)
# probability; ROC AUC is computed on these raw scores, before thresholding.
y_pred_prob = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_pred_prob)
print("ROC AUC:", roc_auc)

# Hard labels: predict 1 only when the positive-class probability exceeds 0.7.
decision_threshold = 0.7
y_pred = (y_pred_prob > decision_threshold).astype(int)
report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nClassification Report:\n", report_text)
print("\nConfusion Matrix:\n", conf_matrix)


# 5) Persist the trained model.
# joblib.dump does not create missing directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError
# only after the model has already been trained.
model_path = "../models/risk_model.joblib"
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
# Build the message from model_path so it cannot drift from the real location.
print(f"✅ Model saved to {model_path}")


ROC AUC: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       1.00      1.00      1.00       175

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Confusion Matrix:
 [[ 25   0]
 [  0 175]]
✅ Model saved to ../models/risk_model.joblib
