In [5]:
import sys
import os

sys.path.append(os.path.abspath(os.path.join('..')))

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import joblib
import pandas as pd


from app.ml.data_loader import load_inventory_dataset

In [6]:
from app.database.connection import SessionLocal
db = SessionLocal()

# Load Data & Features
df = load_inventory_dataset(db)

if df.empty:
    print("⚠️ Dataset is empty.")
    
df.head()

Not enough inventory rows (5), creating dataset from stock history


Unnamed: 0,pharmacy_id,medication_id,quantity,old_quantity,new_quantity,changed_at
0,1,1,5,20,5,2026-02-09 21:55:19.647410
1,1,2,30,40,30,2026-02-09 21:55:19.657839
2,1,3,100,0,100,2026-02-10 11:28:16.580215
3,2,1,8,15,8,2026-02-09 21:55:19.657896
4,2,3,50,55,50,2026-02-09 21:55:19.657918


In [7]:
SHORTAGE_THRESHOLD = 10
LOW_STOCK_THRESHOLD = 20

if not df.empty:

    # Target Variable
    df["shortage"] = (df["quantity"] <= SHORTAGE_THRESHOLD).astype(int)

    df["stock_change"] = df["new_quantity"] - df["old_quantity"]

    # Low Stock Indicator
    df["is_low_stock"] = (df["quantity"] < LOW_STOCK_THRESHOLD).astype(int)

    # Medication frequency (how often appears)
    df["medication_freq"] = (
        df.groupby("medication_id")["medication_id"].transform("count")
    )


    print("✅ Feature engineering complete.")
    df.head()


✅ Feature engineering complete.


In [8]:
if not df.empty:

    df["pharmacy_id"] = df["pharmacy_id"].astype("category").cat.codes
    df["medication_id"] = df["medication_id"].astype("category").cat.codes


In [9]:
if not df.empty:

    features = [
        "quantity",
        "stock_change",
        "is_low_stock",
        "medication_freq",
        "pharmacy_id",
        "medication_id"
    ]

    target = "shortage"

    X = df[features]
    y = df[target]

    print("Final dataset shape:", X.shape)


Final dataset shape: (5, 6)


In [None]:
MIN_SAMPLES = 10

if not df.empty and len(df) >= MIN_SAMPLES:

    # Train/Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Model Training
    model = RandomForestClassifier(
        n_estimators=100,
        random_state=42
    )

    model.fit(X_train, y_train)

    # Model Evaluation
    y_pred = model.predict(X_test)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    # Save model
    os.makedirs("app/ml", exist_ok=True)
    joblib.dump(model, "app/ml/shortage_model.joblib")
    print("✅ Model saved successfully!")

    # Example prediction
    sample = X_test.iloc[0:1]
    risk = model.predict_proba(sample)[0][1]
    print(f"Shortage probability: {risk:.2f}")

else:
    print("⛔ Not enough data to train model.")


Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

Confusion Matrix:
[[0 1]
 [0 0]]
✅ Model saved successfully!
Shortage probability: 0.60


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
