In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Suppress every warning category for the rest of the session. This is a
# blanket filter, so library warnings raised by later cells are hidden too.
warnings.filterwarnings(action="ignore")

# Read the raw dataset; the path is relative to the notebook's working directory.
DATA_PATH = "../data/data.csv"
df = pd.read_csv(DATA_PATH)

# Add BMI_Class column
def classify_bmi(bmi):
    """Map a numeric BMI value to its weight-class label.

    The classes are defined by exclusive upper bounds:
    below 18.5 is "Underweight", below 25 is "Normal", below 30 is
    "Overweight", and everything else is "Obese".

    Note: a NaN input fails every ``<`` comparison, so it falls through to
    the final branch and is labelled "Obese".

    Args:
        bmi: A value comparable with numbers using ``<``.

    Returns:
        One of "Underweight", "Normal", "Overweight" or "Obese".
    """
    # (exclusive upper bound, label) pairs, checked in ascending order.
    upper_bounds = (
        (18.5, "Underweight"),
        (25, "Normal"),
        (30, "Overweight"),
    )
    for limit, label in upper_bounds:
        if bmi < limit:
            return label
    return "Obese"

# Names used throughout the preparation step.
categorical_cols = ["Gender", "Workout_Type"]
# Model inputs; Water_Intake is intentionally not among them.
selected_features = ['Weight (kg)', 'Height (m)', 'Experience_Level', 'Gender_Male']

# Derive the BMI class label from the numeric BMI column, then remove the
# numeric column from the frame.
df = df.assign(BMI_Class=df["BMI"].apply(classify_bmi)).drop(columns=["BMI"])

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each column from the generated indicators.
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Encode the string class labels as integers. The fitted encoder is kept so
# that predicted codes can be mapped back to class names later.
label_encoder = LabelEncoder()
df["BMI_Class_Encoded"] = label_encoder.fit_transform(df["BMI_Class"])

# Feature matrix and target vector.
X = df.loc[:, selected_features]
y = df.loc[:, "BMI_Class_Encoded"]

# Train/test split on the UNSCALED features.
# Splitting before scaling keeps the held-out rows from influencing the
# scaler's mean/variance, which would otherwise leak test information into
# training and make the reported test accuracy optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Scale features: fit on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler. Kept only so
# that any later cell referring to `X_scaled` still finds it; do not use it
# for evaluation.
X_scaled = scaler.transform(X)

# Train Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions and evaluation
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

print(f"✅ Training Accuracy: {accuracy_score(y_train, y_train_pred):.4f}")
print(f"✅ Testing Accuracy:  {accuracy_score(y_test, y_test_pred):.4f}")

# Report per-class metrics using the original class names instead of codes.
print("📊 Classification Report (Test):")
print(classification_report(
    label_encoder.inverse_transform(y_test),
    label_encoder.inverse_transform(y_test_pred)
))

# Cross-Validation Accuracy (CV=5)
# The scaler is wrapped in a pipeline with a fresh classifier so it is re-fit
# on the training portion of every fold; cross-validating a pre-scaled matrix
# would let each validation fold contribute to the scaling statistics.
cv_pipeline = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
cv_scores = cross_val_score(cv_pipeline, X, y, cv=5)
print(f"📈 Cross-Validation Accuracy (CV=5): {cv_scores.mean():.4f}")




✅ Training Accuracy: 0.9451
✅ Testing Accuracy:  0.9590
📊 Classification Report (Test):
              precision    recall  f1-score   support

      Normal       0.95      1.00      0.98        84
       Obese       0.98      0.90      0.94        51
  Overweight       0.93      0.97      0.95        65
 Underweight       1.00      0.93      0.96        44

    accuracy                           0.96       244
   macro avg       0.96      0.95      0.96       244
weighted avg       0.96      0.96      0.96       244

📈 Cross-Validation Accuracy (CV=5): 0.9466


In [2]:
import os

import joblib

# Directory that holds the persisted BMI artifacts. Create it when missing so
# the dumps below do not fail with FileNotFoundError on a fresh checkout.
MODEL_DIR = "../models/bmi"
os.makedirs(MODEL_DIR, exist_ok=True)

# Save the trained model, scaler, and label encoder.
# All three are needed at inference time: the scaler to transform raw
# features, the model to predict class codes, and the label encoder to turn
# those codes back into class names.
joblib.dump(model, f"{MODEL_DIR}/bmi_model.pkl")
joblib.dump(scaler, f"{MODEL_DIR}/bmi_scaler.pkl")
joblib.dump(label_encoder, f"{MODEL_DIR}/bmi_label_encoder.pkl")


['../models/bmi/bmi_label_encoder.pkl']

In [3]:
# Load the saved model, scaler and label encoder from disk so this cell
# exercises exactly what was persisted and does not depend on objects left in
# memory by the training cell.
# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load artifacts produced by this notebook or another trusted source.
loaded_model = joblib.load("../models/bmi/bmi_model.pkl")
loaded_scaler = joblib.load("../models/bmi/bmi_scaler.pkl")
loaded_label_encoder = joblib.load("../models/bmi/bmi_label_encoder.pkl")

# Column names and order must match the features the scaler and model were
# fitted on.
feature_columns = ['Weight (kg)', 'Height (m)', 'Experience_Level', 'Gender_Male']

# Purposeful test samples (diverse profiles)
test_samples = np.array([
    [45, 1.50, 1, 0],  # Very light female, beginner, short height
    [110, 1.90, 5, 1], # Very heavy male, expert, tall height
    [65, 1.65, 2, 0],  # Average female, low experience
    [85, 1.75, 3, 1],  # Heavy male, medium experience
    [55, 1.80, 4, 0],  # Slim female, experienced, tall height
])

# Scale the test samples using the loaded scaler. Passing named columns lets
# scikit-learn check them against the names seen during fitting instead of
# relying on positional order alone.
test_frame = pd.DataFrame(test_samples, columns=feature_columns)
test_samples_scaled = loaded_scaler.transform(test_frame)

# Predict using the loaded model and decode with the loaded label encoder
predictions_encoded = loaded_model.predict(test_samples_scaled)
predictions = loaded_label_encoder.inverse_transform(predictions_encoded)

# Show test samples with their predicted classes
for number, (sample, predicted_class) in enumerate(zip(test_samples, predictions), start=1):
    print(f"Test sample {number}: Features={sample} -> Predicted BMI_Class: {predicted_class}")

Test sample 1: Features=[45.   1.5  1.   0. ] -> Predicted BMI_Class: Normal
Test sample 2: Features=[110.    1.9   5.    1. ] -> Predicted BMI_Class: Obese
Test sample 3: Features=[65.    1.65  2.    0.  ] -> Predicted BMI_Class: Normal
Test sample 4: Features=[85.    1.75  3.    1.  ] -> Predicted BMI_Class: Overweight
Test sample 5: Features=[55.   1.8  4.   0. ] -> Predicted BMI_Class: Underweight
