# The dataset used for this model has already been cleaned, so we move directly to ML training

In [1]:
import pandas as pd
# Load the already-cleaned workout/recovery CSV from the Kaggle input mount.
df = pd.read_csv(
    "/kaggle/input/workout-clean-dataset/workout_recovery_cleaned.csv"
)

In [2]:
# Show the column labels of the loaded DataFrame (rendered as the cell output).
df.columns

Index(['Age', 'Gender', 'Weight (kg)', 'Height (m)', 'Max_BPM', 'Avg_BPM',
       'Resting_BPM', 'Session_Duration (hours)', 'Calories_Burned',
       'Workout_Type', 'Fat_Percentage', 'Water_Intake (liters)',
       'Workout_Frequency (days/week)', 'BMI', 'recovery_time'],
      dtype='object')

In [3]:
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Read the cleaned workout/recovery table from the Kaggle input directory.
df = pd.read_csv(
    '/kaggle/input/workout-clean-dataset/workout_recovery_cleaned.csv'
)

# Separate the regression target from the predictors.
y = df['recovery_time']                 # target column
X = df.drop(columns=['recovery_time'])  # every remaining column is a feature

# One-hot encode the categorical columns. drop='first' leaves out one
# indicator per feature so the dummy columns are not redundant, and
# sparse_output=False yields a dense array that can be wrapped in a DataFrame.
categorical_cols = ['Gender', 'Workout_Type']
encoder = OneHotEncoder(drop='first', sparse_output=False)
categorical_data = X[categorical_cols]
X_encoded = encoder.fit_transform(categorical_data)
X_encoded_df = pd.DataFrame(
    X_encoded,
    index=X.index,  # keep row alignment with X for the concat below
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Replace the raw categorical columns with their encoded counterparts.
X = X.drop(columns=categorical_cols)
X = pd.concat([X, X_encoded_df], axis=1)

# Keep numeric dtypes only.
# NOTE(review): any column that is still non-numeric here is dropped without
# a message — confirm that none of the remaining features is affected.
X = X.select_dtypes(include=[np.number])

# Quick sanity report on the modelling matrix and the target.
print(f"X Shape: {X.shape}, y Shape: {y.shape}")
print(f"Target range: {y.min():.2f} - {y.max():.2f} hours")

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Baseline LightGBM regressor, trained before any hyperparameter tuning.
lgb_model = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=10,
    random_state=42,
    device='gpu',  # GPU trainer (the recorded run used a Tesla T4)
)
# fit() returns the estimator itself, so prediction can be chained onto it.
lgb_preds = lgb_model.fit(X_train, y_train).predict(X_test)
lgb_mse = mean_squared_error(y_true=y_test, y_pred=lgb_preds)
print(f"Initial LightGBM MSE: {lgb_mse:.2f}")

# Exhaustive search over a small grid: 2 x 2 x 2 = 8 candidates, each scored
# with 3-fold cross-validation on the training split only.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[10, 20],
    learning_rate=[0.1, 0.2],
)
# NOTE(review): n_jobs=-1 runs the CV fits in parallel while the estimator is
# configured with device='gpu' — confirm that parallel fits sharing one GPU
# is intended.
grid_search = GridSearchCV(
    lgb_model,
    param_grid,
    scoring='neg_mean_squared_error',  # sklearn maximises, hence the negated MSE
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
print(f"Best Parameters: {grid_search.best_params_}")

# With GridSearchCV's default refit=True, best_estimator_ is the winning
# configuration refit on the whole training split.
best_model = grid_search.best_estimator_

# Score the tuned model on the held-out test split.
best_preds = best_model.predict(X_test)
best_mse = mean_squared_error(y_true=y_test, y_pred=best_preds)
print(f"Optimized MSE: {best_mse:.2f}")

# Save the tuned regressor (same path and content as before).
joblib.dump(best_model, '/kaggle/working/fitness_model.pkl')
print("Model saved as 'fitness_model.pkl' for app.py!")

# Also persist the preprocessing state. The model was trained on one-hot
# encoded columns in a specific order, so anything that loads the pickle
# (presumably app.py — not visible here) needs the *fitted* encoder and the
# exact feature order to build a valid input row. They go into a separate
# file so the existing model pickle stays unchanged.
joblib.dump(
    {
        'encoder': encoder,                   # fitted OneHotEncoder
        'categorical_cols': categorical_cols,  # columns the encoder expects
        'feature_columns': list(X.columns),    # column order used for training
    },
    '/kaggle/working/fitness_preprocessing.pkl',
)
print("Preprocessing saved as 'fitness_preprocessing.pkl'")

# Smoke-test prediction on the first held-out row. Despite the variable
# name, this is a real test sample, not an averaged input.
avg_input = X_test.iloc[[0]].copy()  # double brackets keep it a 1-row DataFrame
test_pred = best_model.predict(avg_input)[0]
print(f"\nExample Prediction (first test row): {test_pred:.2f} recovery hours")

X Shape: (1645, 19), y Shape: (1645,)
Target range: 24.00 - 71.95 hours
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1349
[LightGBM] [Info] Number of data points in the train set: 1316, number of used features: 16
[LightGBM] [Info] Using GPU Device: Tesla T4, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...




[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 14 dense feature groups (0.02 MB) transferred to GPU in 0.000792 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 48.954822
Initial LightGBM MSE: 211.36
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1311
[LightGBM] [Info] Number of data points in the train set: 878, number of used features: 16
[LightGBM] [Info] Using GPU Device: Tesla T4, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 14 dense feature groups (0.01 MB) transferred to GPU in 0.001262 secs. 0 sparse feature groups
[LightGBM] [Info] Start training from score 48.178488
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1300
[LightGBM] [Info] Number of data points in the train set: 877, num