In [24]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving the model and preprocessing components


In [25]:

# Load the dataset.
# pd.ExcelFile keeps a handle on the workbook open, so it is used as a context
# manager to guarantee the file is closed once the sheet has been parsed.
file_path = 'gym_recommendation.xlsx'
with pd.ExcelFile(file_path) as data:
    # Read the worksheet named 'Sheet1' into a DataFrame.
    df = data.parse('Sheet1')


In [26]:

# Data Preprocessing: work on a copy so the loaded frame `df` is not modified.
df_preprocessed = df.copy()

# Columns that are replaced by integer codes, one LabelEncoder per column.
categorical_columns = ['Sex', 'Hypertension', 'Diabetes', 'Level', 'Fitness Goal', 'Fitness Type']

# Fit an encoder on each column, overwrite the column with its codes, and keep
# the fitted encoder under the column name so it can be saved and reused later.
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    df_preprocessed[col] = encoder.fit_transform(df_preprocessed[col])
    label_encoders[col] = encoder


In [27]:

# Drop irrelevant columns (e.g., ID) and separate features and target
features = df_preprocessed.drop(columns=['ID', 'Exercises'])
target = df_preprocessed['Exercises']

# Split the *unscaled* data first so that the test rows are never seen by any
# fitted preprocessing step. The split is stratified on the target and seeded
# for reproducibility.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42, stratify=target
)

# Fit the scaler on the training rows only, then apply it to the test rows.
# Fitting on the full dataset would leak test-set mean/variance into training
# and make the held-out evaluation optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code still reading `features_scaled` continues to work:
# the full feature table transformed with the train-fitted scaler.
features_scaled = scaler.transform(features)


In [28]:

# Hyperparameter search space for KNN (5 x 2 x 3 = 30 candidate settings)
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan', 'chebyshev'],
)

# Base estimator; the grid search supplies the tuned parameters
knn_model = KNeighborsClassifier()

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores
grid_search = GridSearchCV(
    estimator=knn_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Best configuration found by the search, as a fitted estimator
best_knn_model = grid_search.best_estimator_

# Predictions of the selected model on the held-out test set
y_pred = best_knn_model.predict(X_test)




Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [29]:
# Score the test-set predictions: overall accuracy plus a per-class report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_report_text = classification_report(y_true=y_test, y_pred=y_pred)

# Show the selected hyperparameters and the evaluation results
print("Best KNN Model Parameters:", grid_search.best_params_)
print("KNN Model Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report_text)

# Persist the fitted model, scaler, and label encoders, each to its own file
for artifact, filename in (
    (best_knn_model, 'knn_model.pkl'),
    (scaler, 'scaler.pkl'),
    (label_encoders, 'label_encoders.pkl'),
):
    joblib.dump(artifact, filename)

print("Model, scaler, and label encoders saved successfully!")

Best KNN Model Parameters: {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
KNN Model Accuracy: 0.997943797121316

Classification Report:
                                                               precision    recall  f1-score   support

     Brisk walking, cycling, swimming, running , or dancing.       1.00      1.00      1.00       841
      Squats, deadlifts, bench presses, and overhead presses       1.00      1.00      1.00       727
Squats, yoga, deadlifts, bench presses, and overhead presses       1.00      1.00      1.00       675
                                    Walking, Yoga, Swimming.       1.00      1.00      1.00       338
               brisk walking, cycling, swimming, or dancing.       1.00      1.00      1.00       337

                                                    accuracy                           1.00      2918
                                                   macro avg       1.00      1.00      1.00      2918
                            

In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving the model and preprocessing components

# Load the dataset.
# pd.ExcelFile keeps a handle on the workbook open, so it is used as a context
# manager to guarantee the file is closed once the sheet has been parsed.
file_path = 'gym_recommendation.xlsx'
with pd.ExcelFile(file_path) as data:
    # Read the worksheet named 'Sheet1' into a DataFrame.
    df = data.parse('Sheet1')

# Data Preprocessing: work on a copy so the loaded frame `df` is not modified.
df_preprocessed = df.copy()

# Columns that are replaced by integer codes, one LabelEncoder per column.
categorical_columns = ['Sex', 'Hypertension', 'Diabetes', 'Level', 'Fitness Goal', 'Fitness Type']

# Fit an encoder on each column, overwrite the column with its codes, and keep
# the fitted encoder under the column name so it can be saved and reused later.
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    df_preprocessed[col] = encoder.fit_transform(df_preprocessed[col])
    label_encoders[col] = encoder

# Drop irrelevant columns (e.g., ID) and separate features and target
features = df_preprocessed.drop(columns=['ID', 'Exercises'])
target = df_preprocessed['Exercises']

# Save feature names to ensure correct order during prediction
joblib.dump(features.columns.tolist(), 'feature_names.pkl')

# Split the *unscaled* data first so that the test rows are never seen by any
# fitted preprocessing step. The split is stratified on the target and seeded
# for reproducibility.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42, stratify=target
)

# Fit the scaler on the training rows only, then apply it to the test rows.
# Fitting on the full dataset would leak test-set mean/variance into training
# and make the held-out evaluation optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code still reading `features_scaled` continues to work:
# the full feature table transformed with the train-fitted scaler.
features_scaled = scaler.transform(features)

# Hyperparameter search space for KNN (5 x 2 x 3 = 30 candidate settings)
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan', 'chebyshev'],
)

# Base estimator; the grid search supplies the tuned parameters
knn_model = KNeighborsClassifier()

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores
grid_search = GridSearchCV(
    estimator=knn_model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Best configuration found by the search, as a fitted estimator
best_knn_model = grid_search.best_estimator_

# Predictions of the selected model on the held-out test set
y_pred = best_knn_model.predict(X_test)

# Score the test-set predictions: overall accuracy plus a per-class report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_report_text = classification_report(y_true=y_test, y_pred=y_pred)

# Show the selected hyperparameters and the evaluation results
print("Best KNN Model Parameters:", grid_search.best_params_)
print("KNN Model Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report_text)

# Persist the fitted model, scaler, and label encoders, each to its own file
for artifact, filename in (
    (best_knn_model, 'knn_model.pkl'),
    (scaler, 'scaler.pkl'),
    (label_encoders, 'label_encoders.pkl'),
):
    joblib.dump(artifact, filename)

print("Model, scaler, label encoders, and feature names saved successfully!")


Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best KNN Model Parameters: {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
KNN Model Accuracy: 0.997943797121316

Classification Report:
                                                               precision    recall  f1-score   support

     Brisk walking, cycling, swimming, running , or dancing.       1.00      1.00      1.00       841
      Squats, deadlifts, bench presses, and overhead presses       1.00      1.00      1.00       727
Squats, yoga, deadlifts, bench presses, and overhead presses       1.00      1.00      1.00       675
                                    Walking, Yoga, Swimming.       1.00      1.00      1.00       338
               brisk walking, cycling, swimming, or dancing.       1.00      1.00      1.00       337

                                                    accuracy                           1.00      2918
                                                   macro avg       1