In [17]:
# Colab-only setup: mount Google Drive at /content/drive so the CSV files read
# by the following cells are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
import numpy as ny
import pandas as pd


In [19]:
# Load the calories file; its 'Calories' column is used later as the
# regression target.
df1 = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Data_set/calories/calories.csv'
)

In [20]:
# Load the exercise file; it supplies User_ID plus the per-session feature
# columns the models are trained on.
df2 = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Data_set/calories/exercise.csv'
)

In [21]:
# Attach the Calories target (df1) to the exercise features (df2).
#
# pd.concat(axis=1) pairs rows by index label, i.e. effectively by row
# position for two freshly read CSVs. That is only correct when both files
# list the same users in the same order, so check it up front instead of
# silently pairing features with the wrong target (or padding with NaN when
# the row counts differ).
if len(df1) != len(df2):
    raise ValueError(
        f"calories.csv has {len(df1)} rows but exercise.csv has {len(df2)}; "
        "the two files cannot be joined row by row"
    )
if 'User_ID' in df1.columns and 'User_ID' in df2.columns:
    # Same length is guaranteed by the check above, so an element-wise
    # comparison of the raw values is well defined.
    if not (df1['User_ID'].to_numpy() == df2['User_ID'].to_numpy()).all():
        raise ValueError(
            "User_ID order differs between calories.csv and exercise.csv; "
            "join on User_ID instead of by row position"
        )

df = pd.concat([df2, df1['Calories']], axis=1)

In [23]:
# Separate the regression target from the predictors:
#   Y - the Calories column to be predicted
#   X - every other column except the User_ID identifier
Y = df['Calories']
X = df.drop(['User_ID', 'Calories'], axis=1)

In [46]:
# Display the feature frame to check which columns are fed to the models.
X

Unnamed: 0,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,male,68,190.0,94.0,29.0,105.0,40.8
1,female,20,166.0,60.0,14.0,94.0,40.3
2,male,69,179.0,79.0,5.0,88.0,38.7
3,female,34,179.0,71.0,13.0,100.0,40.5
4,female,27,154.0,58.0,10.0,81.0,39.8
...,...,...,...,...,...,...,...
14995,female,20,193.0,86.0,11.0,92.0,40.4
14996,female,27,165.0,65.0,6.0,85.0,39.2
14997,female,43,159.0,58.0,16.0,90.0,40.1
14998,male,78,193.0,97.0,2.0,84.0,38.3


In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# same rows in each partition on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [25]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Gender is the only categorical column; every other column of X is treated
# as numeric.
categorical_features = ['Gender']
numerical_features = [
    name for name in X.columns if name not in categorical_features
]

# Shared preprocessing step reused by every model pipeline below:
#   'num' - standardise the numeric columns
#   'cat' - one-hot encode Gender, dropping the first level
numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = ('cat', OneHotEncoder(drop='first'), categorical_features)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])


In [26]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Random forest: preprocessing and regressor live in one Pipeline so the grid
# search fits the scaler/encoder together with the model.
rf_pipeline = Pipeline([
    ('preprocessing', preprocessor),
    # An unseeded forest draws different bootstrap samples on every run, which
    # makes the selected hyper-parameters and the reported MAE irreproducible.
    # Pin the seed (same value as the train/test split uses).
    ('regressor', RandomForestRegressor(random_state=2))
])

# 2 forest sizes x 3 depth limits = 6 candidates, each scored with 5-fold CV.
rf_params = {
    'regressor__n_estimators': [100, 200],
    'regressor__max_depth': [None, 10, 20]
}

# scoring is negated MAE because GridSearchCV always maximises its score.
rf_grid = GridSearchCV(rf_pipeline, rf_params, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
rf_grid.fit(X_train, Y_train)

# Evaluate the best pipeline found by the search on the held-out test set.
rf_best = rf_grid.best_estimator_
rf_preds = rf_best.predict(X_test)

print("Random Forest Best Params:", rf_grid.best_params_)
print("Random Forest MAE:", mean_absolute_error(Y_test, rf_preds))


Random Forest Best Params: {'regressor__max_depth': 20, 'regressor__n_estimators': 200}
Random Forest MAE: 1.677297331890332


In [28]:
from sklearn.metrics import mean_absolute_error, r2_score

In [29]:
# Coefficient of determination (R^2) of the random forest on the test set.
rf_r2 = r2_score(Y_test, rf_preds)
print("Random Forest score:", rf_r2)

Random Forest score: 0.9982227961031749


In [15]:
from sklearn.svm import SVR

# Support-vector regression on top of the shared preprocessing step.
svr_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', SVR()),
])

# 2 kernels x 3 C values x 2 gamma settings = 12 candidates.
svr_params = dict(
    regressor__kernel=['rbf', 'linear'],
    regressor__C=[0.1, 1, 10],
    regressor__gamma=['scale', 'auto'],
)

# 5-fold CV, selecting the candidate with the lowest mean absolute error.
svr_grid = GridSearchCV(
    estimator=svr_pipeline,
    param_grid=svr_params,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)
svr_grid.fit(X_train, Y_train)

# Score the winning pipeline on the held-out test set.
svr_best = svr_grid.best_estimator_
svr_preds = svr_best.predict(X_test)

print("SVR Best Params:", svr_grid.best_params_)
print("SVR MAE:", mean_absolute_error(Y_test, svr_preds))


SVR Best Params: {'regressor__C': 10, 'regressor__gamma': 'auto', 'regressor__kernel': 'rbf'}
SVR MAE: 0.5900921728855388


In [30]:
# R^2 of the SVR predictions on the test set. The label previously read
# "Random Forest score:", a copy-paste slip that attributed this number to the
# wrong model.
print("SVR score:", r2_score(Y_test, svr_preds))

Random Forest score: 0.9992405244014586


In [31]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on top of the shared preprocessing step.
lr_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', LinearRegression()),
])

# The only thing searched over is whether an intercept term is fitted.
lr_params = dict(regressor__fit_intercept=[True, False])

# 5-fold CV, selecting the candidate with the lowest mean absolute error.
lr_grid = GridSearchCV(
    estimator=lr_pipeline,
    param_grid=lr_params,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)
lr_grid.fit(X_train, Y_train)

# Score the winning pipeline on the held-out test set.
lr_best = lr_grid.best_estimator_
lr_preds = lr_best.predict(X_test)

print("Linear Regression Best Params:", lr_grid.best_params_)
print("Linear Regression MAE:", mean_absolute_error(Y_test, lr_preds))


Linear Regression Best Params: {'regressor__fit_intercept': True}
Linear Regression MAE: 8.385188053147184


In [32]:
# R^2 of the linear regression predictions on the test set. The label
# previously read "Random Forest score:", which attributed this number to the
# wrong model.
print("Linear Regression score:", r2_score(Y_test, lr_preds))

Random Forest score: 0.9668790377181355


In [34]:
from sklearn.neighbors import KNeighborsRegressor

In [35]:
# k-nearest-neighbours regression on top of the shared preprocessing step.
knn_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', KNeighborsRegressor()),
])

# 3 neighbourhood sizes x 2 weighting schemes = 6 candidates.
knn_params = dict(
    regressor__n_neighbors=[3, 5, 7],
    regressor__weights=['uniform', 'distance'],
)

# 5-fold CV, selecting the candidate with the lowest mean absolute error.
knn_grid = GridSearchCV(
    estimator=knn_pipeline,
    param_grid=knn_params,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)
knn_grid.fit(X_train, Y_train)

# Predicting through the fitted search object uses its refitted best pipeline.
knn_preds = knn_grid.predict(X_test)
print("KNN Best Params:", knn_grid.best_params_)
print("KNN MAE:", mean_absolute_error(Y_test, knn_preds))

KNN Best Params: {'regressor__n_neighbors': 7, 'regressor__weights': 'distance'}
KNN MAE: 3.4553509414405257


In [37]:
# R^2 of the KNN predictions on the test set. The label previously read
# "Random Forest score:", which attributed this number to the wrong model.
print("KNN score:", r2_score(Y_test, knn_preds))

Random Forest score: 0.9941649322143337


In [38]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting on top of the shared preprocessing step.
gb_pipeline = Pipeline([
    ('preprocessing', preprocessor),
    # random_state seeds the individual trees and the feature permutation used
    # when searching for splits; without it the search result and the reported
    # MAE can change between runs. Same seed value as the train/test split.
    ('regressor', GradientBoostingRegressor(random_state=2))
])

# 2 ensemble sizes x 2 learning rates x 2 tree depths = 8 candidates,
# each scored with 5-fold CV.
gb_params = {
    'regressor__n_estimators': [100, 200],
    'regressor__learning_rate': [0.1, 0.05],
    'regressor__max_depth': [3, 5]
}

# scoring is negated MAE because GridSearchCV always maximises its score.
gb_grid = GridSearchCV(gb_pipeline, gb_params, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
gb_grid.fit(X_train, Y_train)

# Predicting through the fitted search object uses its refitted best pipeline.
gb_preds = gb_grid.predict(X_test)
print("Gradient Boosting Best Params:", gb_grid.best_params_)
print("Gradient Boosting MAE:", mean_absolute_error(Y_test, gb_preds))

Gradient Boosting Best Params: {'regressor__learning_rate': 0.1, 'regressor__max_depth': 5, 'regressor__n_estimators': 200}
Gradient Boosting MAE: 1.1793931580742123


In [40]:
# R^2 of the gradient boosting predictions on the test set. The label
# previously read "Random Forest score:", which attributed this number to the
# wrong model.
print("Gradient Boosting score:", r2_score(Y_test, gb_preds))

Random Forest score: 0.9992667590596462


In [39]:
from xgboost import XGBRegressor

# XGBoost regressor (squared-error objective, library logging silenced) on
# top of the shared preprocessing step.
xgb_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', XGBRegressor(objective='reg:squarederror', verbosity=0)),
])

# 2 ensemble sizes x 2 learning rates x 2 tree depths = 8 candidates.
xgb_params = dict(
    regressor__n_estimators=[100, 200],
    regressor__learning_rate=[0.1, 0.05],
    regressor__max_depth=[3, 5],
)

# 5-fold CV, selecting the candidate with the lowest mean absolute error.
xgb_grid = GridSearchCV(
    estimator=xgb_pipeline,
    param_grid=xgb_params,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)
xgb_grid.fit(X_train, Y_train)

# Predicting through the fitted search object uses its refitted best pipeline.
xgb_preds = xgb_grid.predict(X_test)
print("XGBoost Best Params:", xgb_grid.best_params_)
print("XGBoost MAE:", mean_absolute_error(Y_test, xgb_preds))


XGBoost Best Params: {'regressor__learning_rate': 0.1, 'regressor__max_depth': 5, 'regressor__n_estimators': 200}
XGBoost MAE: 1.1923373230795065


In [41]:
# R^2 of the XGBoost predictions on the test set. The label previously read
# "Random Forest score:", which attributed this number to the wrong model.
print("XGBoost score:", r2_score(Y_test, xgb_preds))

Random Forest score: 0.9992755297885657


In [43]:
import pickle

In [44]:
# Persist the fitted XGBoost search to disk. Note that the object written is
# the whole GridSearchCV wrapper (preprocessing + regressor + CV results),
# not just the bare XGBRegressor.
with open('xgboost_model.pkl', mode='wb') as model_file:
    pickle.dump(xgb_grid, model_file)

In [47]:
# List the names of the feature columns the models were trained on.
list(X.columns)

['Gender', 'Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']

In [49]:
import xgboost

# Print the installed xgboost version for the record.
xgb_version = xgboost.__version__
print(xgb_version)


2.1.4
