In [1]:
#Step 1: Import the required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load the dataset
data = pd.read_csv('ObesityDataSet_raw_and_data_sinthetic.csv')

In [4]:
data.dtypes

Gender                             object
Age                               float64
Height                            float64
Weight                            float64
family_history_with_overweight     object
FAVC                               object
FCVC                              float64
NCP                               float64
CAEC                               object
SMOKE                              object
CH2O                              float64
SCC                                object
FAF                               float64
TUE                               float64
CALC                               object
MTRANS                             object
NObeyesdad                         object
dtype: object

In [5]:
# Split the dataset into features (X) and target (y)
X = data.drop('NObeyesdad', axis=1)  # Features
y = data['NObeyesdad']  # Target variable

In [6]:
# Encode categorical features using LabelEncoder
label_encoder = LabelEncoder()
X['Gender'] = label_encoder.fit_transform(X['Gender'])
X['family_history_with_overweight'] = label_encoder.fit_transform(X['family_history_with_overweight'])
X['FAVC'] = label_encoder.fit_transform(X['FAVC'])
X['CAEC'] = label_encoder.fit_transform(X['CAEC'])
X['SMOKE'] = label_encoder.fit_transform(X['SMOKE'])
X['SCC'] = label_encoder.fit_transform(X['SCC'])
X['CALC'] = label_encoder.fit_transform(X['CALC'])
X['MTRANS'] = label_encoder.fit_transform(X['MTRANS'])

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
#Step 3: Model training and evaluation using different algorithms
# 1. Logistic Regression
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_pred)
print("Logistic Regression Accuracy:", lr_accuracy)
print(classification_report(y_test, lr_pred))

Logistic Regression Accuracy: 0.6548463356973995
                     precision    recall  f1-score   support

Insufficient_Weight       0.74      0.93      0.83        56
      Normal_Weight       0.53      0.42      0.47        62
     Obesity_Type_I       0.58      0.60      0.59        78
    Obesity_Type_II       0.82      0.84      0.83        58
   Obesity_Type_III       0.90      1.00      0.95        63
 Overweight_Level_I       0.54      0.38      0.44        56
Overweight_Level_II       0.35      0.38      0.37        50

           accuracy                           0.65       423
          macro avg       0.64      0.65      0.64       423
       weighted avg       0.64      0.65      0.64       423



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [9]:
# 2. Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
print("Decision Tree Accuracy:", dt_accuracy)
print(classification_report(y_test, dt_pred))

Decision Tree Accuracy: 0.933806146572104
                     precision    recall  f1-score   support

Insufficient_Weight       0.92      0.96      0.94        56
      Normal_Weight       0.88      0.85      0.87        62
     Obesity_Type_I       0.95      0.92      0.94        78
    Obesity_Type_II       0.95      0.95      0.95        58
   Obesity_Type_III       1.00      1.00      1.00        63
 Overweight_Level_I       0.89      0.91      0.90        56
Overweight_Level_II       0.94      0.94      0.94        50

           accuracy                           0.93       423
          macro avg       0.93      0.93      0.93       423
       weighted avg       0.93      0.93      0.93       423



In [10]:
# 3. Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
print("Random Forest Accuracy:", rf_accuracy)
print(classification_report(y_test, rf_pred))


Random Forest Accuracy: 0.9527186761229315
                     precision    recall  f1-score   support

Insufficient_Weight       1.00      0.96      0.98        56
      Normal_Weight       0.89      0.90      0.90        62
     Obesity_Type_I       0.99      0.96      0.97        78
    Obesity_Type_II       0.97      0.98      0.97        58
   Obesity_Type_III       1.00      1.00      1.00        63
 Overweight_Level_I       0.85      0.89      0.87        56
Overweight_Level_II       0.98      0.96      0.97        50

           accuracy                           0.95       423
          macro avg       0.95      0.95      0.95       423
       weighted avg       0.95      0.95      0.95       423



In [11]:
# 4. Support Vector Machines (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)
print("SVM Accuracy:", svm_accuracy)
print(classification_report(y_test, svm_pred))

SVM Accuracy: 0.5650118203309693
                     precision    recall  f1-score   support

Insufficient_Weight       0.71      0.88      0.78        56
      Normal_Weight       0.48      0.34      0.40        62
     Obesity_Type_I       0.65      0.33      0.44        78
    Obesity_Type_II       0.77      0.41      0.54        58
   Obesity_Type_III       0.56      1.00      0.72        63
 Overweight_Level_I       0.47      0.48      0.47        56
Overweight_Level_II       0.43      0.58      0.49        50

           accuracy                           0.57       423
          macro avg       0.58      0.57      0.55       423
       weighted avg       0.59      0.57      0.54       423

