In [148]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder, FunctionTransformer
from sklearn.compose import make_column_transformer

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from sklearn.metrics import accuracy_score

from sklearn.compose import ColumnTransformer


In [284]:
data = pd.read_csv("data/ObesityDataSet.csv")

gender = ["Female", "Male"]
frequency = ["no", "Sometimes", "Frequently", "Always"]
decision = ["no", "yes"]
transport = ["Walking", "Bike", "Motorbike", "Public_Transportation", "Automobile"]
labels = ["Insufficient_Weight", "Normal_Weight", "Overweight_Level_I", "Overweight_Level_II", "Obesity_Type_I", "Obesity_Type_II", "Obesity_Type_III"],
oe_map = {
    "name": "oe",
    "features": ["Gender", "family_history_with_overweight", "FAVC", "SMOKE", "SCC", "CAEC", "CALC", "MTRANS"],
    "categories": [gender, decision, decision, decision, decision, frequency,frequency, transport]
}

oe = OrdinalEncoder(categories=oe_map["categories"])
ct = ColumnTransformer(
    transformers=[
        ("oe", oe, oe_map["features"])
    ], remainder='passthrough', verbose_feature_names_out=False).set_output(transform='pandas')

data = ct.fit_transform(data).reindex(data.columns, axis=1)

le = LabelEncoder()
le.fit(np.array(labels).reshape(-1, 1).ravel())
data["NObeyesdad"] = le.transform(data["NObeyesdad"])

X = data.drop("NObeyesdad", axis=1)
Y = data["NObeyesdad"]


def column_inverse_transform(data, transformer, map):
    if pd.Series(map["features"]).isin(data.columns).all():
        data[map["features"]] = transformer.named_transformers_[
            map["name"]].inverse_transform(data[map["features"]])
    return data

def label_inverse_transform(data, le):
    if type(data) == pd.Series:
        return le.inverse_transform(data)
    if "NObeyesdad" in  data.columns:
        data["NObeyesdad"] = le.inverse_transform(data["NObeyesdad"])
    return data


In [285]:
data = column_inverse_transform(data, ct, oe_map)
data["NObeyesdad"] = label_inverse_transform(data["NObeyesdad"], le)

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.000000,1.620000,64.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,0.000000,1.000000,no,Public_Transportation,Normal_Weight
1,Female,21.000000,1.520000,56.000000,yes,no,3.0,3.0,Sometimes,yes,3.000000,yes,3.000000,0.000000,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.000000,1.800000,77.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,2.000000,1.000000,Frequently,Public_Transportation,Normal_Weight
3,Male,27.000000,1.800000,87.000000,no,no,3.0,3.0,Sometimes,no,2.000000,no,2.000000,0.000000,Frequently,Walking,Overweight_Level_I
4,Male,22.000000,1.780000,89.800000,no,no,2.0,1.0,Sometimes,no,2.000000,no,0.000000,0.000000,Sometimes,Public_Transportation,Overweight_Level_II
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2106,Female,20.976842,1.710730,131.408528,yes,yes,3.0,3.0,Sometimes,no,1.728139,no,1.676269,0.906247,Sometimes,Public_Transportation,Obesity_Type_III
2107,Female,21.982942,1.748584,133.742943,yes,yes,3.0,3.0,Sometimes,no,2.005130,no,1.341390,0.599270,Sometimes,Public_Transportation,Obesity_Type_III
2108,Female,22.524036,1.752206,133.689352,yes,yes,3.0,3.0,Sometimes,no,2.054193,no,1.414209,0.646288,Sometimes,Public_Transportation,Obesity_Type_III
2109,Female,24.361936,1.739450,133.346641,yes,yes,3.0,3.0,Sometimes,no,2.852339,no,1.139107,0.586035,Sometimes,Public_Transportation,Obesity_Type_III


In [130]:
xTrain, xTest, yTrain, yTest = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y)


In [286]:
print("Neural Network")
nn_models = ["sgd", "adam"]
for model in nn_models:
  clf = MLPClassifier(solver=model, random_state=42, max_iter=10000)
  
  # clf_pipe = make_pipeline(ct, clf)
  # clf_pipe.fit(xTrain,yTrain)
  # yPred = clf_pipe.predict(xTest)
  
  clf.fit(xTrain,yTrain)
  yPred = clf.predict(xTest)
  accuracy = accuracy_score(yTest, yPred)*100
  print(f"{model}\t=> Accuracy: {accuracy}")
  


Neural Network
sgd	=> Accuracy: 68.79432624113475
adam	=> Accuracy: 73.99527186761229


In [287]:
print("SVM")
svm_models = ["linear", "rbf", "sigmoid"]
for model in svm_models:
  clf = SVC(kernel=model, random_state=0)
  
  # clf_pipe = make_pipeline(ct, clf)
  # clf_pipe.fit(xTrain, yTrain)
  # yPred = clf_pipe.predict(xTest)

  clf.fit(xTrain, yTrain)
  yPred = clf.predict(xTest)
  accuracy = accuracy_score(yTest, yPred)*100
  print(f"{model}\t=> Accuracy: {accuracy}")


SVM
linear	=> Accuracy: 86.28841607565012
rbf	=> Accuracy: 63.593380614657214
sigmoid	=> Accuracy: 3.309692671394799
