In [1]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder, FunctionTransformer
from sklearn.compose import make_column_transformer

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from sklearn.metrics import accuracy_score

from sklearn.compose import ColumnTransformer


In [2]:
# Load the raw survey data; `data` is re-bound to the encoded frame below.
data = pd.read_csv("data/ObesityDataSet.csv")

# Ordered category lists: each value's position is the integer code that
# OrdinalEncoder assigns to it.
gender = ["Female", "Male"]
frequency = ["no", "Sometimes", "Frequently", "Always"]
decision = ["no", "yes"]
transport = ["Walking", "Bike", "Motorbike", "Public_Transportation", "Automobile"]

# Target class names as a flat list so it can be handed to LabelEncoder as-is.
# Note: LabelEncoder stores its classes sorted, so the integer codes follow
# alphabetical order rather than the order written here.
labels = [
    "Insufficient_Weight",
    "Normal_Weight",
    "Overweight_Level_I",
    "Overweight_Level_II",
    "Obesity_Type_I",
    "Obesity_Type_II",
    "Obesity_Type_III",
]

# Describes the ordinal-encoding step: the transformer's name inside the
# ColumnTransformer, the columns it handles, and one category list per column
# (same order as "features").
oe_map = {
    "name": "oe",
    "features": ["Gender", "family_history_with_overweight", "FAVC", "SMOKE", "SCC", "CAEC", "CALC", "MTRANS"],
    "categories": [gender, decision, decision, decision, decision, frequency, frequency, transport]
}

oe = OrdinalEncoder(categories=oe_map["categories"])
ct = ColumnTransformer(
    transformers=[
        ("oe", oe, oe_map["features"])
    ], remainder='passthrough', verbose_feature_names_out=False).set_output(transform='pandas')

# Encode the categorical columns, then restore the original column order
# (`data.columns` on the right-hand side still refers to the raw frame).
data = ct.fit_transform(data).reindex(data.columns, axis=1)

# Encode the target column to integer class ids.
le = LabelEncoder()
le.fit(labels)
data["NObeyesdad"] = le.transform(data["NObeyesdad"])

# Feature matrix and target vector used by the modelling cells.
X = data.drop("NObeyesdad", axis=1)
Y = data["NObeyesdad"]


def column_inverse_transform(data, transformer, map):
    """Decode encoded feature columns of ``data`` back to their original values.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose encoded columns should be decoded. It is modified in place.
    transformer : object
        Fitted column transformer exposing ``named_transformers_``.
    map : dict
        Must provide ``"name"`` (key into ``transformer.named_transformers_``)
        and ``"features"`` (list of column names handled by that transformer).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object. If any listed feature is missing from
        ``data`` nothing is decoded and the frame is returned untouched.
    """
    feature_names = map["features"]

    # All-or-nothing: only decode when every listed feature is present.
    all_present = pd.Series(feature_names).isin(data.columns).all()
    if not all_present:
        return data

    encoder = transformer.named_transformers_[map["name"]]
    decoded = encoder.inverse_transform(data[feature_names])
    data[feature_names] = decoded
    return data

def label_inverse_transform(data, le):
    """Decode integer class ids back to their original labels.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Either the encoded label values themselves (Series) or a frame that
        may contain an encoded ``"NObeyesdad"`` column.
    le : object
        Fitted label encoder exposing ``inverse_transform``.

    Returns
    -------
    object
        For Series input, whatever ``le.inverse_transform`` returns.
        For DataFrame input, the same frame object; its ``"NObeyesdad"``
        column is decoded in place when present, otherwise the frame is
        returned unchanged.
    """
    # isinstance instead of an exact type comparison, so Series subclasses
    # take the Series path rather than failing on the `.columns` lookup below.
    if isinstance(data, pd.Series):
        return le.inverse_transform(data)
    if "NObeyesdad" in data.columns:
        data["NObeyesdad"] = le.inverse_transform(data["NObeyesdad"])
    return data


In [3]:
# Turn `data` back into its human-readable form: first the ordinal-encoded
# feature columns, then the "NObeyesdad" target column. X and Y are not
# reassigned here.
# NOTE: not idempotent - running this cell again would feed the already
# decoded values back into the inverse transforms.
data = column_inverse_transform(data, ct, oe_map)
data = label_inverse_transform(data, le)

In [4]:

# Hold out 20% of the rows for testing. The split is stratified on Y and
# uses a fixed random_state so it is repeatable across runs.
xTrain, xTest, yTrain, yTest = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)


In [5]:
print("Neural Network")
nn_models = ["sgd", "adam"]

# Fit one MLP per solver on the same split and report test accuracy in percent.
# NOTE(review): xTrain/xTest are passed to the classifier as-is; no scaling
# step is visible in this notebook. scikit-learn recommends standardizing
# features for MLPs - TODO consider a StandardScaler pipeline.
for model in nn_models:
    clf = MLPClassifier(solver=model, random_state=42, max_iter=10000)
    yPred = clf.fit(xTrain, yTrain).predict(xTest)
    accuracy = accuracy_score(yTest, yPred) * 100
    print(f"{model}\t=> Accuracy: {accuracy}")
  


Neural Network
sgd	=> Accuracy: 63.593380614657214
adam	=> Accuracy: 84.16075650118204


In [6]:
print("SVM")
svm_models = ["linear", "rbf", "sigmoid"]

# Fit one SVC per kernel on the same split and report test accuracy in percent.
# NOTE(review): xTrain/xTest are passed to the classifier as-is; no scaling
# step is visible in this notebook. scikit-learn notes that SVMs are not
# scale invariant - TODO consider a StandardScaler pipeline.
for model in svm_models:
    clf = SVC(kernel=model, random_state=0)
    yPred = clf.fit(xTrain, yTrain).predict(xTest)
    accuracy = accuracy_score(yTest, yPred) * 100
    print(f"{model}\t=> Accuracy: {accuracy}")


SVM
linear	=> Accuracy: 85.1063829787234
rbf	=> Accuracy: 64.0661938534279
sigmoid	=> Accuracy: 3.309692671394799
