In [19]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [20]:
dataset = pd.read_excel("./dataset/Obesity_Dataset.xlsx")
dataset.head()

Unnamed: 0,Sex,Age,Height,Overweight_Obese_Family,Consumption_of_Fast_Food,Frequency_of_Consuming_Vegetables,Number_of_Main_Meals_Daily,Food_Intake_Between_Meals,Smoking,Liquid_Intake_Daily,Calculation_of_Calorie_Intake,Physical_Excercise,Schedule_Dedicated_to_Technology,Type_of_Transportation_Used,Class
0,2,18,155,2,2,3,1,3,2,1,2,3,3,4,2
1,2,18,158,2,2,3,1,1,2,1,2,1,3,3,2
2,2,18,159,2,2,2,1,3,2,3,2,2,3,4,2
3,2,18,162,2,2,2,2,2,2,2,2,1,3,4,2
4,2,18,165,2,1,2,1,3,2,1,2,3,3,2,2


In [21]:
dataset.shape

(1610, 15)

In [22]:
dataset["Class"].value_counts()

Class
2    658
3    592
4    287
1     73
Name: count, dtype: int64

In [23]:
# memisahkan data fitur dan label 
x = dataset.drop(columns='Class', axis=1)
y = dataset["Class"]

Standarisasi Data

In [24]:
scaler = StandardScaler()
x = scaler.fit_transform(x)
print(x)

[[ 0.89043419 -1.53737748 -1.59721504 ... -0.19800018  1.41545145
   0.87812402]
 [ 0.89043419 -1.53737748 -1.22115242 ... -1.68762769  1.41545145
   0.21932641]
 [ 0.89043419 -1.53737748 -1.09579821 ... -0.94281393  1.41545145
   0.87812402]
 ...
 [ 0.89043419  1.92071108 -0.71973559 ...  0.54681358 -1.4510603
  -1.09826881]
 [ 0.89043419  2.02241957  0.03238966 ... -0.94281393 -1.4510603
  -1.09826881]
 [ 0.89043419  2.12412805  0.28309807 ...  0.54681358  1.41545145
  -1.09826881]]


Memisahkan data training dan data testing

In [25]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=2)
print(x.shape, x_train.shape, x_test.shape)

(1610, 14) (1288, 14) (322, 14)


Membuat model data latih menggunakan algoritma K-Nearest Neighbor (KNN)

In [26]:
knn_classifier = KNeighborsClassifier(n_neighbors=7,  metric='euclidean')
knn_classifier.fit(x_train, y_train)

Membuat evaluasi untuk mengukur tingkat akurasi

In [27]:
x_train_prediction = knn_classifier.predict(x_train)
training_data_acc = accuracy_score(x_train_prediction, y_train)
print("Akurasi Data Training =>", training_data_acc)

Akurasi Data Training => 0.7911490683229814


In [28]:
x_test_prediction = knn_classifier.predict(x_test)
testing_data_acc = accuracy_score(x_test_prediction, y_test)
print("Akurasi Data Testing => ", testing_data_acc)

Akurasi Data Testing =>  0.7608695652173914


In [29]:
print(classification_report(x_test_prediction, y_test))

              precision    recall  f1-score   support

           1       0.53      0.73      0.62        11
           2       0.89      0.78      0.83       152
           3       0.70      0.75      0.73       110
           4       0.63      0.73      0.68        49

    accuracy                           0.76       322
   macro avg       0.69      0.75      0.71       322
weighted avg       0.78      0.76      0.77       322



Membuat model prediksi

In [30]:
input_data = (2,	18,	155,	2,	2,	3,	1,	3,	2,	1,	2,	3,	3,	4)
input_data_as_numpy_array = np.array(input_data)
input_data_reshape = input_data_as_numpy_array.reshape(1, -1)

std_data = scaler.transform(input_data_reshape)

prediction = knn_classifier.predict(std_data)
print("Prediksi Kelas:", prediction[0])

# Interpret the prediction
if prediction[0] == 1:
    print("Hasil: Underweight")
elif prediction[0] == 2:
    print("Hasil: Normal")
elif prediction[0] == 3:
    print("Hasil: Overweight")
else:
    print("Hasil: Obesity")

Prediksi Kelas: 2
Hasil: Normal




In [31]:
pickle.dump(knn_classifier, open("knn_model.sav", "wb"))
pickle.dump(scaler, open("scaler.sav", "wb"))