# Preprocessing Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import Counter
from numpy import where

In [2]:
data = pd.read_csv("./../dataset/mentalhealth(le).csv", delimiter = ';')
data

Unnamed: 0,Mood,Sering_Pusing,Sering_Menangis,Sulit_Tidur,Pola_Makan,Sering_Gelisah,Hasil
0,0,0,0,1,1,1,0
1,1,1,0,1,1,1,1
2,1,0,0,0,0,0,0
3,1,1,0,1,1,1,1
4,1,0,1,1,1,1,1
...,...,...,...,...,...,...,...
144,1,1,0,0,1,1,1
145,1,0,1,0,0,0,0
146,0,0,0,0,0,0,2
147,0,0,0,0,0,0,2


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Mood             149 non-null    int64
 1   Sering_Pusing    149 non-null    int64
 2   Sering_Menangis  149 non-null    int64
 3   Sulit_Tidur      149 non-null    int64
 4   Pola_Makan       149 non-null    int64
 5   Sering_Gelisah   149 non-null    int64
 6   Hasil            149 non-null    int64
dtypes: int64(7)
memory usage: 8.3 KB


In [4]:
missing_data = pd.DataFrame({'total_missing': data.isnull().sum(), 'perc_missing': (data.isnull().sum()/616)*100})
missing_data

Unnamed: 0,total_missing,perc_missing
Mood,0,0.0
Sering_Pusing,0,0.0
Sering_Menangis,0,0.0
Sulit_Tidur,0,0.0
Pola_Makan,0,0.0
Sering_Gelisah,0,0.0
Hasil,0,0.0


In [5]:
# Mengambil kolom index ke-0 sampai ke-5
X = data.iloc[:,:6].values

# Hanya Mengambil kolom index ke-6
Y = data.iloc[:,6].values

In [6]:
print(X, Y)

[[0 0 0 1 1 1]
 [1 1 0 1 1 1]
 [1 0 0 0 0 0]
 [1 1 0 1 1 1]
 [1 0 1 1 1 1]
 [0 0 0 1 1 0]
 [1 1 1 1 1 1]
 [1 1 1 1 1 1]
 [1 1 1 1 0 0]
 [1 0 0 1 1 1]
 [0 0 0 1 1 1]
 [0 1 0 1 1 0]
 [1 0 0 1 0 0]
 [1 1 1 1 1 1]
 [1 1 1 0 0 1]
 [1 1 0 0 0 1]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 1 1 1 1 1]
 [1 0 1 1 0 1]
 [0 0 0 1 1 1]
 [1 1 1 1 0 1]
 [1 1 1 1 1 1]
 [1 1 1 1 1 1]
 [0 1 0 1 0 1]
 [1 1 0 1 1 1]
 [1 1 1 1 1 1]
 [1 1 0 1 1 1]
 [1 0 1 1 1 0]
 [0 1 0 0 0 1]
 [1 1 1 1 1 1]
 [1 1 0 1 1 1]
 [1 1 1 1 1 1]
 [0 0 0 1 1 0]
 [1 1 1 0 1 1]
 [1 0 0 0 1 0]
 [1 1 1 1 1 1]
 [1 1 1 0 1 1]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 0 0 1 0 0]
 [1 1 0 0 0 0]
 [0 0 0 1 1 0]
 [1 1 0 1 1 1]
 [1 1 1 0 1 1]
 [1 1 0 0 0 1]
 [1 1 1 1 1 1]
 [0 0 0 0 1 0]
 [1 1 0 1 1 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 1 1 1 1 1]
 [0 1 0 1 1 1]
 [0 0 0 1 1 0]
 [1 1 0 1 1 1]
 [0 0 1 1 1 0]
 [1 0 0 0 1 0]
 [1 1 1 1 1 1]
 [1 0 1 0 0 1]
 [1 0 1 0 0 1]
 [1 1 1 0 1 1]
 [0 0 0 1 0 0]
 [1 1 0 0 0 0]
 [1 1 1 1 

In [7]:
data.Hasil.value_counts()

0    68
1    41
2    22
3    18
Name: Hasil, dtype: int64

In [8]:
counter = Counter(Y)
print(counter)

Counter({0: 68, 1: 41, 2: 22, 3: 18})


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

# Naive Bayes

In [10]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes
modelnb = GaussianNB()
# Memasukkan data training pada fungsi klasifikasi Naive Bayes
nbtrain = modelnb.fit(X_train, Y_train)

In [11]:
# Menentukan hasil prediksi dari x_test
Y_pred = nbtrain.predict(X_test)
Y_pred

array([1, 2, 2, 0, 1, 3, 0, 1, 1, 0, 1, 0, 0, 1, 3, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 1, 0], dtype=int64)

In [12]:
confusion_matrix(Y_test, Y_pred)

array([[13,  1,  0,  0],
       [ 2, 10,  0,  0],
       [ 0,  0,  2,  0],
       [ 0,  0,  0,  2]], dtype=int64)

In [13]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

           0       0.87      0.93      0.90        14
           1       0.91      0.83      0.87        12
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2

    accuracy                           0.90        30
   macro avg       0.94      0.94      0.94        30
weighted avg       0.90      0.90      0.90        30



In [14]:
import joblib
filename = 'naivebayess.sav'
joblib.dump(nbtrain, filename)

['naivebayess.sav']

# KNN

In [15]:
from sklearn.neighbors import KNeighborsClassifier
k = 3
#Train Model and Predict  
model_knn = KNeighborsClassifier(n_neighbors = k).fit(X_train,Y_train)
model_knn
Y_pred_KNN = model_knn.predict(X_test)

In [16]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred_KNN))

              precision    recall  f1-score   support

           0       0.70      1.00      0.82        14
           1       1.00      0.50      0.67        12
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2

    accuracy                           0.80        30
   macro avg       0.93      0.88      0.87        30
weighted avg       0.86      0.80      0.78        30



In [17]:
import joblib
filename = 'knnmodel.sav'
joblib.dump(model_knn, filename)

['knnmodel.sav']

# Decision Tree

In [17]:
from sklearn.tree import DecisionTreeClassifier
model_default = DecisionTreeClassifier()
model_default = model_default.fit(X_train, Y_train)


In [18]:
from sklearn import metrics
# hasil prediksi training set
Y_pred_train_default = model_default.predict(X_train)
                                              
# hasil prediksi test set
Y_pred_default = model_default.predict(X_test)

# train set score
print("Train Accuracy     : ", metrics.accuracy_score(Y_train, Y_pred_train_default))
print("Train Precission   : ", metrics.accuracy_score(Y_train, Y_pred_train_default))
print("Train Recal        : ", metrics.accuracy_score(Y_train, Y_pred_train_default),'\n')

# test set score
print("Test Accuracy     : ", metrics.accuracy_score(Y_test, Y_pred_default))
print("Test Precission   : ", metrics.accuracy_score(Y_test, Y_pred_default))
print("Test Recal        : ", metrics.accuracy_score(Y_test, Y_pred_default))

Train Accuracy     :  0.9831932773109243
Train Precission   :  0.9831932773109243
Train Recal        :  0.9831932773109243 

Test Accuracy     :  0.8333333333333334
Test Precission   :  0.8333333333333334
Test Recal        :  0.8333333333333334


In [19]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred_default))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85        14
           1       1.00      0.58      0.74        12
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2

    accuracy                           0.83        30
   macro avg       0.93      0.90      0.90        30
weighted avg       0.88      0.83      0.82        30



In [22]:
import joblib
filename = 'decisisiontree.sav'
joblib.dump(model_default, filename)

['decisisiontree.sav']

# Logistic Regression

In [20]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()
logistic.fit(X_train, Y_train)

log_pred = logistic.predict(X_test)

In [21]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, log_pred))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85        14
           1       1.00      0.58      0.74        12
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2

    accuracy                           0.83        30
   macro avg       0.93      0.90      0.90        30
weighted avg       0.88      0.83      0.82        30



In [25]:
import joblib
filename = 'logistic.sav'
joblib.dump(logistic, filename)

['logistic.sav']

SVM

In [22]:
from sklearn.svm import SVC

svc = SVC()
svc_model = svc.fit(X_train, Y_train)

svc_pred = svc_model.predict(X_test)
print(classification_report(Y_test, svc_pred))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85        14
           1       1.00      0.58      0.74        12
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2

    accuracy                           0.83        30
   macro avg       0.93      0.90      0.90        30
weighted avg       0.88      0.83      0.82        30



In [27]:
import joblib
filename = 'svc.sav'
joblib.dump(svc_model, filename)

['svc.sav']

# Random Forest

In [23]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf_model = rf.fit(X_train, Y_train)

rf_pred = rf_model.predict(X_train)
print(classification_report(Y_train, rf_pred))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98        54
           1       0.97      0.97      0.97        29
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        16

    accuracy                           0.98       119
   macro avg       0.99      0.99      0.99       119
weighted avg       0.98      0.98      0.98       119



In [29]:
import joblib
filename = 'randomforest.sav'
joblib.dump(rf_model, filename)

['randomforest.sav']

# Klasifikasi

In [24]:
Mood = float(input("Mood Tidak Stabil = "))
Sering_Pusing = float(input("Sering Pusing = "))
Sering_Menangis = float(input("Sering Menangis        = "))
Sulit_Tidur = float(input("Sulit Tidur            = "))
Pola_Makan = float(input("Pola makan tidak teratur = "))
Sering_Gelisah           = float(input("Sering merasa gelisah           = "))
data_baru     = [[ Mood, Sering_Pusing, Sering_Menangis, Sulit_Tidur, Pola_Makan, Sering_Gelisah]]

# memprediksi data baru
hasil_prediksi = rf_model.predict(data_baru)
hasil_prediksi = float(hasil_prediksi)

# cetak hasil prediksi
if hasil_prediksi == 0:
  print("\ncukup sehat")
elif hasil_prediksi == 1:
  print("\nkurang sehat")
elif hasil_prediksi == 2:
  print("\nsehat")
elif hasil_prediksi == 3:
  print("\ntidak sehat")
  

ValueError: could not convert string to float: ''