In [651]:
import regex as re
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MultiLabelBinarizer
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [652]:
main_data = pd.read_csv("dataset_indo.csv")

In [653]:
desc = pd.read_csv("symptom_Description_indo.csv", encoding='latin1')
prec = pd.read_csv("symptom_precaution_indo.csv")

In [654]:
main_data.head(10)

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,
1,Infeksi_jamur,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
2,Infeksi_jamur,gatal,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
3,Infeksi_jamur,gatal,ruam_kulit,bercak_diskromik,,,,,,,,,,,,,,
4,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,,,,,,,,,,,,,,
5,Infeksi_jamur,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
6,Infeksi_jamur,gatal,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
7,Infeksi_jamur,gatal,ruam_kulit,bercak_diskromik,,,,,,,,,,,,,,
8,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,,,,,,,,,,,,,,
9,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,


In [655]:
main_data.sample(5)

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
2230,Tifus,kedinginan,muntah,kelelahan,demam_tinggi,mual,sembelit,nyeri_perut_bagian_atas,diare,tampilan_beracun_(tifus),sakit_perut_bagian_tengah,,,,,,,
2749,GERD,sakit_perut,asam_lambung,sariawan_di_lidah,muntah,batuk,nyeri_dada,,,,,,,,,,,
466,Penyakit_ulkus_peptikum,muntah,gangguan_pencernaan,kehilangan_nafsu_makan,nyeri_perut_bagian_atas,gatal_internal,,,,,,,,,,,,
2946,Artritis,kelemahan_otot,leher_kaku,sendi_bengkak,kekakuan_gerakan,berjalan_sakit,,,,,,,,,,,,
2673,Gastroenteritis,muntah,mata_cekung,dehidrasi,diare,,,,,,,,,,,,,


In [656]:
main_data.shape

(4920, 18)

In [657]:
main_data.Disease.value_counts()

Disease
Infeksi_jamur                         120
Hepatitis_C                           120
Hepatitis_E                           120
Hepatitis_alkoholik                   120
Tuberkulosis                          120
Flu_biasa                             120
Pneumonia                             120
Wasir_dimorfik_(ambeien)              120
Serangan_jantung                      120
Varises                               120
Hipotiroidisme                        120
Hipertiroidisme                       120
Hipoglikemia                          120
Osteoartritis                         120
Artritis                              120
(vertigo)_Vertigo_Posisi_Paroymsal    120
Jerawat                               120
Infeksi_saluran_kemih                 120
Psoriasis                             120
Hepatitis_D                           120
Hepatitis_B                           120
Alergi                                120
Hepatitis_A                           120
GERD                      

In [658]:
df = main_data.copy()
df.dropna(axis=1, how='all', inplace=True) # Dropping rows which are all NaN
df.fillna(0, inplace=True)                 # Replacing the NaN with 0

# Creating a custom label encoder
class CustomLabelEncoder(LabelEncoder):
    def __init__(self, start=0):
        self.start = start
        super().__init__()

    def fit_transform(self, y):
        encoded = super().fit_transform(y)
        encoded += self.start
        return encoded

# Flatten the 'Disease' column into a single Series
flattened_series = df['Disease'].astype(str)

# Create and fit label encoder for the 'Disease' column
encoder = CustomLabelEncoder(start=200)

In [659]:
encoded_values = encoder.fit_transform(flattened_series)
df['Disease'] = encoded_values

mapping_data = {'label_encoder': encoder}

label_mapping = {k: v for k, v in zip(mapping_data['label_encoder'].classes_, range(200, 200+len(mapping_data['label_encoder'].classes_)))}

df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,222,gatal,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,0,0,0,0,0,0,0,0,0,0,0,0,0
1,222,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,222,gatal,letusan_kulit_nodal,bercak_diskromik,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,222,gatal,ruam_kulit,bercak_diskromik,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,222,gatal,ruam_kulit,letusan_kulit_nodal,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [660]:
label_mapping

{'(vertigo)_Vertigo_Posisi_Paroymsal': 200,
 'AIDS': 201,
 'Alergi': 202,
 'Artritis': 203,
 'Asma_Bronkial': 204,
 'Cacar_air': 205,
 'Demam_berdarah': 206,
 'Diabetes': 207,
 'Flu_biasa': 208,
 'GERD': 209,
 'Gastroenteritis': 210,
 'Hepatitis_A': 211,
 'Hepatitis_B': 212,
 'Hepatitis_C': 213,
 'Hepatitis_D': 214,
 'Hepatitis_E': 215,
 'Hepatitis_alkoholik': 216,
 'Hipertensi': 217,
 'Hipertiroidisme': 218,
 'Hipoglikemia': 219,
 'Hipotiroidisme': 220,
 'Impetigo': 221,
 'Infeksi_jamur': 222,
 'Infeksi_saluran_kemih': 223,
 'Jerawat': 224,
 'Kelumpuhan_(pendarahan_otak)': 225,
 'Kolestasis_kronis': 226,
 'Malaria': 227,
 'Migrain': 228,
 'Osteoartritis': 229,
 'Penyakit_kuning': 230,
 'Penyakit_ulkus_peptikum': 231,
 'Pneumonia': 232,
 'Psoriasis': 233,
 'Reaksi_Obat': 234,
 'Serangan_jantung': 235,
 'Spondilosis_serviks': 236,
 'Tifus': 237,
 'Tuberkulosis': 238,
 'Varises': 239,
 'Wasir_dimorfik_(ambeien)': 240}

In [661]:
encode_df = df.copy() 
encode_df = encode_df.drop(["Disease"], axis = 1)
flattened_series = encode_df.stack().astype(str)

# Create and fit label encoder.
encoder = LabelEncoder()
encoded_values = encoder.fit_transform(flattened_series)

# Reshape the encoded values back to the original DataFrame shape.
F_encoded_df = pd.DataFrame(encoded_values.reshape(encode_df.shape), columns=encode_df.columns,
                            index=encode_df.index)

# Store the mapping data for future use
Fmapping_data = {'label_encoder': encoder}
feature_mapping = {k: v for k, v in zip(Fmapping_data['label_encoder'].classes_, 
                                        Fmapping_data['label_encoder'].\
                                        transform(Fmapping_data['label_encoder'].classes_))}
F_encoded_df.head(3)

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,27,114,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0
1,114,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,27,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [662]:
feature_mapping

{'0': 0,
 'asam_lambung': 1,
 'batuk': 2,
 'bau_busuk_dari_urin': 3,
 'bercak_buang_air_kecil': 4,
 'bercak_di_tenggorokan': 5,
 'bercak_diskromik': 6,
 'berjalan_sakit': 7,
 'berkeringat': 8,
 'bersin_terus_menerus': 9,
 'bibir_mengering_dan_kesemutan': 10,
 'bicara_tidak_jelas': 11,
 'bintik-bintik_merah_di_seluruh_tubuh': 12,
 'dahak': 13,
 'dahak_berkarat': 14,
 'dahak_berlendir': 15,
 'darah_dalam_dahak': 16,
 'debu_seperti_perak': 17,
 'dehidrasi': 18,
 'demam_ringan': 19,
 'demam_tinggi': 20,
 'depresi': 21,
 'detak_jantung_cepat': 22,
 'diare': 23,
 'gagal_hati_akut': 24,
 'gangguan_pencernaan': 25,
 'gangguan_visual': 26,
 'gatal': 27,
 'gatal_internal': 28,
 'gerakan_berputar': 29,
 'haid_tidak_normal': 30,
 'hidung_meler': 31,
 'hubungan_di_luar_nikah': 32,
 'iritasi_pada_anus': 33,
 'iritasi_tenggorokan': 34,
 'jantung_berdebar': 35,
 'jerawat_berisi_nanah': 36,
 'kadar_gula_tidak_teratur': 37,
 'kaki_bengkak': 38,
 'kecemasan': 39,
 'kedinginan': 40,
 'kegelisahan': 41,
 '

In [663]:
label_encoded_df = pd.concat([df['Disease'], F_encoded_df], axis = 1)
label_encoded_df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,222,27,114,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0
1,222,114,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,222,27,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,222,27,114,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,222,27,114,67,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [664]:
# Creating X and y
model_features = label_encoded_df.columns.tolist()
model_features.remove("Disease")
X = label_encoded_df[model_features]
y = label_encoded_df["Disease"]

In [665]:
# One_hot_encoding the y column to use it as a multicalss in the model output layer
y_encoded = pd.get_dummies(y)
y_encoded.shape

(4920, 41)

In [666]:
# The column names are the mapping of the target column. **REMEMBER THIS**
y_encoded.head()

Unnamed: 0,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [667]:
# Reshape the data
X_reshaped = X.values.reshape(-1, 1)
scaler = StandardScaler().fit(X_reshaped)
X_scaled_reshaped = scaler.transform(X_reshaped)
# Reshape back to original shape
X_scaled = X_scaled_reshaped.reshape(X.shape)
X_df = pd.DataFrame(X_scaled)
X_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,-0.046698,2.103663,0.941974,-0.56575,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051
1,2.103663,0.941974,-0.56575,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051
2,-0.046698,0.941974,-0.56575,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051
3,-0.046698,2.103663,-0.56575,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051
4,-0.046698,2.103663,0.941974,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051


In [668]:
X_train, X_test, y_train, y_test = train_test_split(X_df, y_encoded, test_size = 0.25, random_state=42)
X_eval, X_test, y_eval, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [669]:
X_train_tensor = tf.convert_to_tensor(X_train.values, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test.values, dtype=tf.float32)
X_eval_tensor = tf.convert_to_tensor(X_eval.values, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float64)
y_eval_tensor = tf.convert_to_tensor(y_eval, dtype=tf.float64)

In [670]:
X_train_tensor

<tf.Tensor: shape=(3690, 17), dtype=float32, numpy=
array([[ 0.8678234 ,  1.2138584 ,  0.37348768, ..., -0.71405095,
        -0.71405095, -0.71405095],
       [-0.04669771,  0.4723548 ,  0.59593874, ..., -0.71405095,
        -0.71405095, -0.71405095],
       [ 1.263292  ,  1.0408409 , -0.2691488 , ..., -0.71405095,
        -0.71405095, -0.71405095],
       ...,
       [ 2.1036627 ,  0.27462053,  1.485743  , ..., -0.71405095,
        -0.71405095, -0.71405095],
       [-0.04669771,  2.1036627 ,  0.94197375, ..., -0.71405095,
        -0.71405095, -0.71405095],
       [-0.04669771,  2.1036627 ,  2.177813  , ..., -0.71405095,
        -0.71405095, -0.71405095]], dtype=float32)>

In [671]:
y_train_tensor

<tf.Tensor: shape=(3690, 41), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])>

In [672]:
with tf.device('/GPU:0'):
    model_1 = keras.Sequential([
        layers.Input(shape=(X_train_tensor.shape[1],)),
        layers.Dense(32, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(128, activation='tanh'),
        layers.BatchNormalization(),
        layers.Dense(128, activation='tanh'),
        layers.Dropout(0.1),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(y_train_tensor.shape[1], activation='softmax')])
    
    model_1.compile(optimizer= 'adam', loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=4, mode='max')
    history = model_1.fit(X_train_tensor, y_train_tensor, epochs=500, callbacks=[early_stopping],
                batch_size=16, validation_data=(X_eval_tensor, y_eval_tensor))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500


In [673]:
model_1.evaluate(X_test_tensor, y_test_tensor)



[0.05016420781612396, 0.9804878234863281]

In [674]:
def encode_user_input(user_input, mapping=feature_mapping):
    encoded_input = []
    for symptom in user_input:
        for key in mapping.keys():
            if symptom.strip().lower() == key.strip().lower():
                encoded_input.append(mapping[key])
                break 
    return encoded_input

In [675]:
user_input = ['gatal','ruam_kulit','letusan_kulit_nodal','bercak_diskromik']
encoded_input = encode_user_input(user_input)
encoded_input

[27, 114, 67, 6]

In [676]:
# Transforming the encoded user input to a tensor.
input_tensor = tf.cast(encoded_input, tf.float32)
input_tensor

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 27., 114.,  67.,   6.], dtype=float32)>

In [677]:
# Checking the number of dimensions.
input_tensor.ndim == X_train_tensor[1].ndim

True

In [678]:
label_encoded_df.iloc[0][1:5]

Symptom_1     27
Symptom_2    114
Symptom_3     67
Symptom_4      6
Name: 0, dtype: int32

In [679]:
label_encoded_df.head(1)

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,222,27,114,67,6,0,0,0,0,0,0,0,0,0,0,0,0,0


In [680]:
padding_value = tf.constant(130, dtype=tf.float32)
desired_length = X_train_tensor[1].shape[0]
padding_length = desired_length - tf.shape(input_tensor)[0]
padding_tensor = tf.fill((padding_length,), padding_value)
final_input = tf.concat([input_tensor, padding_tensor], axis=0)
final_input

<tf.Tensor: shape=(17,), dtype=float32, numpy=
array([ 27., 114.,  67.,   6., 130., 130., 130., 130., 130., 130., 130.,
       130., 130., 130., 130., 130., 130.], dtype=float32)>

In [681]:
target_index = y_encoded.columns.tolist() 

In [682]:
final_array = final_input.numpy()
final_reshaped = final_array.reshape(-1, 1)
X_scaled = scaler.transform(final_reshaped)
final_tensor = tf.convert_to_tensor(X_scaled)
final_tensor = tf.squeeze(final_tensor)
final_tensor

<tf.Tensor: shape=(17,), dtype=float32, numpy=
array([-0.04669771,  2.1036627 ,  0.94197375, -0.5657502 ,  2.4991312 ,
        2.4991312 ,  2.4991312 ,  2.4991312 ,  2.4991312 ,  2.4991312 ,
        2.4991312 ,  2.4991312 ,  2.4991312 ,  2.4991312 ,  2.4991312 ,
        2.4991312 ,  2.4991312 ], dtype=float32)>

In [683]:
X_df.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,-0.046698,2.103663,0.941974,-0.56575,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051,-0.714051


In [684]:
predict_proba = model_1.predict(tf.expand_dims(final_input, axis = 0)) 
predicted_class_index = np.argmax(predict_proba) 
prediction_encode = target_index[predicted_class_index] 
inverse_label_encoding = {v: k for k, v in label_mapping.items()}
prediction = inverse_label_encoding[prediction_encode]
prediction



'Tuberkulosis'

In [685]:
main_data.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,
1,Infeksi_jamur,ruam_kulit,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
2,Infeksi_jamur,gatal,letusan_kulit_nodal,bercak_diskromik,,,,,,,,,,,,,,
3,Infeksi_jamur,gatal,ruam_kulit,bercak_diskromik,,,,,,,,,,,,,,
4,Infeksi_jamur,gatal,ruam_kulit,letusan_kulit_nodal,,,,,,,,,,,,,,


In [686]:
df = main_data.copy() 
# Combine all symptom columns into a single column
df['All Symptoms'] = df.apply(lambda row: ','.join(row.dropna()), axis=1)
# Drop duplicate symptoms within each cell
df['All Symptoms'] = df['All Symptoms'].apply(lambda x: ','.join(sorted(set(x.split(','))) if x else ''))
stay_cols= ['Disease', 'All Symptoms']
df = df[stay_cols]
df.head()

Unnamed: 0,Disease,All Symptoms
0,Infeksi_jamur,"Infeksi_jamur,bercak_diskromik,gatal,letusan_k..."
1,Infeksi_jamur,"Infeksi_jamur,bercak_diskromik,letusan_kulit_n..."
2,Infeksi_jamur,"Infeksi_jamur,bercak_diskromik,gatal,letusan_k..."
3,Infeksi_jamur,"Infeksi_jamur,bercak_diskromik,gatal,ruam_kulit"
4,Infeksi_jamur,"Infeksi_jamur,gatal,letusan_kulit_nodal,ruam_k..."


In [687]:
df['All Symptoms'][0]

'Infeksi_jamur,bercak_diskromik,gatal,letusan_kulit_nodal,ruam_kulit'

In [688]:
def strip_to_basic_tokens(text):
    # Remove doble spaces and underscores
    text = re.sub(r'[_\s]+', ' ', text)
    # Split by commas and lowercase the tokens
    tokens = [token.strip().lower() for token in text.split(',')]
    return tokens

# Apply the function to 'All Symptoms' column
df['Basic Tokens'] = df['All Symptoms'].apply(strip_to_basic_tokens)
df['Basic Tokens'] = df['Basic Tokens'].apply(lambda x: ', '.join(x))
df = df.drop(['All Symptoms'], axis = 1)
df.head()

Unnamed: 0,Disease,Basic Tokens
0,Infeksi_jamur,"infeksi jamur, bercak diskromik, gatal, letusa..."
1,Infeksi_jamur,"infeksi jamur, bercak diskromik, letusan kulit..."
2,Infeksi_jamur,"infeksi jamur, bercak diskromik, gatal, letusa..."
3,Infeksi_jamur,"infeksi jamur, bercak diskromik, gatal, ruam k..."
4,Infeksi_jamur,"infeksi jamur, gatal, letusan kulit nodal, rua..."


In [689]:
df['Basic Tokens'][0]

'infeksi jamur, bercak diskromik, gatal, letusan kulit nodal, ruam kulit'

In [690]:
dfE = df.copy()
dfE['Basic Tokens'] = dfE['Basic Tokens'].apply(lambda x: x.split(', '))

mlb = MultiLabelBinarizer()
one_hot_encoded = pd.DataFrame(mlb.fit_transform(dfE['Basic Tokens']), columns=mlb.classes_, index=df.index)

df_encoded = pd.concat([dfE, one_hot_encoded], axis=1)

df_encoded = df_encoded.drop(columns=['Basic Tokens'])
df_encoded.head()

Unnamed: 0,Disease,(vertigo) vertigo posisi paroymsal,aids,alergi,artritis,asam lambung,asma bronkial,batuk,bau busuk dari urin,bercak buang air kecil,bercak di tenggorokan,bercak diskromik,berjalan sakit,berkeringat,bersin terus menerus,bibir mengering dan kesemutan,bicara tidak jelas,bintik-bintik merah di seluruh tubuh,cacar air,dahak,dahak berkarat,dahak berlendir,darah dalam dahak,debu seperti perak,dehidrasi,demam berdarah,demam ringan,demam tinggi,depresi,detak jantung cepat,diabetes,diare,flu biasa,gagal hati akut,gangguan pencernaan,gangguan visual,gastroenteritis,gatal,gatal internal,gerakan berputar,gerd,haid tidak normal,hepatitis a,hepatitis alkoholik,hepatitis b,hepatitis c,hepatitis d,hepatitis e,hidung meler,hipertensi,hipertiroidisme,hipoglikemia,hipotiroidisme,hubungan di luar nikah,impetigo,infeksi jamur,infeksi saluran kemih,iritasi pada anus,iritasi tenggorokan,jantung berdebar,jerawat,jerawat berisi nanah,kadar gula tidak teratur,kaki bengkak,kecemasan,kedinginan,kegelisahan,kegoyahan,kehilangan keseimbangan,kehilangan nafsu makan,kehilangan penciuman,kekakuan gerakan,kelebihan cairan,kelelahan,kelemahan otot,kelemahan pada salah satu sisi tubuh,kelemahan pada tungkai,kelenjar getah bening membengkak,kelesuan,keluarnya gas,kelumpuhan (pendarahan otak),kemacetan,kemerahan pada mata,kenaikan berat badan,kerak kuning,ketidaknyamanan kandung kemih,kolestasis kronis,koma,komedo,kram,kuku rapuh,kulit kekuningan,kurangnya konsentrasi,leher kaku,letusan kulit nodal,luka merah di sekitar hidung,malaise,malaria,mata berair,mata cekung,mata menguning,melepuh,memar,menerima suntikan yang tidak steril,menerima transfusi darah,menggigil,migrain,mual,mudah tersinggung,muntah,nafsu makan meningkat,nyeri dada,nyeri di belakang mata,nyeri di daerah anus,nyeri lutut,nyeri otot,nyeri perut bagian atas,nyeri saat buang air besar,nyeri sendi,nyeri sendi pinggul,obesitas,osteoartritis,pembengkakan ekstremitas,pembengkakan perut,pembesaran tiroid,pembuluh darah bengkak,pembuluh darah yang menonjol di betis,pendarahan perut,pengecilan otot,pengelupasan kulit,penglihatan kabur dan terdistorsi,penurunan berat badan,penyakit kuning,penyakit ulkus peptikum,penyok kecil di kuku,perasaan terus menerus dari air seni,perubahan sensorium,perubahan suasana hati,perut kembung,pneumonia,poliuria,psoriasis,pusing,radang kuku,rasa lapar yang berlebihan,rasa terbakar saat berkemih,reaksi obat,riwayat keluarga,riwayat konsumsi alkohol,ruam kulit,sakit kepala,sakit leher,sakit perut,sakit perut bagian tengah,sakit punggung,sariawan di lidah,scurring,sembelit,sendi bengkak,serangan jantung,sesak napas,spondilosis serviks,tampilan beracun (tifus),tangan dan kaki dingin,tekanan sinus,tifus,tinja berdarah,tuberkulosis,urin berwarna gelap,urine berwarna kuning,varises,wajah dan mata bengkak,wasir dimorfik (ambeien)
0,Infeksi_jamur,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Infeksi_jamur,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Infeksi_jamur,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Infeksi_jamur,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Infeksi_jamur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [691]:
df_encoded.shape

(4920, 173)

In [692]:
disease_names = [key for key in label_mapping.keys()]
diseases = [strip_to_basic_tokens(disease) for disease in disease_names]
diseases_cleaned = [item[0] if isinstance(item, list) else item for item in diseases]
df_encoded = df_encoded.drop(diseases_cleaned, axis = 1)
df_encoded.shape

(4920, 132)

In [693]:
model_features = df_encoded.columns.tolist()
model_features.remove("Disease")
X = df_encoded[model_features]
y = df_encoded["Disease"]

In [694]:
y_encoded = pd.get_dummies(y)
y_encoded.shape

(4920, 41)

In [695]:
y_encoded.head()

Unnamed: 0,(vertigo)_Vertigo_Posisi_Paroymsal,AIDS,Alergi,Artritis,Asma_Bronkial,Cacar_air,Demam_berdarah,Diabetes,Flu_biasa,GERD,Gastroenteritis,Hepatitis_A,Hepatitis_B,Hepatitis_C,Hepatitis_D,Hepatitis_E,Hepatitis_alkoholik,Hipertensi,Hipertiroidisme,Hipoglikemia,Hipotiroidisme,Impetigo,Infeksi_jamur,Infeksi_saluran_kemih,Jerawat,Kelumpuhan_(pendarahan_otak),Kolestasis_kronis,Malaria,Migrain,Osteoartritis,Penyakit_kuning,Penyakit_ulkus_peptikum,Pneumonia,Psoriasis,Reaksi_Obat,Serangan_jantung,Spondilosis_serviks,Tifus,Tuberkulosis,Varises,Wasir_dimorfik_(ambeien)
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [696]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size = 0.25, random_state=42)
X_eval, X_test, y_eval, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [697]:
X_train_tensor = tf.convert_to_tensor(X_train.values, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test.values, dtype=tf.float32)
X_eval_tensor = tf.convert_to_tensor(X_eval.values, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float64)
y_eval_tensor = tf.convert_to_tensor(y_eval, dtype=tf.float64)

In [698]:
X_train_tensor

<tf.Tensor: shape=(3690, 131), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [699]:
with tf.device('/GPU:0'):
    model_2 = keras.Sequential([
        layers.Input(shape=(X_train_tensor.shape[1],)),
        layers.Dense(160, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(200, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(240, activation='tanh'),
        layers.BatchNormalization(),
        layers.Dense(240, activation='tanh'),
        layers.Dropout(0.2),
        layers.Dense(200, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(160, activation='relu'),
        layers.Dense(y_train_tensor.shape[1], activation='softmax')])
    
    model_2.compile(optimizer= 'adam', loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=4, mode='max')
    history = model_2.fit(X_train_tensor, y_train_tensor, epochs=500, callbacks=[early_stopping],
                batch_size=16, validation_data=(X_eval_tensor, y_eval_tensor))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500


In [700]:
model_2.evaluate(X_test_tensor, y_test_tensor)



[5.447922831081087e-06, 1.0]

In [701]:
user_input = ['sakit_perut','asam_lambung','nyeri_dada']

original_data = df_encoded.copy()

def strip_to_basic_tokens(symptoms):
    symptoms = [symptom.strip().lower().replace(' ', '_').replace('_', ' ') for symptom in symptoms]
    return [re.sub(r'\s+', ' ', symptom) for symptom in symptoms]

user_input_stripped = strip_to_basic_tokens(user_input)
mlb = MultiLabelBinarizer(classes=df_encoded.columns)
user_input_encoded = pd.DataFrame(mlb.fit_transform([user_input_stripped]), columns=mlb.classes_)
final_user_input = pd.concat([pd.DataFrame(columns=original_data.columns), user_input_encoded], axis=0)
final_user_input = final_user_input.drop(['Disease'],axis = 1)
final_user_input.head()

Unnamed: 0,asam lambung,batuk,bau busuk dari urin,bercak buang air kecil,bercak di tenggorokan,bercak diskromik,berjalan sakit,berkeringat,bersin terus menerus,bibir mengering dan kesemutan,bicara tidak jelas,bintik-bintik merah di seluruh tubuh,dahak,dahak berkarat,dahak berlendir,darah dalam dahak,debu seperti perak,dehidrasi,demam ringan,demam tinggi,depresi,detak jantung cepat,diare,gagal hati akut,gangguan pencernaan,gangguan visual,gatal,gatal internal,gerakan berputar,haid tidak normal,hidung meler,hubungan di luar nikah,iritasi pada anus,iritasi tenggorokan,jantung berdebar,jerawat berisi nanah,kadar gula tidak teratur,kaki bengkak,kecemasan,kedinginan,kegelisahan,kegoyahan,kehilangan keseimbangan,kehilangan nafsu makan,kehilangan penciuman,kekakuan gerakan,kelebihan cairan,kelelahan,kelemahan otot,kelemahan pada salah satu sisi tubuh,kelemahan pada tungkai,kelenjar getah bening membengkak,kelesuan,keluarnya gas,kemacetan,kemerahan pada mata,kenaikan berat badan,kerak kuning,ketidaknyamanan kandung kemih,koma,komedo,kram,kuku rapuh,kulit kekuningan,kurangnya konsentrasi,leher kaku,letusan kulit nodal,luka merah di sekitar hidung,malaise,mata berair,mata cekung,mata menguning,melepuh,memar,menerima suntikan yang tidak steril,menerima transfusi darah,menggigil,mual,mudah tersinggung,muntah,nafsu makan meningkat,nyeri dada,nyeri di belakang mata,nyeri di daerah anus,nyeri lutut,nyeri otot,nyeri perut bagian atas,nyeri saat buang air besar,nyeri sendi,nyeri sendi pinggul,obesitas,pembengkakan ekstremitas,pembengkakan perut,pembesaran tiroid,pembuluh darah bengkak,pembuluh darah yang menonjol di betis,pendarahan perut,pengecilan otot,pengelupasan kulit,penglihatan kabur dan terdistorsi,penurunan berat badan,penyok kecil di kuku,perasaan terus menerus dari air seni,perubahan sensorium,perubahan suasana hati,perut kembung,poliuria,pusing,radang kuku,rasa lapar yang berlebihan,rasa terbakar saat berkemih,riwayat keluarga,riwayat konsumsi alkohol,ruam kulit,sakit kepala,sakit leher,sakit perut,sakit perut bagian tengah,sakit punggung,sariawan di lidah,scurring,sembelit,sendi bengkak,sesak napas,tampilan beracun (tifus),tangan dan kaki dingin,tekanan sinus,tinja berdarah,urin berwarna gelap,urine berwarna kuning,wajah dan mata bengkak
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [702]:
user_tensor = tf.convert_to_tensor(final_user_input.values, dtype=tf.float32)
user_tensor[0]

<tf.Tensor: shape=(131,), dtype=float32, numpy=
array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [703]:
predict_proba = model_2.predict(user_tensor)
predicted_class_index = np.argmax(predict_proba)
prediction_encode = target_index[predicted_class_index]
inverse_label_encoding = {v: k for k, v in label_mapping.items()}
prediction = inverse_label_encoding[prediction_encode]
prediction



'GERD'

In [704]:
user_input = ['gatal','ruam_kulit']
original_data = df_encoded.copy()
user_input_stripped = strip_to_basic_tokens(user_input)
user_input_encoded = pd.DataFrame(mlb.fit_transform([user_input_stripped]), columns=mlb.classes_)
final_user_input = pd.concat([pd.DataFrame(columns=original_data.columns), user_input_encoded], axis=0)
final_user_input = final_user_input.drop(['Disease'],axis = 1)
final_user_input.head()

Unnamed: 0,asam lambung,batuk,bau busuk dari urin,bercak buang air kecil,bercak di tenggorokan,bercak diskromik,berjalan sakit,berkeringat,bersin terus menerus,bibir mengering dan kesemutan,bicara tidak jelas,bintik-bintik merah di seluruh tubuh,dahak,dahak berkarat,dahak berlendir,darah dalam dahak,debu seperti perak,dehidrasi,demam ringan,demam tinggi,depresi,detak jantung cepat,diare,gagal hati akut,gangguan pencernaan,gangguan visual,gatal,gatal internal,gerakan berputar,haid tidak normal,hidung meler,hubungan di luar nikah,iritasi pada anus,iritasi tenggorokan,jantung berdebar,jerawat berisi nanah,kadar gula tidak teratur,kaki bengkak,kecemasan,kedinginan,kegelisahan,kegoyahan,kehilangan keseimbangan,kehilangan nafsu makan,kehilangan penciuman,kekakuan gerakan,kelebihan cairan,kelelahan,kelemahan otot,kelemahan pada salah satu sisi tubuh,kelemahan pada tungkai,kelenjar getah bening membengkak,kelesuan,keluarnya gas,kemacetan,kemerahan pada mata,kenaikan berat badan,kerak kuning,ketidaknyamanan kandung kemih,koma,komedo,kram,kuku rapuh,kulit kekuningan,kurangnya konsentrasi,leher kaku,letusan kulit nodal,luka merah di sekitar hidung,malaise,mata berair,mata cekung,mata menguning,melepuh,memar,menerima suntikan yang tidak steril,menerima transfusi darah,menggigil,mual,mudah tersinggung,muntah,nafsu makan meningkat,nyeri dada,nyeri di belakang mata,nyeri di daerah anus,nyeri lutut,nyeri otot,nyeri perut bagian atas,nyeri saat buang air besar,nyeri sendi,nyeri sendi pinggul,obesitas,pembengkakan ekstremitas,pembengkakan perut,pembesaran tiroid,pembuluh darah bengkak,pembuluh darah yang menonjol di betis,pendarahan perut,pengecilan otot,pengelupasan kulit,penglihatan kabur dan terdistorsi,penurunan berat badan,penyok kecil di kuku,perasaan terus menerus dari air seni,perubahan sensorium,perubahan suasana hati,perut kembung,poliuria,pusing,radang kuku,rasa lapar yang berlebihan,rasa terbakar saat berkemih,riwayat keluarga,riwayat konsumsi alkohol,ruam kulit,sakit kepala,sakit leher,sakit perut,sakit perut bagian tengah,sakit punggung,sariawan di lidah,scurring,sembelit,sendi bengkak,sesak napas,tampilan beracun (tifus),tangan dan kaki dingin,tekanan sinus,tinja berdarah,urin berwarna gelap,urine berwarna kuning,wajah dan mata bengkak
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [705]:
user_tensor = tf.convert_to_tensor(final_user_input.values, dtype=tf.float32)
user_tensor[0]

<tf.Tensor: shape=(131,), dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [706]:
predict_proba = model_2.predict(user_tensor)
predicted_class_index = np.argmax(predict_proba)
prediction_encode = target_index[predicted_class_index]
inverse_label_encoding = {v: k for k, v in label_mapping.items()}
prediction = inverse_label_encoding[prediction_encode]
prediction



'Infeksi_jamur'

In [707]:
# Prediksi menggunakan model
predict_proba = model_2.predict(user_tensor)
predicted_class_indices = np.argsort(predict_proba[0])[::-1][:3]  # Ambil 3 indeks dengan probabilitas tertinggi

# Decode indeks prediksi ke nama penyakit
predicted_diseases = [inverse_label_encoding[target_index[idx]] for idx in predicted_class_indices]



In [708]:
# Tampilkan 3 penyakit yang memungkinkan
print("Tiga jenis penyakit yang memungkinkan:")
for disease in predicted_diseases:
    print("- ", disease)

Tiga jenis penyakit yang memungkinkan:
-  Infeksi_jamur
-  Cacar_air
-  Jerawat


In [709]:
# Fungsi untuk mendapatkan deskripsi dan tindakan pencegahan untuk satu penyakit
def get_description_and_precautions(disease):
    description = desc[desc['Disease'] == disease]['Description'].values
    precautions = prec[prec['Disease'] == disease].iloc[0, 1:].tolist()

    print(f"\nPenyakit: {disease}")
    print(f"Deskripsi Penyakit: {description[0] if len(description) > 0 else 'No description available.'}")
    print("Penanganan:")
    for i, precaution in enumerate(precautions, 1):
        if isinstance(precaution, str):
            print(f"{i}. {precaution}")

In [710]:
# Tampilkan deskripsi dan tindakan pencegahan untuk setiap penyakit yang diprediksi
for disease in predicted_diseases:
    get_description_and_precautions(disease)


Penyakit: Infeksi_jamur
Deskripsi Penyakit: Pada manusia, infeksi jamur terjadi ketika jamur yang menyerang mengambil alih suatu area tubuh dan terlalu banyak untuk ditangani oleh sistem kekebalan tubuh. Jamur dapat hidup di udara, tanah, air, dan tanaman. Ada juga beberapa jamur yang hidup secara alami di dalam tubuh manusia. Seperti halnya mikroba lainnya, ada jamur yang bermanfaat dan ada pula jamur yang berbahaya.
Penanganan:
1. mandi dua kali
2. gunakan detol atau mimba dalam air mandi
3. jaga agar area yang terinfeksi tetap kering
4. gunakan pakaian yang bersih

Penyakit: Cacar_air
Deskripsi Penyakit: Cacar air adalah penyakit yang sangat menular yang disebabkan oleh virus varicella-zoster (VZV). Penyakit ini dapat menyebabkan ruam yang gatal dan melepuh. Ruam pertama kali muncul di dada, punggung, dan wajah, lalu menyebar ke seluruh tubuh, menyebabkan antara 250 hingga 500 lepuhan yang gatal.
Penanganan:
1. gunakan mimba saat mandi 
2. mengkonsumsi daun mimba
3. ambil vaksin
4.

In [711]:
def get_description_and_precautions(disease):
    description = desc[desc['Disease'] == disease]['Description'].values
    precautions = prec[prec['Disease'] == disease].iloc[0, 1:].tolist()
    
    print(f"Penyakit: {disease}")
    print(f"Deskripsi Penyakit: {description[0] if len(description) > 0 else 'No description available.'}")
    print("Penanganan:")
    for i, precaution in enumerate(precautions, 1):
        if isinstance(precaution, str):
            print(f"{i}. {precaution}")

get_description_and_precautions(prediction)

Penyakit: Infeksi_jamur
Deskripsi Penyakit: Pada manusia, infeksi jamur terjadi ketika jamur yang menyerang mengambil alih suatu area tubuh dan terlalu banyak untuk ditangani oleh sistem kekebalan tubuh. Jamur dapat hidup di udara, tanah, air, dan tanaman. Ada juga beberapa jamur yang hidup secara alami di dalam tubuh manusia. Seperti halnya mikroba lainnya, ada jamur yang bermanfaat dan ada pula jamur yang berbahaya.
Penanganan:
1. mandi dua kali
2. gunakan detol atau mimba dalam air mandi
3. jaga agar area yang terinfeksi tetap kering
4. gunakan pakaian yang bersih
