In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report
from keras.models import load_model

In [2]:
# Download the dataset CSV from Google Drive to /content.
# TLS certificate verification is left enabled (no --no-check-certificate) so
# the downloaded file cannot be silently swapped by a man-in-the-middle.
!wget \
    "https://drive.google.com/uc?export=download&id=11AFzQrlWoGF9SqFrDGGS65oP9L48Ajg4" \
    -O "/content/diabetes_dataset__2019.csv"

--2023-12-18 05:51:19--  https://drive.google.com/uc?export=download&id=11AFzQrlWoGF9SqFrDGGS65oP9L48Ajg4
Resolving drive.google.com (drive.google.com)... 173.194.212.101, 173.194.212.139, 173.194.212.100, ...
Connecting to drive.google.com (drive.google.com)|173.194.212.101|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-04-b4-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/vcsu2fcmq4mqeqbvtmnmiiak73acvcqe/1702878675000/03346498828059726724/*/11AFzQrlWoGF9SqFrDGGS65oP9L48Ajg4?e=download&uuid=5b5dff89-facb-4d36-8f03-954fb644ba22 [following]
--2023-12-18 05:51:19--  https://doc-04-b4-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/vcsu2fcmq4mqeqbvtmnmiiak73acvcqe/1702878675000/03346498828059726724/*/11AFzQrlWoGF9SqFrDGGS65oP9L48Ajg4?e=download&uuid=5b5dff89-facb-4d36-8f03-954fb644ba22
Resolving doc-04-b4-docs.googleusercontent.com (doc-04-b4-docs.googleusercontent.com)... 108.177.11.132, 2

In [3]:
# Load the downloaded CSV into a DataFrame.
diabetes_df = pd.read_csv("/content/diabetes_dataset__2019.csv")

# Only the last expression of a cell is rendered automatically, so the preview
# must be displayed explicitly; a bare head() here would be computed and
# thrown away. `display` is provided by the IPython kernel.
display(diabetes_df.head())

# Print column names, non-null counts and dtypes.
diabetes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 952 entries, 0 to 951
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Age               952 non-null    object 
 1   Gender            952 non-null    object 
 2   Family_Diabetes   952 non-null    object 
 3   highBP            952 non-null    object 
 4   PhysicallyActive  952 non-null    object 
 5   BMI               948 non-null    float64
 6   Smoking           952 non-null    object 
 7   Alcohol           952 non-null    object 
 8   Sleep             952 non-null    int64  
 9   SoundSleep        952 non-null    int64  
 10  RegularMedicine   952 non-null    object 
 11  JunkFood          952 non-null    object 
 12  Stress            952 non-null    object 
 13  BPLevel           952 non-null    object 
 14  Pregancies        910 non-null    float64
 15  Pdiabetes         951 non-null    object 
 16  UriationFreq      952 non-null    object 
 1

In [4]:
# Print the distinct values (NaN included) found in the 'Pdiabetes' column.
print(diabetes_df['Pdiabetes'].unique())


['0' 'yes' nan 'no']


In [5]:
# Show the value distribution of every column before any cleaning is applied.
print('Value of each column')
for col in diabetes_df.columns:
    print(col)
    print(diabetes_df[col].value_counts())
    print("\n")

# Columns that are removed and not used as model features.
unused_columns = ['SoundSleep', 'highBP', 'UriationFreq', 'Stress', 'JunkFood', 'BPLevel', 'BMI']

# Raw label -> code for each categorical column. Codes stay strings here; the
# frame is cast to a numeric dtype in a later cell.
category_codes = {
    'Age': {'less than 40': '0', '40-49': '1', '50-59': '2', '60 or older': '3'},
    'Gender': {'Male': '0', 'Female': '1'},
    'Family_Diabetes': {'no': '0', 'yes': '1'},
    'PhysicallyActive': {
        'none': '0',
        'less than half an hr': '1',
        'more than half an hr': '2',
        'one hr or more': '3',
    },
    'Smoking': {'no': '0', 'yes': '1'},
    'Alcohol': {'no': '0', 'yes': '1'},
    # 'o' is a misspelt negative answer in the raw data.
    'RegularMedicine': {'no': '0', 'o': '0', 'yes': '1'},
    # The raw column contains '0', 'no' and 'yes'; 'no' is mapped explicitly so
    # a surviving 'no' row cannot break the later numeric cast.
    'Pdiabetes': {'0': '0', 'no': '0', 'yes': '1'},
    # ' no' (leading space) is a variant of 'no' in the raw data.
    'Diabetic': {'no': '0', ' no': '0', 'yes': '1'},
}

# Drop the unused columns, then the rows that miss a required value.
# copy() gives an independent frame so the column assignments below modify
# this frame itself and not a view/copy of the raw data.
diabetes_df = (
    diabetes_df
    .drop(columns=unused_columns)
    .dropna(subset=['Pregancies', 'Diabetic', 'Pdiabetes'])
    .copy()
)

# Encode the categorical columns. Assigning the result back (instead of calling
# replace(..., inplace=True) on a selected column) is what actually updates the
# frame; the chained in-place form does nothing under pandas copy-on-write.
for column, codes in category_codes.items():
    diabetes_df[column] = diabetes_df[column].replace(codes)

# Bucket "Sleep" into "less than 8 hours" ('1') and "8 hours or more" ('0').
diabetes_df['Sleep'] = pd.cut(
    diabetes_df['Sleep'],
    bins=[-float('inf'), 8, float('inf')],
    labels=['1', '0'],
    right=False,
)

# Bucket "Pregancies" into "fewer than 2" ('0') and "2 or more" ('1').
diabetes_df['Pregancies'] = pd.cut(
    diabetes_df['Pregancies'],
    bins=[-float('inf'), 2, float('inf')],
    labels=['0', '1'],
    right=False,
)

# Convert the 'Diabetic' target to numeric; anything unrecognised becomes NaN.
diabetes_df['Diabetic'] = pd.to_numeric(diabetes_df['Diabetic'], errors='coerce')

# Column order produced by this cell (bucketed Sleep and Pregancies last).
# It is fixed explicitly because later cells depend on column position.
final_columns = [
    'Age', 'Gender', 'Family_Diabetes', 'PhysicallyActive', 'Smoking', 'Alcohol',
    'RegularMedicine', 'Pdiabetes', 'Diabetic', 'Sleep', 'Pregancies',
]

# Remove rows with NaN values in the target column and apply the final order.
diabetes_df = diabetes_df.dropna(subset=['Diabetic'])[final_columns]

Value of each column
Age
less than 40    488
40-49           164
50-59           156
60 or older     144
Name: Age, dtype: int64


Gender
Male      580
Female    372
Name: Gender, dtype: int64


Family_Diabetes
no     498
yes    454
Name: Family_Diabetes, dtype: int64


highBP
no     724
yes    228
Name: highBP, dtype: int64


PhysicallyActive
less than half an hr    336
more than half an hr    272
one hr or more          212
none                    132
Name: PhysicallyActive, dtype: int64


BMI
24.0    111
21.0     88
23.0     76
28.0     71
26.0     66
33.0     64
27.0     63
22.0     58
20.0     48
19.0     36
25.0     34
30.0     33
18.0     32
29.0     28
38.0     28
36.0     20
17.0     16
32.0     16
31.0     16
34.0     12
35.0     12
15.0      8
39.0      4
40.0      4
42.0      3
45.0      1
Name: BMI, dtype: int64


Smoking
no     844
yes    108
Name: Smoking, dtype: int64


Alcohol
no     760
yes    192
Name: Alcohol, dtype: int64


Sleep
7     264
8     256
6     256
5    

In [6]:
# Cast every column of the frame to float32.
diabetes_df=diabetes_df.astype('float32')

In [7]:
# Display the first five rows of the frame.
diabetes_df.head(5)

Unnamed: 0,Age,Gender,Family_Diabetes,PhysicallyActive,Smoking,Alcohol,RegularMedicine,Pdiabetes,Diabetic,Sleep,Pregancies
0,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Print, for each column in turn, its name followed by the count of every
# distinct value it holds.
print('Value of each column')
for column_name, column_values in diabetes_df.items():
    print(column_name)
    print(column_values.value_counts())
    print("\n")

Value of each column
Age
0.0    464
1.0    153
2.0    149
3.0    142
Name: Age, dtype: int64


Gender
0.0    563
1.0    345
Name: Gender, dtype: int64


Family_Diabetes
0.0    474
1.0    434
Name: Family_Diabetes, dtype: int64


PhysicallyActive
1.0    317
2.0    253
3.0    208
0.0    130
Name: PhysicallyActive, dtype: int64


Smoking
0.0    802
1.0    106
Name: Smoking, dtype: int64


Alcohol
0.0    721
1.0    187
Name: Alcohol, dtype: int64


RegularMedicine
0.0    584
1.0    324
Name: RegularMedicine, dtype: int64


Pdiabetes
0.0    894
1.0     14
Name: Pdiabetes, dtype: int64


Diabetic
0.0    644
1.0    264
Name: Diabetic, dtype: int64


Sleep
1.0    587
0.0    321
Name: Sleep, dtype: int64


Pregancies
0.0    780
1.0    128
Name: Pregancies, dtype: int64




In [9]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling start from the same random state on every run; the same seed is
# used for the train/test split.
SEED = 46
tf.keras.utils.set_random_seed(SEED)

# Split the data into training and testing sets (80% / 20%).
diabetes_df_train, diabetes_df_test = train_test_split(diabetes_df, test_size=0.2, random_state=SEED)

# Separate the 'Diabetic' target from the feature columns.
X_train_diabetes_df, y_train_diabetes_df = diabetes_df_train.drop('Diabetic', axis=1), diabetes_df_train['Diabetic']
X_test_diabetes_df, y_test_diabetes_df = diabetes_df_test.drop('Diabetic', axis=1), diabetes_df_test['Diabetic']

X_train_diabetes_df = X_train_diabetes_df.astype('float32')
X_test_diabetes_df = X_test_diabetes_df.astype('float32')

# Build the TensorFlow model: two hidden ReLU layers and a single sigmoid
# output unit for the binary target.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_diabetes_df.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 10% of the training rows are held out for validation.
model.fit(X_train_diabetes_df, y_train_diabetes_df, epochs=10, batch_size=32, validation_split=0.1)

# Predict probabilities for the test set, then turn them into class labels
# with an explicit 0.5 threshold. ravel() flattens the (n, 1) model output to
# the 1-D shape of the true labels.
y_pred = model.predict(X_test_diabetes_df)
y_pred_labels = (y_pred.ravel() > 0.5).astype('float32')

print('Hasil Evaluasi Model Prediksi')
print(classification_report(y_test_diabetes_df, y_pred_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Hasil Evaluasi Model Prediksi
              precision    recall  f1-score   support

         0.0       0.88      0.90      0.89       119
         1.0       0.80      0.78      0.79        63

    accuracy                           0.86       182
   macro avg       0.84      0.84      0.84       182
weighted avg       0.86      0.86      0.86       182



In [10]:
# Save the trained model to the file 'model.h5'.
model.save('model.h5')

  saving_api.save_model(


In [11]:
# Load the model stored in 'model.h5' into `loaded_model`.
loaded_model = load_model('model.h5')

In [12]:
# Display the first five rows of the frame again as a reference for the
# column names and their order.
diabetes_df.head(5)

Unnamed: 0,Age,Gender,Family_Diabetes,PhysicallyActive,Smoking,Alcohol,RegularMedicine,Pdiabetes,Diabetic,Sleep,Pregancies
0,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Feature columns in the same order as the training frame (every column
# except the 'Diabetic' target). The model receives features by position, not
# by name, so this order must match the one used for training.
feature_names = ["Age", "Gender", "Family_Diabetes", "PhysicallyActive", "Smoking", "Alcohol", "RegularMedicine", "Pdiabetes", "Sleep", "Pregancies"]

# Describe the sample by feature name so each value is tied to its column,
# whatever order the entries are written in.
sample = {
    "Age": 0,
    "Gender": 0,
    "Family_Diabetes": 0,
    "PhysicallyActive": 0,
    "Pdiabetes": 0,
    "Smoking": 0,
    "Alcohol": 0,
    "Sleep": 0,
    "RegularMedicine": 0,
    "Pregancies": 0,
}

# One-row frame, columns arranged in training order, same dtype as training.
new_data = pd.DataFrame([sample], columns=feature_names).astype('float32')

# Predicted probability for the sample (shown as the cell output).
loaded_model.predict(new_data)



array([[0.14681633]], dtype=float32)

In [None]:
# Save the model a second time, to the path 'saved_model' (no file extension).
# NOTE(review): presumably this writes a TensorFlow SavedModel directory — confirm for the installed Keras version.
model.save('saved_model')

In [None]:
import tensorflow as tf

# Report the TensorFlow version of the running environment.
print(f"Versi TensorFlow yang sedang berjalan di Colab: {tf.__version__}")


Versi TensorFlow yang sedang berjalan di Colab: 2.15.0
