In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
import seaborn as sns
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression

# Dataset source: https://www.kaggle.com/datasets/mathchi/diabetes-data-set
# The CSV location is kept in one named constant so it is easy to spot and change.
CSV_PATH = '/content/clean-diabetes.csv'
df = pd.read_csv(CSV_PATH)

In [None]:
#For measuring runtime
# Wall-clock reference point (seconds since the epoch). The runtime-measuring
# cell at the end of the notebook subtracts it from a later time.time() reading.
start = time.time()

In [None]:
# Preview the first five rows as a quick sanity check of the loaded frame.
df.head(n=5)

In [None]:
def plot_confusion_matrix(cm, title):
    """Draw a confusion matrix as an annotated heatmap and show it.

    Args:
        cm: 2-D array of integer counts (cells are formatted with ``'d'``),
            e.g. the result of ``confusion_matrix(y_true, y_pred)``.
        title: Text placed above the heatmap.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))
    # seaborn draws onto the current axes (the figure created just above)
    # and hands that Axes object back.
    axes = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    axes.set_title(title)
    axes.set_xlabel('Predicted')
    axes.set_ylabel('Actual')
    plt.show()

In [None]:
#Feature selection
#Every column except the last is a predictor; the last column is the target.
feature_columns = df.columns[:-1]
target_column = df.columns[-1]
X = df[feature_columns].values
y = df[target_column].values

In [None]:
#Split dataset into training, validation and testing sets (60% / 20% / 20%)
#Step 1: keep 60% for training and hold out the remaining 40%.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=0
)
#Step 2: cut the 40% hold-out in half to obtain validation and test sets.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=0
)

In [None]:
#Before normalization
#Random forest fitted on the current training split
classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

#Prediction on the test split
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

#Evaluate: print each heading followed by its metric
report = (
    ("Confusion Matrix:", cm),
    ("\nClassification Report:", classification_report(y_test, y_pred)),
    ("\nAccuracy Score:", accuracy_score(y_test, y_pred)),
)
for heading, value in report:
    print(heading)
    print(value)

In [None]:
#Before normalization
#Heatmap of the confusion matrix computed in the previous cell
plot_confusion_matrix(cm=cm, title='Confusion Matrix Before Normalization')

In [None]:
# Number of missing entries in each column
missing_values = df.isna().sum()

# Number of rows that exactly repeat an earlier row
duplicates = df.duplicated().sum()

# Display both results as the cell output
(missing_values, duplicates)

In [None]:
#Split dataset into training, validation and testing sets (60% / 20% / 20%)
# The split happens BEFORE any fitting or resampling. Fitting the scaler or
# oversampling on the full dataset would leak information: test-set statistics
# would shape the scaling, and duplicated minority rows would appear in both
# the training and the validation/test sets, inflating every reported score.
# X and y are left untouched (raw), so this cell can be re-run safely.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=0)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=0)

#Feature scaling
# Learn mean/variance from the training rows only, then apply the same
# transform to the validation and test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Standardised copy of the whole dataset. It is used only by the histogram
# cell for exploration and is never fed to a model.
data = np.hstack((StandardScaler().fit_transform(X), np.reshape(y,(-1,1))))
transformed_df = pd.DataFrame(data, columns = df.columns)

#Balances dataset
# Oversample the training set only, with a fixed seed so runs are repeatable;
# validation and test keep their natural class distribution.
over = RandomOverSampler(random_state=0)
X_train, y_train = over.fit_resample(X_train, y_train)

In [None]:
#Data visualization
#One figure per feature: overlaid, density-normalised histograms of the two outcome classes
outcome = transformed_df['Outcome']
class_styles = ((1, 'red', "Diabetes"), (0, 'blue', "No Diabetes"))
for label in df.columns[:-1]:
  for outcome_value, colour, legend_text in class_styles:
    plt.hist(transformed_df.loc[outcome == outcome_value, label],
             color=colour, label=legend_text, alpha=0.7, density=True, bins=15)
  plt.title(label)
  plt.ylabel("Probability")
  plt.xlabel(label)
  plt.legend()
  plt.show()

In [None]:
#Create model
#Two 16-unit ReLU hidden layers followed by a single sigmoid output unit
model = tf.keras.models.Sequential()
for units, activation in ((16, 'relu'), (16, 'relu'), (1, 'sigmoid')):
    model.add(tf.keras.layers.Dense(units, activation=activation))

In [None]:
#For data visualization
def plot_history(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the per-epoch series ``'accuracy'``, ``'val_accuracy'``,
            ``'loss'`` and ``'val_loss'`` (e.g. the value returned by
            ``model.fit`` when validation data and an accuracy metric are set).

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    # (metric key, panel title, y-axis label) for the left and right panels.
    panels = (
        ('accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'Model Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, (metric, heading, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        # Training curve first, validation curve second, matching the legend order.
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title(heading)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

In [None]:
#model.evaluate(X_valid,y_valid)
#model.evaluate(X_train,y_train)
#model.evaluate(X_test,y_test)

In [None]:
# Wall-clock timestamp taken just before the Adam training cell; paired with
# adam_time2 to report the Adam runtime at the end of the notebook.
adam_time1 = time.time()

In [None]:
#Adam optimizer
# Train a freshly initialised copy of the network. clone_model() builds new
# layers (and therefore new weights), so this run does not inherit weights
# left behind by an earlier fit() - compile() alone never resets them - and
# re-running the cell starts from scratch instead of continuing training.
model = tf.keras.models.clone_model(model)
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = ['accuracy'])

# m holds the per-epoch training/validation metrics that plot_history() draws.
m = model.fit(X_train,y_train,batch_size=16,epochs=50,validation_data = (X_valid,y_valid))
plot_history(m)

In [None]:
# Wall-clock timestamp taken right after the Adam training cell finished.
adam_time2 = time.time()

In [None]:
# Wall-clock timestamp taken just before the Adamax training cell; paired with
# adamax_time2 to report the Adamax runtime at the end of the notebook.
adamax_time1 = time.time()

In [None]:
#Adamax optimizer
# Train a freshly initialised copy of the network. clone_model() builds new
# layers (and therefore new weights); compile() alone would keep the weights
# already learned by any earlier fit(), so this optimizer would start from a
# pre-trained model and its curves/runtime would not be comparable.
model = tf.keras.models.clone_model(model)
model.compile(optimizer = tf.keras.optimizers.Adamax(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = ['accuracy'])

# m holds the per-epoch training/validation metrics that plot_history() draws.
m = model.fit(X_train,y_train,batch_size=16,epochs=50,validation_data = (X_valid,y_valid))
plot_history(m)

In [None]:
# Wall-clock timestamp taken right after the Adamax training cell finished.
adamax_time2 = time.time()

In [None]:
# Wall-clock timestamp taken just before the SGD training cell; paired with
# sgd_time2 to report the SGD runtime at the end of the notebook.
sgd_time1 = time.time()

In [None]:
#Sgd optimizer
# Train a freshly initialised copy of the network. clone_model() builds new
# layers (and therefore new weights); compile() alone would keep the weights
# already learned by any earlier fit(), so this optimizer would start from a
# pre-trained model and its curves/runtime would not be comparable.
model = tf.keras.models.clone_model(model)
# The loss is given as a BinaryCrossentropy object, like the other optimizer
# cells; it is the same loss the 'binary_crossentropy' string selects.
model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = ['accuracy'])

# m holds the per-epoch training/validation metrics that plot_history() draws.
m = model.fit(X_train,y_train,batch_size=16,epochs=50,validation_data = (X_valid,y_valid))
plot_history(m)

In [None]:
# Wall-clock timestamp taken right after the SGD training cell finished.
sgd_time2 = time.time()

In [None]:
# Wall-clock timestamp taken just before the Ftrl training cell; paired with
# ftrl_time2 to report the Ftrl runtime at the end of the notebook.
ftrl_time1 = time.time()

In [None]:
#Ftrl optimizer
# Train a freshly initialised copy of the network. clone_model() builds new
# layers (and therefore new weights); compile() alone would keep the weights
# already learned by any earlier fit(), so this optimizer would start from a
# pre-trained model and its curves/runtime would not be comparable.
model = tf.keras.models.clone_model(model)
model.compile(optimizer = tf.keras.optimizers.Ftrl(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = ['accuracy'])

# m holds the per-epoch training/validation metrics that plot_history() draws.
m = model.fit(X_train,y_train,batch_size=16,epochs=50,validation_data = (X_valid,y_valid))
plot_history(m)

In [None]:
# Wall-clock timestamp taken right after the Ftrl training cell finished.
ftrl_time2 = time.time()

In [None]:
# Wall-clock timestamp taken just before the Nadam training cell; paired with
# nadam_time2 to report the Nadam runtime at the end of the notebook.
nadam_time1 = time.time()

In [None]:
#Nadam optimizer
# Train a freshly initialised copy of the network. clone_model() builds new
# layers (and therefore new weights); compile() alone would keep the weights
# already learned by any earlier fit(), so this optimizer would start from a
# pre-trained model and its curves/runtime would not be comparable.
model = tf.keras.models.clone_model(model)
model.compile(optimizer = tf.keras.optimizers.Nadam(learning_rate=0.001),
              loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = ['accuracy'])

# m holds the per-epoch training/validation metrics that plot_history() draws.
m = model.fit(X_train,y_train,batch_size=16,epochs=50,validation_data = (X_valid,y_valid))
plot_history(m)

In [None]:
# Wall-clock timestamp taken right after the Nadam training cell finished.
nadam_time2 = time.time()

In [None]:
#After normalization
#Random forest fitted on the current (preprocessed) training split
classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

#Prediction on the test split
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

#Evaluate: print each heading followed by its metric
report = (
    ("Confusion Matrix:", cm),
    ("\nClassification Report:", classification_report(y_test, y_pred)),
    ("\nAccuracy Score:", accuracy_score(y_test, y_pred)),
)
for heading, value in report:
    print(heading)
    print(value)

In [None]:
#After normalization
#Heatmap of the confusion matrix computed in the previous cell
plot_confusion_matrix(cm=cm, title='Confusion Matrix After Normalization')

In [None]:
#Runtime measuring
#Seconds elapsed since `start` was recorded near the top of the notebook
end = time.time()
total_runtime = end - start
print(total_runtime)

In [None]:
#Per-optimizer runtime: difference between the timestamps recorded around each training cell
optimizer_timings = (
    ("Adam", adam_time1, adam_time2),
    ("Adamax", adamax_time1, adamax_time2),
    ("Sgd", sgd_time1, sgd_time2),
    ("Ftrl", ftrl_time1, ftrl_time2),
    ("Nadam", nadam_time1, nadam_time2),
)
for optimizer_name, began, finished in optimizer_timings:
    print(optimizer_name + " runtime:", finished - began)