In [131]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_hub as hub

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from imblearn.over_sampling import RandomOverSampler

In [None]:
df = pd.read_csv("diabetes.csv")
df.head() #see dataFrame

In [None]:
df[df['Outcome'] == 0] #all outcomes that = 0 (Diabetes Negative)

In [None]:
df[df['Outcome'] == 1] #all outcomes that = 1 (Diabetes Positive)

In [None]:
for i in range(len(df.columns[:-1])): #display data for each column in data
  label = df.columns[i]
  plt.hist(df[df['Outcome'] == 1][label], color = 'blue', label = 'Diabetes Pos', alpha = 0.7, density = True, bins = 15)
  plt.hist(df[df['Outcome'] == 0][label], color = 'red', label = 'Diabetes Neg', alpha = 0.7, density = True, bins = 15) #alpha makes it easier to see variations
  plt.title(label)
  plt.ylabel('Probability')
  plt.xlabel(label)
  plt.legend()
  plt.show()


In [60]:
print("Diabetes Negative: {}".format(len(df[df['Outcome'] == 0])))
print("Diabetes Positive: {}".format(len(df[df['Outcome'] == 1])))

Diabetes Negative: 500
Diabetes Positive: 268


In [138]:
x = df[df.columns[:-1]].values
y = df[df.columns[-1]].values

In [142]:
scalar = StandardScaler()
x = scalar.fit_transform(x)
data = np.hstack((x, np.reshape(y, (-1, 1))))
transformed_df = pd.DataFrame(data, columns=df.columns)

In [145]:
over = RandomOverSampler() #balances dataset
x, y = over.fit_resample(x,y)
data = np.hstack((x, np.reshape(y, (-1, 1))))
transformed_df = pd.DataFrame(data, columns=df.columns)

In [146]:
len(transformed_df[transformed_df['Outcome'] == 1]), len(transformed_df[transformed_df['Outcome'] == 0])

(500, 500)

In [147]:
x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.4, random_state = 0) #0.4 = 40% of dataset
x_valid, x_test, y_valid, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=0) #0.5 = 50% of 40% of dataset

In [148]:
model = tf.keras.Sequential([
                              tf.keras.layers.Dense(16, activation='relu'), #Dense, each neuron per layer recieves all previous neuron data from prev layer
                              tf.keras.layers.Dense(16, activation='relu'),
                              tf.keras.layers.Dense(1, activation="sigmoid")
])

In [149]:
model.compile(
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy']
)

In [150]:
model.evaluate(x, y)

 1/32 [..............................] - ETA: 7s - loss: 0.7108 - accuracy: 0.5312

  return dispatch_target(*args, **kwargs)




[0.6795917749404907, 0.5690000057220459]

In [151]:
model.evaluate(x_train, y_train)



[0.6835008859634399, 0.5666666626930237]

In [152]:
model.evaluate(x_valid, y_valid)



[0.6659496426582336, 0.5950000286102295]

In [153]:
model.fit(x_train, y_train, batch_size=16, epochs=20, validation_data=(x_valid, y_valid))

Epoch 1/20


  return dispatch_target(*args, **kwargs)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f79c75e18d0>

In [154]:
model.evaluate(x_test, y_test)



[0.4611673057079315, 0.8050000071525574]