In [1]:
# Updated model, revised from the initial version

In [2]:
# Updates include the following:
# 1. Cleaned the data
# 2. Connected to a database that houses the data from the CSV file
# 3. Scaled the data
# 4. Defined the model (a deep neural net)
# 5. Added 3 hidden layers
# 6. Compiled the model
# 7. Trained the model
# 8. Evaluated the loss and accuracy metrics


In [3]:
# Future changes:
# 1. Adding more hidden layers 
# 2. Running the model with a different number of features to see if accuracy and loss are affected
# 3. Altering aspects such as activation functions to see if model accuracy improves. 

In [4]:
# Import our dependencies
import os
from getpass import getpass
from urllib.parse import quote

# NOTE(review): `plt` conventionally aliases matplotlib.pyplot; this binds the
# top-level matplotlib package instead — confirm before calling plt.plot() etc.
import matplotlib as plt
import pandas as pd
import psycopg2
import sklearn as skl
import sqlalchemy
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sqlalchemy import create_engine

In [7]:
# Postgres connection settings. Every value can be overridden through an
# environment variable so that credentials never need to be saved in the notebook.
postgres_address = os.environ.get('POSTGRES_HOST', 'localhost')
postgres_port = os.environ.get('POSTGRES_PORT', '5432')
postgres_username = os.environ.get('POSTGRES_USER', 'postgres')
# The password is never hardcoded: take it from the environment, otherwise prompt.
postgres_password = os.environ.get('POSTGRES_PASSWORD') or getpass('Postgres password: ')
postgres_dbname = os.environ.get('POSTGRES_DB', 'cardio_disease_db')
# A long string that contains the necessary Postgres login information.
# Username and password are percent-encoded so characters such as '@', ':' or
# '/' inside them cannot be mistaken for URL delimiters.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'
                .format(username=quote(postgres_username, safe=''),
                        password=quote(postgres_password, safe=''),
                        ipaddress=postgres_address,
                        port=postgres_port,
                        dbname=postgres_dbname))
# Create the connection (a SQLAlchemy engine; it connects lazily on first use)
cnx = create_engine(postgres_str)

In [11]:
# Read the whole cleaned cardio table from Postgres into a DataFrame
cardio_query = '''SELECT * FROM cardio_data_clean;'''
cardio_train_df = pd.read_sql_query(sql=cardio_query, con=cnx)
# Preview the first rows as the cell output
cardio_train_df.head()

Unnamed: 0,id_no,age,gender,height,weight,ap_hi,ap_lo,cholesterol,glucose,smoke,alcohol,active,cardio
0,0,50,2,168,62.0,110,80,1,1,False,False,True,False
1,1,55,1,156,85.0,140,90,3,1,False,False,True,True
2,2,52,1,165,64.0,130,70,3,1,False,False,False,True
3,3,48,2,169,82.0,150,100,1,1,False,False,True,True
4,4,48,1,156,56.0,100,60,1,1,False,False,False,False


In [6]:
# Define feature data
# features: age, gender, height, weight, ap_hi,
# ap_lo, cholesterol, glucose, smoke, alcohol, active
# Everything that is not a feature is dropped by name: the row identifier
# (called `id_no` in the cleaned table, `id` in the raw CSV), a stray saved
# index column if one is present, and the `cardio` target itself.
# errors='ignore' lets the same cell work with either identifier name.
non_feature_columns = ['Unnamed: 0', 'id', 'id_no', 'cardio']
X = cardio_train_df.drop(columns=non_feature_columns, errors='ignore').values

In [7]:
# Target vector: the `cardio` column
# (whether the person has cardiovascular disease or not)
y = cardio_train_df.loc[:, 'cardio']

In [8]:
# Split features and target into training and testing sets.
# random_state is fixed so the same split is produced on every run.
split_parts = train_test_split(X, y, random_state=78)
X_train, X_test, y_train, y_test = split_parts

In [9]:
# Create a StandardScaler instance (constructed with its default settings);
# it is fitted and applied to the feature arrays in the cells below.
X_scaler = StandardScaler()

In [10]:
# Fit the StandardScaler on the training features only; the fitted scaler is
# what later transforms both X_train and X_test.
X_scaler.fit(X_train)

StandardScaler()

In [11]:
# Apply the fitted scaler to the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Define the model - deep neural net
# Seed TensorFlow before any layer is created so that weight initialisation
# and training-time shuffling are repeatable between runs (same value as the
# train/test split seed). Exact repeatability may still depend on the
# hardware/software stack.
tf.random.set_seed(78)

# One input per feature column of the scaled training matrix
number_input_features = X_train_scaled.shape[1]
# Width of each of the three hidden layers
hidden_nodes_layer1 =  15
hidden_nodes_layer2 = 8
hidden_nodes_layer3 = 8

# Empty sequential model; layers are appended in the following cells
nn_model = tf.keras.models.Sequential()

In [13]:
# Hidden layer 1: also declares the input width of the network
nn_model.add(
    tf.keras.layers.Dense(
        hidden_nodes_layer1,
        activation="relu",
        input_dim=number_input_features,
    )
)

In [14]:
# Hidden layer 2
nn_model.add(
    tf.keras.layers.Dense(
        hidden_nodes_layer2,
        activation="relu",
    )
)

In [15]:
# Hidden layer 3
nn_model.add(
    tf.keras.layers.Dense(
        hidden_nodes_layer3,
        activation="relu",
    )
)

In [16]:
# Output layer: a single sigmoid unit
nn_model.add(
    tf.keras.layers.Dense(
        1,
        activation="sigmoid",
    )
)

In [17]:
# Check the structure of the model: prints each layer with its output shape
# and parameter count, followed by the parameter totals.
nn_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 15)                180       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 128       
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 9         
Total params: 389
Trainable params: 389
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Configure training: Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the metric
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [19]:
# Fit the network on the scaled training data for 100 epochs,
# keeping the returned training history in fit_model
fit_model = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78