In [0]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

In [0]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

In [42]:
dataset_url = 'heart.csv'
data = pd.read_csv(dataset_url)
print(data.head())

   row.names  sbp  tobacco   ldl  adiposity  ... typea  obesity  alcohol  age  chd
0          1  160    12.00  5.73      23.11  ...    49    25.30    97.20   52    1
1          2  144     0.01  4.41      28.61  ...    55    28.87     2.06   63    1
2          3  118     0.08  3.48      32.28  ...    52    29.14     3.81   46    0
3          4  170     7.50  6.41      38.03  ...    51    31.99    24.26   58    1
4          5  134    13.60  3.50      27.78  ...    60    25.99    57.34   49    1

[5 rows x 11 columns]


Split data into training set, test set, and validation set

In [57]:
train, test = train_test_split(data, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

TRAIN_LEN = len(train)
VAL_LEN = len(val)
TEST_LEN = len(test)

295 train examples
74 validation examples
93 test examples


Pre-process Data

In [58]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
# This function was taken from https://www.tensorflow.org/tutorials/structured_data/feature_columns#create_a_feature_layer
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('chd')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

train_ds = df_to_dataset(train)
val_ds = df_to_dataset(val, shuffle=False)
test_ds = df_to_dataset(test, shuffle=False)

for feature_batch, label_batch in train_ds.take(1):
  print('Every feature:', list(feature_batch.keys()))
  print('A batch of ages:', feature_batch['age'])
  print('A batch of targets:', label_batch )

feature_columns = []

# numeric cols
for header in ['row.names', 'sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'obesity', 'alcohol', 'age']:
  feature_columns.append(tf.feature_column.numeric_column(header))

# categorical col
data["famhist"] = data["famhist"].apply(str)
famhist = tf.feature_column.categorical_column_with_vocabulary_list(
      'famhist', ['0', '1'])
famhist_one_hot = tf.feature_column.indicator_column(famhist)
feature_columns.append(famhist_one_hot)

Every feature: ['row.names', 'sbp', 'tobacco', 'ldl', 'adiposity', 'famhist', 'typea', 'obesity', 'alcohol', 'age']
A batch of ages: tf.Tensor(
[37 55 53 59 39 34 18 26 60 48 40 40 26 42 55 16 62 61 63 17 45 23 38 17
 56 55 59 40 60 36 24 42], shape=(32,), dtype=int32)
A batch of targets: tf.Tensor([0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0], shape=(32,), dtype=int32)


Building and Training the Model

In [60]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)


model = tf.keras.Sequential([
  feature_layer,
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(rate=0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=val_ds,
          epochs=20)

print(f"---- Evaluate TEST SET ----")
loss, accuracy = model.evaluate(test_ds)
print(f"Model Loss:    {loss:.2f}")
print(f"Model Accuracy: {accuracy*100:.1f}%")

print(f"---- Evaluate TRAIN SET ----")
loss, accuracy = model.evaluate(train_ds)
print(f"Model Loss:    {loss:.2f}")
print(f"Model Accuracy: {accuracy*100:.1f}%")

print(f"---- Evaluate VALIDATION SET ----")
loss, accuracy = model.evaluate(val_ds)
print(f"Model Loss:    {loss:.2f}")
print(f"Model Accuracy: {accuracy*100:.1f}%")



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
---- Evaluate TEST SET ----
Model Loss:    0.80
Model Accuracy: 59.1%
---- Evaluate TRAIN SET ----
Model Loss:    0.63
Model Accuracy: 70.5%
---- Evaluate VALIDATION SET ----
Model Loss:    0.80
Model Accuracy: 64.9%
