In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer, StandardScaler

%matplotlib inline

# Importing and Exploring Data

There are 10 variables:

* sbp: Systolic blood pressure
* tobacco: Cumulative tobacco consumption, in kg
* ldl: Low-density lipoprotein cholesterol
* adiposity: Adipose tissue concentration
* famhist: Family history of heart disease (1=Present, 0=Absent)
* typea: Score on test designed to measure type-A behavior
* obesity: Obesity
* alcohol: Current consumption of alcohol
* age: Age of subject
* chd: Coronary heart disease at baseline; 1=Yes 0=No


Each following row contains the information of one patient. There are 462 samples in total.


In [2]:
data = pd.read_csv('data/heart.csv')
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 462 entries, 0 to 461
Data columns (total 10 columns):
sbp          462 non-null int64
tobacco      462 non-null float64
ldl          462 non-null float64
adiposity    462 non-null float64
famhist      462 non-null object
typea        462 non-null int64
obesity      462 non-null float64
alcohol      462 non-null float64
age          462 non-null int64
chd          462 non-null int64
dtypes: float64(5), int64(4), object(1)
memory usage: 36.2+ KB


In [4]:
data.shape

(462, 10)

In [5]:
dummies = pd.get_dummies(data['famhist'],prefix='famhist', drop_first=False)
data = pd.concat([data,dummies], axis=1)
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1,0,1


In [6]:
data = data.drop(['famhist'], axis=1)
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,1,0,1


In [7]:
# from sklearn.preprocessing import StandardScaler

# scaler = StandardScaler()
# scaled_data = scaler.fit_transform(data)
# scaled_data[:5,:]

In [8]:
inputs=['sbp','tobacco','ldl','adiposity','typea','obesity','alcohol','age']

labels = data['chd']
# min-max scaling
for each in inputs:
    data[each] = ( data[each] - data[each].min() ) / data[each].max()
    
print(data.head())
print(labels.shape)

        sbp   tobacco       ldl  adiposity     typea   obesity   alcohol  \
0  0.270642  0.384615  0.309850   0.385267  0.461538  0.227565  0.660371   
1  0.197248  0.000321  0.223744   0.514709  0.538462  0.304208  0.013996   
2  0.077982  0.002564  0.163079   0.601083  0.500000  0.310004  0.025885   
3  0.316514  0.240385  0.354207   0.736409  0.487179  0.371189  0.164821   
4  0.151376  0.435897  0.164384   0.495175  0.602564  0.242379  0.389565   

        age  chd  famhist_Absent  famhist_Present  
0  0.578125    1               0                1  
1  0.750000    1               1                0  
2  0.484375    0               0                1  
3  0.671875    1               0                1  
4  0.531250    1               0                1  
(462,)


In [9]:
features = data.drop(['chd'], axis=1)
features.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,famhist_Absent,famhist_Present
0,0.270642,0.384615,0.30985,0.385267,0.461538,0.227565,0.660371,0.578125,0,1
1,0.197248,0.000321,0.223744,0.514709,0.538462,0.304208,0.013996,0.75,1,0
2,0.077982,0.002564,0.163079,0.601083,0.5,0.310004,0.025885,0.484375,0,1
3,0.316514,0.240385,0.354207,0.736409,0.487179,0.371189,0.164821,0.671875,0,1
4,0.151376,0.435897,0.164384,0.495175,0.602564,0.242379,0.389565,0.53125,0,1


In [10]:
# labels.head()
# binarizer = LabelBinarizer()
# # labels = binarizer.fit(lables)

# a=tf.one_hot([1,0,1], 2)
# with tf.Session as sess:
#     sess.run()

In [11]:
features, labels = np.array(features), np.array(labels)
print(len(features), len(labels))

462 462


# Spliting into training and testing data

In [12]:
# fraction of examples to keep for training
split_frac = 0.8
n_records = len(features)
split_idx = int(split_frac*n_records)

In [13]:
# features[:5]

array([[  2.70642202e-01,   3.84615385e-01,   3.09849967e-01,
          3.85267122e-01,   4.61538462e-01,   2.27565479e-01,
          6.60370949e-01,   5.78125000e-01,   0.00000000e+00,
          1.00000000e+00],
       [  1.97247706e-01,   3.20512821e-04,   2.23744292e-01,
          5.14709343e-01,   5.38461538e-01,   3.04207815e-01,
          1.39955160e-02,   7.50000000e-01,   1.00000000e+00,
          0.00000000e+00],
       [  7.79816514e-02,   2.56410256e-03,   1.63078930e-01,
          6.01082608e-01,   5.00000000e-01,   3.10004294e-01,
          2.58849107e-02,   4.84375000e-01,   0.00000000e+00,
          1.00000000e+00],
       [  3.16513761e-01,   2.40384615e-01,   3.54207436e-01,
          7.36408567e-01,   4.87179487e-01,   3.71189352e-01,
          1.64820980e-01,   6.71875000e-01,   0.00000000e+00,
          1.00000000e+00],
       [  1.51376147e-01,   4.35897436e-01,   1.64383562e-01,
          4.95175335e-01,   6.02564103e-01,   2.42378703e-01,
          3.89564508e-01

In [14]:
train_X, train_Y = features[:split_idx], labels[:split_idx]
test_X, test_Y = features[split_idx:], labels[split_idx:]

# Building the Tensorflow model

In [15]:
n_labels= 2
n_features = 10

In [31]:
#hyperparameters

learning_rate = 0.1
n_epochs= 200
n_hidden1 = 5
# batch_size = 128
# display_step = 1

In [32]:
def build_model():
    
    tf.reset_default_graph()

    inputs = tf.placeholder(tf.float32,[None, 10], name ='inputs' )
    labels = tf.placeholder(tf.int32, [None,], name='output')
    labels_one_hot = tf.one_hot(labels, 2)
    
    weights = {
        'hidden_layer': tf.Variable(tf.truncated_normal([n_features,n_hidden1], stddev=0.1)),
        'output':tf.Variable(tf.truncated_normal([n_hidden1, n_labels], stddev=0.1))
    }
    
    bias = {
        'hidden_layer':tf.Variable(tf.zeros([n_hidden1])),
        'output':tf.Variable(tf.zeros(n_labels))
    }
    
    hidden_layer = tf.nn.bias_add(tf.matmul(inputs,weights['hidden_layer']), bias['hidden_layer'])
    hidden_layer = tf.nn.relu(hidden_layer)
    
    logits = tf.nn.bias_add(tf.matmul(hidden_layer, weights['output']), bias['output'])
    
    entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_one_hot)
    cost = tf.reduce_mean(entropy)
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
    
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        
        sess.run(init)
        
        #tensorboard
        file_writer = tf.summary.FileWriter('./logs/1', sess.graph)
        
        for epoch in range(n_epochs):
            
            _, loss = sess.run([optimizer, cost], feed_dict={inputs:train_X, labels:train_Y})
           
            print("Epoch: {0} ; training loss: {1}".format(epoch, loss))
            
        print('training finished')
        
         # testing the model on test data
            
#         test_loss,logits = sess.run([loss,logits],feed_dict={inputs:test_X,labels:test_Y})
        
#         predictions = tf.nn.softmax(logits)
        
#         correct_preds = tf.equal(tf.argmax(predictions, 1), tf.argmax(tf.one_hot(test_Y), 1))
#         accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32)) 
        
#         print('model accuracy : {}'.format(accuracy))
        
        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_one_hot, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({inputs: test_X, labels: test_Y}))


In [33]:
build_model()

Epoch: 0 ; training loss: 0.6902515292167664
Epoch: 1 ; training loss: 0.6863057017326355
Epoch: 2 ; training loss: 0.6827540993690491
Epoch: 3 ; training loss: 0.6795487999916077
Epoch: 4 ; training loss: 0.6766538619995117
Epoch: 5 ; training loss: 0.674036979675293
Epoch: 6 ; training loss: 0.6716666221618652
Epoch: 7 ; training loss: 0.6695170402526855
Epoch: 8 ; training loss: 0.6675642728805542
Epoch: 9 ; training loss: 0.6657883524894714
Epoch: 10 ; training loss: 0.6641700863838196
Epoch: 11 ; training loss: 0.6626936197280884
Epoch: 12 ; training loss: 0.6613407135009766
Epoch: 13 ; training loss: 0.6601017117500305
Epoch: 14 ; training loss: 0.6589651107788086
Epoch: 15 ; training loss: 0.6579157710075378
Epoch: 16 ; training loss: 0.656948447227478
Epoch: 17 ; training loss: 0.6560526490211487
Epoch: 18 ; training loss: 0.6552185416221619
Epoch: 19 ; training loss: 0.6544349789619446
Epoch: 20 ; training loss: 0.6536989212036133
Epoch: 21 ; training loss: 0.6530072689056396


# Name scoping 

In [35]:
def build_model_2():
    
    tf.reset_default_graph()
    
    with tf.name_scope('inputs'):

        inputs = tf.placeholder(tf.float32,[None, 10], name ='inputs' )
        
    with tf.name_scope('target_labels'):
        labels = tf.placeholder(tf.int32, [None,], name='output')
        labels_one_hot = tf.one_hot(labels, 2)
    
    with tf.name_scope('weights'):
        weights = {
            'hidden_layer': tf.Variable(tf.truncated_normal([n_features,n_hidden1], stddev=0.1), name='hidden_weights'),
            'output':tf.Variable(tf.truncated_normal([n_hidden1, n_labels], stddev=0.1), name='output_weights')
        }
    
    with tf.name_scope('biases'):
    
        bias = {
            'hidden_layer':tf.Variable(tf.zeros([n_hidden1]), name='hidden_biases'),
            'output':tf.Variable(tf.zeros(n_labels), name='output_biases')
        }
        
    with tf.name_scope('hidden_layers'):

        hidden_layer = tf.nn.bias_add(tf.matmul(inputs,weights['hidden_layer']), bias['hidden_layer'])
        hidden_layer = tf.nn.relu(hidden_layer, name='hidden_layer_output')
        
    with tf.name_scope('predictions'):

        logits = tf.nn.bias_add(tf.matmul(hidden_layer, weights['output']), bias['output'], name='predictions')
    
    with tf.name_scope('cost'):
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_one_hot, name='cross_entropy')
        cost = tf.reduce_mean(entropy, name='cost')
    
    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)

    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        
        sess.run(init)
        
        #tensorboard
        file_writer = tf.summary.FileWriter('./logs/2', sess.graph)
        
        for epoch in range(n_epochs):
            
            _, loss = sess.run([optimizer, cost], feed_dict={inputs:train_X, labels:train_Y})
           
            print("Epoch: {0} ; training loss: {1}".format(epoch, loss))
            
        print('training finished')
        
         # testing the model on test data
            
#         test_loss,logits = sess.run([loss,logits],feed_dict={inputs:test_X,labels:test_Y})
        
#         predictions = tf.nn.softmax(logits)
        
#         correct_preds = tf.equal(tf.argmax(predictions, 1), tf.argmax(tf.one_hot(test_Y), 1))
#         accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32)) 
        
#         print('model accuracy : {}'.format(accuracy))
        
        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_one_hot, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({inputs: test_X, labels: test_Y}))

In [37]:
model2 = build_model_2()

Epoch: 0 ; training loss: 0.6895103454589844
Epoch: 1 ; training loss: 0.6854351758956909
Epoch: 2 ; training loss: 0.6817319989204407
Epoch: 3 ; training loss: 0.678347647190094
Epoch: 4 ; training loss: 0.6752737164497375
Epoch: 5 ; training loss: 0.6724792718887329
Epoch: 6 ; training loss: 0.669940710067749
Epoch: 7 ; training loss: 0.6676146388053894
Epoch: 8 ; training loss: 0.6654449105262756
Epoch: 9 ; training loss: 0.663406252861023
Epoch: 10 ; training loss: 0.6615357995033264
Epoch: 11 ; training loss: 0.659821093082428
Epoch: 12 ; training loss: 0.6582509875297546
Epoch: 13 ; training loss: 0.6568374633789062
Epoch: 14 ; training loss: 0.6555294990539551
Epoch: 15 ; training loss: 0.6543349623680115
Epoch: 16 ; training loss: 0.6532660722732544
Epoch: 17 ; training loss: 0.6522902250289917
Epoch: 18 ; training loss: 0.6513990163803101
Epoch: 19 ; training loss: 0.6505838632583618
Epoch: 20 ; training loss: 0.6498392820358276
Epoch: 21 ; training loss: 0.6491523385047913
Ep

# Visualising weights and distributions 

In [46]:
def build_model_3():
    
    tf.reset_default_graph()
    
    with tf.name_scope('inputs'):

        inputs = tf.placeholder(tf.float32,[None, 10], name ='inputs' )
        
    with tf.name_scope('target_labels'):
        labels = tf.placeholder(tf.int32, [None,], name='output')
        labels_one_hot = tf.one_hot(labels, 2)
    
    with tf.name_scope('weights'):
        weights = {
            'hidden_layer': tf.Variable(tf.truncated_normal([n_features,n_hidden1], stddev=0.1), name='hidden_weights'),
            'output':tf.Variable(tf.truncated_normal([n_hidden1, n_labels], stddev=0.1), name='output_weights')
        }
        
        tf.summary.histogram('hidden_weights', weights['hidden_layer'])
        tf.summary.histogram('output_weights', weights['output'])
    
    with tf.name_scope('biases'):
    
        bias = {
            'hidden_layer':tf.Variable(tf.zeros([n_hidden1]), name='hidden_biases'),
            'output':tf.Variable(tf.zeros(n_labels), name='output_biases')
        }
        
        tf.summary.histogram('hidden_biases', bias['hidden_layer'])
        tf.summary.histogram('output_biases', bias['output'])
        
    with tf.name_scope('hidden_layers'):

        hidden_layer = tf.nn.bias_add(tf.matmul(inputs,weights['hidden_layer']), bias['hidden_layer'])
        hidden_layer = tf.nn.relu(hidden_layer, name='hidden_layer_output')
        
    with tf.name_scope('predictions'):

        logits = tf.nn.bias_add(tf.matmul(hidden_layer, weights['output']), bias['output'], name='logits')
        pred = tf.nn.softmax(logits, name='predictions')
        tf.summary.histogram('predictions', pred)
    
    with tf.name_scope('cost'):
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_one_hot, name='cross_entropy')
        cost = tf.reduce_mean(entropy, name='cost')
        tf.summary.scalar('cost', cost)
    
    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
        
        
    merged = tf.summary.merge_all()
    
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        
        sess.run(init)
        
        #tensorboard
        train_writer = tf.summary.FileWriter('./logs/3', sess.graph)
        
        for epoch in range(n_epochs):
            
            summary,_, loss = sess.run([merged,optimizer, cost], feed_dict={inputs:train_X, labels:train_Y})
           
            print("Epoch: {0} ; training loss: {1}".format(epoch, loss))
            
            train_writer.add_summary(summary, epoch+1)
            
        print('training finished')
        
         # testing the model on test data
            
#         test_loss,logits = sess.run([loss,logits],feed_dict={inputs:test_X,labels:test_Y})
        
#         predictions = tf.nn.softmax(logits)
        
#         correct_preds = tf.equal(tf.argmax(predictions, 1), tf.argmax(tf.one_hot(test_Y), 1))
#         accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32)) 
        
#         print('model accuracy : {}'.format(accuracy))
        
        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_one_hot, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({inputs: test_X, labels: test_Y}))

In [47]:
model3 = build_model_3()

Epoch: 0 ; training loss: 0.6924264430999756
Epoch: 1 ; training loss: 0.6878156661987305
Epoch: 2 ; training loss: 0.6836753487586975
Epoch: 3 ; training loss: 0.6799539923667908
Epoch: 4 ; training loss: 0.6766065955162048
Epoch: 5 ; training loss: 0.6735934615135193
Epoch: 6 ; training loss: 0.67087721824646
Epoch: 7 ; training loss: 0.6684249639511108
Epoch: 8 ; training loss: 0.6662085652351379
Epoch: 9 ; training loss: 0.6642038822174072
Epoch: 10 ; training loss: 0.6623884439468384
Epoch: 11 ; training loss: 0.6607438921928406
Epoch: 12 ; training loss: 0.65925133228302
Epoch: 13 ; training loss: 0.6578938364982605
Epoch: 14 ; training loss: 0.6566564440727234
Epoch: 15 ; training loss: 0.6555258631706238
Epoch: 16 ; training loss: 0.6544875502586365
Epoch: 17 ; training loss: 0.653531014919281
Epoch: 18 ; training loss: 0.6526522636413574
Epoch: 19 ; training loss: 0.6518397331237793
Epoch: 20 ; training loss: 0.6510838866233826
Epoch: 21 ; training loss: 0.6503826975822449
Epo