In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

In [4]:
# Column names applied to every processed file loaded below; the last
# column, 'num', is split off later as the prediction target.
columns = ['age','sex','cp','trestbps','chol','fbs','restecg',
           'thalach','exang','oldpeak','slope','ca','thal','num']

# Parse each CSV on its own and stack the results exactly once. This avoids
# seeding the array with a dummy all-zero row (which then had to be sliced
# off) and avoids re-copying the accumulated rows on every loop iteration.
# Fields that genfromtxt cannot parse as numbers come back as NaN.
data = np.vstack([
    np.genfromtxt(file, delimiter=',')
    for file in ['../UCI_data/processed.va.csv', '../UCI_data/processed.switzerland.csv',
                 '../UCI_data/processed.cleveland.csv', '../UCI_data/processed.hungarian.csv']
])
df = pd.DataFrame(data=data, columns=columns)

In [5]:
# Keep only the rows in which every column holds a value (no NaN anywhere).
non_na_df = df.dropna(axis=0, how="any")

In [34]:
# Features: every column except the target column 'num'.
X = non_na_df.drop("num", axis="columns")

# Target: 'num' as integers, with every value above 1 capped to 1 so that
# any positive label collapses into a single positive class.
y = non_na_df["num"].astype(int).clip(upper=1)

# Fixed 80/20 split; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [135]:
class Encoder(layers.Layer):
    """Dense encoder that compresses an input batch into a latent code.

    Pipeline: BatchNorm -> Dense(tanh) -> Dense(tanh) -> BatchNorm -> Dense(tanh).

    Args:
        intermidiate_1_dim: Unit count of the first dense layer.
        intermidiate_2_dim: Unit count of the second dense layer.
        latent_dim: Unit count of the final (latent) dense layer.
        name: Keras layer name.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self,
                 intermidiate_1_dim,
                 intermidiate_2_dim,
                 latent_dim,
                 name='encoder',
                 **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)

        self.batch_normalization_1 = layers.BatchNormalization()
        self.batch_normalization_2 = layers.BatchNormalization()

        # The former `self.inp = tf.keras.Input(shape=n)` was removed: `n` is
        # not defined anywhere in the visible notebook (NameError on a fresh
        # kernel) and the attribute was never read by `call`.

        # Unit counts are cast to int because callers may hand in the result
        # of a float division (e.g. 13 / 2), and Dense needs a whole number.
        self.layer_1 = layers.Dense(int(intermidiate_1_dim), activation='tanh')
        self.layer_2 = layers.Dense(int(intermidiate_2_dim), activation='tanh')
        self.layer_3 = layers.Dense(int(latent_dim), activation='tanh')

    def call(self, inputs):
        """Return the latent representation of ``inputs``."""
        bn_1 = self.batch_normalization_1(inputs)

        layer_1 = self.layer_1(bn_1)
        layer_2 = self.layer_2(layer_1)

        bn_2 = self.batch_normalization_2(layer_2)

        # tanh on the last layer keeps every latent coordinate in (-1, 1).
        return self.layer_3(bn_2)


class Decoder(layers.Layer):
    """Dense decoder that expands a latent code back to the input width.

    Pipeline: BatchNorm -> Dense(tanh) -> Dense(tanh) -> BatchNorm -> Dense(output).

    Args:
        intermidiate_1_dim: Unit count of the second dense layer (the wider
            hidden layer, applied after ``intermidiate_2_dim``).
        intermidiate_2_dim: Unit count of the first dense layer.
        original_dim: Unit count of the output layer.
        name: Keras layer name.
        output_activation: Activation of the output layer. Defaults to
            ``'tanh'`` (the previous hard-coded value), which confines every
            reconstructed value to (-1, 1); pass ``None`` for a linear output
            when the reconstruction targets are not scaled into that range.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self,
                 intermidiate_1_dim,
                 intermidiate_2_dim,
                 original_dim,
                 name='decoder',
                 output_activation='tanh',
                 **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)

        self.batch_normalization_3 = layers.BatchNormalization()
        self.batch_normalization_4 = layers.BatchNormalization()

        # Unit counts are cast to int because callers may hand in the result
        # of a float division (e.g. 13 / 4), and Dense needs a whole number.
        self.layer_4 = layers.Dense(int(intermidiate_2_dim), activation='tanh')
        self.layer_5 = layers.Dense(int(intermidiate_1_dim), activation='tanh')
        self.layer_6 = layers.Dense(int(original_dim), activation=output_activation)

    def call(self, inputs):
        """Return the reconstruction computed from the latent ``inputs``."""
        bn_3 = self.batch_normalization_3(inputs)

        layer_4 = self.layer_4(bn_3)
        layer_5 = self.layer_5(layer_4)

        bn_4 = self.batch_normalization_4(layer_5)

        return self.layer_6(bn_4)


class BottleneckAutoEncoder(tf.keras.Model):
    """Autoencoder that squeezes its input through a small latent layer.

    The hidden widths are derived from ``original_dim``: half of it for the
    outer hidden layers and a quarter of it for the inner ones.

    Args:
        original_dim: Number of input (and reconstructed) features.
        name: Keras model name.
        latent_dim: Width of the bottleneck layer. Defaults to 2, the value
            that used to be hard-coded.
        **kwargs: Forwarded to ``tf.keras.Model``.
    """

    def __init__(self,
                 original_dim,
                 name='autoencoder',
                 latent_dim=2,
                 **kwargs):
        super(BottleneckAutoEncoder, self).__init__(name=name, **kwargs)

        self.original_dim = original_dim

        # Layer widths must be whole, positive numbers: use integer division
        # (the old `/` produced floats such as 6.5) and floor the result at 1
        # so very narrow inputs still yield a valid layer.
        input_width = int(original_dim)
        intermidiate_1_dim = max(1, input_width // 2)
        intermidiate_2_dim = max(1, input_width // 4)

        # Defined up front so get_latent_layer() returns None, instead of
        # raising AttributeError, when the model has not been called yet.
        self.latent = None

        self.encoder = Encoder(
            intermidiate_1_dim=intermidiate_1_dim,
            intermidiate_2_dim=intermidiate_2_dim,
            latent_dim=latent_dim
        )
        self.decoder = Decoder(
            intermidiate_1_dim=intermidiate_1_dim,
            intermidiate_2_dim=intermidiate_2_dim,
            original_dim=input_width
        )

    def call(self, inputs):
        """Encode then decode ``inputs``; remember the latent code as a side effect."""
        latent = self.encoder(inputs)

        # Stored so get_latent_layer() can hand it back after the call.
        self.latent = latent

        reconstructed = self.decoder(latent)
        return reconstructed

    def get_latent_layer(self):
        """Return the latent tensor stored by the most recent ``call``.

        Returns None if the model has never been called.
        NOTE(review): after ``fit`` the stored value may be a tensor captured
        during graph tracing rather than concrete values - call the model
        directly on the data of interest first; confirm against the TF version
        in use.
        """
        return self.latent

In [147]:
# Seed NumPy and TensorFlow before the model exists so that the random weight
# initialisation and training-time shuffling that follow are repeatable when
# the notebook is re-run top to bottom (42 matches the train/test split seed).
np.random.seed(42)
tf.random.set_seed(42)

# One input unit per feature column.
bae = BottleneckAutoEncoder(len(X.columns))

In [148]:
# Adam step size. The previous code passed `lr=l2_reg`, but `l2_reg` is not
# defined in any visible cell (NameError on a fresh kernel) and, by its name,
# was a regularisation strength rather than a learning rate.
# NOTE(review): 1e-3 is Keras' documented Adam default - the value originally
# held by `l2_reg` is unknown, so tune this if training behaves differently.
LEARNING_RATE = 1e-3

# `learning_rate` is the current keyword; `lr` is a deprecated alias.
Adam_Opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
bae.compile(optimizer=Adam_Opt,
            loss="mean_squared_error",
            metrics=["mean_squared_error"])

In [149]:
# Build each float32 tensor once; an autoencoder is trained to reproduce its
# own input, so the same tensor serves as both features and target.
train_features = tf.constant(X_train.values, dtype=tf.float32, shape=X_train.shape)
held_out_features = tf.constant(X_test.values, dtype=tf.float32, shape=X_test.shape)

# The held-out split is used as validation data during training.
bae.fit(
    x=train_features,
    y=train_features,
    epochs=200,
    validation_data=(held_out_features, held_out_features),
)

Train on 239 samples, validate on 60 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200


Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200


Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200


Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200


Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<tensorflow.python.keras.callbacks.History at 0x14c381610>

In [93]:
# NOTE(review): `model` is not defined in any cell visible in this notebook
# (the autoencoder above is named `bae`), and this cell's execution count
# (In [93]) predates the model cells - it presumably relies on a classifier
# left over in the kernel from a deleted cell. Confirm and restore its
# definition, otherwise this fails after Restart & Run All.
# argmax over axis 1 takes the highest-scoring output column of each row as
# the predicted label.
y_pred = np.argmax(model.predict(tf.constant(X_test.values, dtype = tf.float32, shape=X_test.shape)), axis=1)

In [95]:
# Held-out scores, displayed as (ROC AUC, accuracy, F1).
# NOTE(review): y_pred holds hard argmax labels, so the ROC AUC here is
# computed from 0/1 predictions rather than continuous scores - consider
# feeding class probabilities to roc_auc_score instead.
(
    roc_auc_score(y_test.values, y_pred),
    accuracy_score(y_test.values, y_pred),
    f1_score(y_test.values, y_pred),
)

(0.5707070707070707, 0.5833333333333334, 0.6478873239436619)

In [152]:
# Run the complete feature table through the autoencoder. The reconstruction
# is displayed, and the call also refreshes the latent code stored on `bae`.
all_features = tf.constant(X.values, dtype=tf.float32, shape=X.shape)
bae(all_features)

<tf.Tensor: id=117903, shape=(299, 13), dtype=float32, numpy=
array([[ 0.28835478,  0.14486174,  0.4832785 , ..., -0.11703872,
         0.05467898,  0.518913  ],
       [ 0.03485138,  0.43346864, -0.4023548 , ...,  0.6122683 ,
         0.62816334, -0.49825034],
       [-0.06248675,  0.5431028 , -0.16400601, ...,  0.41417527,
         0.50028086, -0.45568088],
       ...,
       [ 0.29214686,  0.13922442,  0.47407654, ..., -0.10471132,
         0.0624101 ,  0.5170502 ],
       [ 0.36040697,  0.03977748,  0.23065975, ...,  0.17707574,
         0.24233076,  0.45218962],
       [-0.07093074,  0.55709326, -0.07928354, ...,  0.343726  ,
         0.43524945, -0.40502462]], dtype=float32)>

In [153]:
# Latent codes saved by the most recent call of `bae`; this only reflects the
# full feature table if the model was last invoked on it.
latent_codes = bae.get_latent_layer()
latent_codes

<tf.Tensor: id=117858, shape=(299, 2), dtype=float32, numpy=
array([[ 2.36965805e-01, -9.91128027e-01],
       [-5.11072241e-02,  9.74943161e-01],
       [ 2.18536016e-02,  9.46736932e-01],
       [ 6.07715361e-02,  2.00155467e-01],
       [ 6.68314248e-02,  7.42450297e-01],
       [ 1.56383216e-01, -6.49222910e-01],
       [ 2.04190254e-01, -9.81466591e-01],
       [ 4.38859537e-02,  9.15878952e-01],
       [ 9.38637257e-02, -1.15944847e-01],
       [ 5.18920608e-02,  2.34975427e-01],
       [-1.11010056e-02,  9.43449318e-01],
       [ 2.00454339e-01, -9.46290135e-01],
       [-2.53561810e-02,  9.57427323e-01],
       [ 1.00601120e-02,  3.23259503e-01],
       [ 1.65159792e-01, -9.53274488e-01],
       [ 1.96775660e-01, -9.05523419e-01],
       [ 2.24821687e-01, -9.44415033e-01],
       [ 1.94909677e-01, -9.67077017e-01],
       [ 1.76158771e-01, -8.88261199e-01],
       [ 9.24243405e-02, -1.35587290e-01],
       [ 1.73688769e-01, -9.38286185e-01],
       [ 1.85741857e-01, -8.78505707