Importing Libraries

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [29]:
import tensorflow as tf
import keras

In [30]:
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import relu, sigmoid
from keras.initializers import he_normal

Data pre-processing

In [31]:
data = pd.read_csv('toy_dataset.csv')

In [32]:
data.head()

Unnamed: 0,Number,City,Gender,Age,Income,Illness
0,1,Dallas,Male,41,40367.0,No
1,2,Dallas,Male,54,45084.0,No
2,3,Dallas,Male,42,52483.0,No
3,4,Dallas,Male,40,40941.0,No
4,5,Dallas,Male,46,50289.0,No


In [33]:
data.isnull().sum()

Number     0
City       0
Gender     0
Age        0
Income     0
Illness    0
dtype: int64

In [34]:
data.City.value_counts()

New York City      50307
Los Angeles        32173
Dallas             19707
Mountain View      14219
Austin             12292
Boston              8301
Washington D.C.     8120
San Diego           4881
Name: City, dtype: int64

In [35]:
data.Illness.value_counts()

No     137861
Yes     12139
Name: Illness, dtype: int64

In [36]:
data.City.unique()

array(['Dallas', 'New York City', 'Los Angeles', 'Mountain View',
       'Boston', 'Washington D.C.', 'San Diego', 'Austin'], dtype=object)

In [37]:
data = data.drop(columns = ['Number'])

In [38]:
replacement_dict = {
    'Male': 1,
    'Female': 0
}

replacement_dict2 = {
    'New York City': 0,
    'Los Angeles': 1,
    'Dallas': 2,
    'Mountain View': 3,
    'Austin': 4,
    'Boston': 5,
    'Washington D.C.': 6,
    'San Diego': 7
}

replacement_dict3 = {
    'Yes': 1,
    'No': 0
}


data['Gender'] = data['Gender'].replace(replacement_dict)
data['City'] = data['City'].replace(replacement_dict2)
data['Illness'] = data['Illness'].replace(replacement_dict3)

In [39]:
data.corr(method = 'pearson')

Unnamed: 0,City,Gender,Age,Income,Illness
City,1.0,0.00204,-0.001015,-0.067104,0.003324
Gender,0.00204,1.0,-0.003653,0.198888,0.001297
Age,-0.001015,-0.003653,1.0,-0.001318,0.001811
Income,-0.067104,0.198888,-0.001318,1.0,0.000298
Illness,0.003324,0.001297,0.001811,0.000298,1.0


Dependent - Independent Variables

In [40]:
dependent = data["Illness"];
independent = data.drop(columns = ["Illness"]);

Train-Test Split

In [41]:
from sklearn.model_selection import train_test_split

In [42]:
X_train, X_test, y_train, y_test = train_test_split(independent, dependent, test_size=0.2, random_state=42)

In [43]:
X_validation, X_test, y_validation, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

Model Creation

In [44]:
from keras.initializers import he_normal

model = keras.Sequential([
     tf.keras.layers.Dense(4, activation = 'relu', name = 'input', input_dim = 4, kernel_initializer=he_normal(seed=None)),
     tf.keras.layers.Dense(10, activation = 'relu', name = 'L2', kernel_initializer=he_normal(seed=None)),
     tf.keras.layers.Dense(6 , activation = 'relu', name = 'L3', kernel_initializer=he_normal(seed=None)),
     tf.keras.layers.Dense(1, activation = 'sigmoid', name = 'output', kernel_initializer=he_normal(seed=None))
])

In [45]:
model.get_weights()

[array([[ 0.16071314, -0.20328851,  0.238086  ,  0.21524836],
        [-0.65590364,  0.9281536 ,  0.20654264,  0.4465581 ],
        [-0.4544225 ,  0.46685794,  0.30711538, -1.1376691 ],
        [-0.631268  , -0.44831032, -0.82619524,  1.6030538 ]],
       dtype=float32),
 array([0., 0., 0., 0.], dtype=float32),
 array([[-0.15231799, -0.20079882,  0.9892215 , -0.03278311,  0.5928135 ,
         -0.29202685,  1.2390666 ,  0.6936628 ,  0.87608945, -0.06050804],
        [-0.4166125 , -0.48753572, -0.03814913,  0.20542294,  0.04282338,
         -0.246715  ,  0.91647816, -0.2963113 ,  0.14728181, -0.29200566],
        [-0.9953745 ,  0.8100692 ,  0.26007122,  1.0795009 ,  1.3274045 ,
         -1.4312719 , -0.9297762 ,  0.01115838,  0.2947948 , -1.5705621 ],
        [-1.2285523 , -0.9919971 , -0.683464  , -0.5224803 , -0.50805616,
         -0.57157075,  0.33445737, -0.52577037,  1.0889472 , -0.21299276]],
       dtype=float32),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 a

In [47]:
independent.shape

(150000, 4)

In [48]:
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
    metrics = ['accuracy']
)

model.fit(
    X_train, y_train,
    epochs = 30, batch_size = 128
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7b531c6f3d90>

In [49]:
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
    metrics = ['accuracy']
)

model.fit(
    X_validation, y_validation,
    epochs = 30, batch_size = 32
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7b531c460a30>

In [50]:
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
    metrics = ['accuracy']
)

model.fit(
    X_test, y_test,
    epochs = 30, batch_size = 32
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7b531c3d5480>

In [None]:
model.summary()