In [1]:
#Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

#Import and read the Cleaned_Covid.csv.
import pandas as pd 
covid_df = pd.read_csv("../Cleaned_Covid.csv")
covid_df.head()

Unnamed: 0.1,Unnamed: 0,SEX,PATIENT_TYPE,DIED,PNEUMONIA,AGE,DIABETES,COPD,ASTHMA,IMSUPR,HYPERTENSION,OTHER_DISEASE,CARDIOVASCULAR,OBESITY,RENAL_CHRONIC,TOBACCO
0,0,1,1,1,1.0,65.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0
1,1,2,1,1,1.0,72.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0
2,2,2,2,1,2.0,55.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
3,3,1,1,1,2.0,53.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
4,4,2,1,1,2.0,68.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,2.0


In [2]:
#Start cleaning data further, removing "unnamed" column, changing data types, and renaming all columns
covid_df = covid_df.drop(columns=["Unnamed: 0"])
covid_df = covid_df.astype("int64")
covid_df = covid_df.rename(columns=str.lower)

In [3]:
#Change all values of 2 to 0
covid_df["sex"] = covid_df["sex"].replace(2, 0)
covid_df["patient_type"] = covid_df["patient_type"].replace(2, 0)
covid_df["pneumonia"] = covid_df["pneumonia"].replace(2, 0)
covid_df["diabetes"] = covid_df["diabetes"].replace(2, 0)
covid_df["copd"] = covid_df["copd"].replace(2, 0)
covid_df["asthma"] = covid_df["asthma"].replace(2, 0)
covid_df["imsupr"] = covid_df["imsupr"].replace(2, 0)
covid_df["hypertension"] = covid_df["hypertension"].replace(2, 0)
covid_df["other_disease"] = covid_df["other_disease"].replace(2, 0)
covid_df["cardiovascular"] = covid_df["cardiovascular"].replace(2, 0)
covid_df["obesity"] = covid_df["obesity"].replace(2, 0)
covid_df["renal_chronic"] = covid_df["renal_chronic"].replace(2, 0)
covid_df["tobacco"] = covid_df["tobacco"].replace(2, 0)
covid_df.head()

Unnamed: 0,sex,patient_type,died,pneumonia,age,diabetes,copd,asthma,imsupr,hypertension,other_disease,cardiovascular,obesity,renal_chronic,tobacco
0,1,1,1,1,65,0,0,0,0,1,0,0,0,0,0
1,0,1,1,1,72,0,0,0,0,1,0,0,1,1,0
2,0,0,1,0,55,1,0,0,0,0,0,0,0,0,0
3,1,1,1,0,53,0,0,0,0,0,0,0,0,0,0
4,0,1,1,0,68,1,0,0,0,1,0,0,0,0,0


In [4]:
#Create 3 categorical groups for the ages to limit variability
young = [*range(0,26,1)]
middle = [*range(26, 51,1)]
older = [*range(51, 122,1)]

for age in young:
  covid_df["age"] = covid_df["age"].replace(age, "0-25")

for age in middle:
  covid_df["age"] = covid_df["age"].replace(age, "26-50")

for age in older:
  covid_df["age"] = covid_df["age"].replace(age, "51-121")

In [5]:
#Turn categorical data to numeric
new_covid_df = pd.get_dummies(covid_df)
new_covid_df.head()

Unnamed: 0,sex,patient_type,died,pneumonia,diabetes,copd,asthma,imsupr,hypertension,other_disease,cardiovascular,obesity,renal_chronic,tobacco,age_0-25,age_26-50,age_51-121
0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1
1,0,1,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1
2,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1
3,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1


In [6]:
#Split the data
X = new_covid_df.drop(columns="died").values
y = new_covid_df["died"].values
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42, stratify=y)
X.shape

(1019345, 16)

In [7]:
#Scale the data
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Implementing a keras model tuner to improve accuracy
def create_model(hp):
    nn_died_model = tf.keras.models.Sequential()
    
    activation = hp.Choice("activation", ["relu", "tanh", "sigmoid"])
    
    nn_died_model.add(tf.keras.layers.Dense(units=hp.Int("first_units", min_value= 1, max_value= 150, step= 2),
                                      activation= activation, input_dim= 16))
    
    for i in range(hp.Int("num_layers", 1, 6)):
        nn_died_model.add(tf.keras.layers.Dense(units=hp.Int("units_" + str(i), min_value= 1, max_value= 150, step=2),
                                           activation=activation))
    nn_died_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    
    nn_died_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    
    return nn_died_model

In [9]:
from tensorflow import keras
import keras_tuner as kt

tuner2 = kt.Hyperband(create_model, objective="val_accuracy", max_epochs = 100, hyperband_iterations=1)

INFO:tensorflow:Reloading Oracle from existing project .\untitled_project\oracle.json
INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [10]:
tuner2.search(X_train_scaled, y_train, epochs= 100, validation_data=(X_test_scaled, y_test))

Trial 254 Complete [01h 37m 22s]
val_accuracy: 0.9326000809669495

Best val_accuracy So Far: 0.9326314330101013
Total elapsed time: 01h 37m 22s
INFO:tensorflow:Oracle triggered exit


In [12]:
#Show the top hyperparameters
top_hyper = tuner2.get_best_hyperparameters(1)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 21, 'num_layers': 4, 'units_0': 23, 'units_1': 119, 'units_2': 141, 'units_3': 27, 'units_4': 45, 'units_5': 65, 'tuner/epochs': 100, 'tuner/initial_epoch': 34, 'tuner/bracket': 1, 'tuner/round': 1, 'tuner/trial_id': '0237'}


In [13]:
#Evaluate the tuned model
tuner_model = tuner2.get_best_models(1)
for model in tuner_model:
    model_loss, model_accuracy = model.evaluate(X_test_scaled,y_test,verbose=2)
    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7964/7964 - 10s - loss: 0.1338 - accuracy: 0.9326 - 10s/epoch - 1ms/step
Loss: 0.13382582366466522, Accuracy: 0.9326314330101013


In [15]:
#Save the model for future implementation
for model in tuner_model:
    model.save("NN_Died_Optimization.h5")