# Objective for Part 4

In Part 1 - 3, we performed data cleaning, EDA and feature engineering which are essential before the data set is ready for ML Model training and testing.

In Part 4, the dataset will be trained in 5 different types of ML Models to find the suitable model for ICU mortality rate prediction.

In [1]:
# Step 1: Import pandas
import pandas as pd

#ML model libraries
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

#ML scoring libraries
from sklearn.metrics import roc_auc_score, confusion_matrix

In [2]:
# Step 2: Read your CSV into a DataFrame
df = pd.read_csv(r"data/data_part3.csv", index_col=False)

In [3]:
# Step 3a: Prepare your independent variables
y = df[['hospital_death']]

# Step 3b: Prepare your dependent variables
X = df.drop(columns=['hospital_death'], axis=1)

In [4]:
# Step 4: Import the machine learning libraries

In [5]:
# Step 5: Split your data
X_train, X_test, y_train, y_test = train_test_split(X,y,stratify=y)

In [6]:
# Step 6a: Declare a variable to store the model
dummy_clf = DummyClassifier()

# Step 6b: Fit your train dataset
dummy_clf.fit(X_train, y_train)

# Step 6c: Declare a variable and store your predictions that you make with your model using X test data
dummy_pred = dummy_clf.predict(X_test)



In [7]:
# Step 7: Repeat Step 6 with other models
#LogisticRegression
logistic_clf = LogisticRegression(max_iter=10000)
logistic_clf.fit(X_train, y_train.values.ravel())
logistic_pred = logistic_clf.predict(X_test)

In [8]:
# Step 7: Repeat Step 6 with other models
#DecisionTreeClassifier
decisiontree_clf = DecisionTreeClassifier()
decisiontree_clf.fit(X_train, y_train)
decisiontree_pred = decisiontree_clf.predict(X_test)

In [9]:
# Step 7: Repeat Step 6 with other models
#RandomForestClassifier
rf_clf = RandomForestClassifier()
rf_clf.fit(X_train, y_train.values.ravel())
rf_pred = rf_clf.predict(X_test)

In [10]:
# Step 7: Repeat Step 6 with other models
#KNeighborsClassifier
kn_clf = KNeighborsClassifier()
kn_clf.fit(X_train, y_train.values.ravel())
kn_pred = kn_clf.predict(X_test)

In [11]:
# Step 8: Assess DummyClassifier model performance
print("DummyClassifier:\n")
print(roc_auc_score(y_test, dummy_pred))
print(confusion_matrix(y_test, dummy_pred))

DummyClassifier:

0.5019843630304441
[[17516  1512]
 [ 1571   143]]


In [12]:
# Step 9: Repeat the printing of AUC score and the confusion matrix for the other models

In [13]:
# Step 8: Assess LogisticRegression model performance
print("LogisticRegression:\n")
print(roc_auc_score(y_test, logistic_pred))
print(confusion_matrix(y_test, logistic_pred))

LogisticRegression:

0.5004257681794979
[[19022     6]
 [ 1712     2]]


In [14]:
# Step 8: Assess DecisionTreeClassifier model performance
print("DecisionTreeClassifier:\n")
print(roc_auc_score(y_test, decisiontree_pred))
print(confusion_matrix(y_test, decisiontree_pred))

DecisionTreeClassifier:

0.519674684411525
[[17290  1738]
 [ 1490   224]]


In [15]:
# Step 8: Assess RandomForestClassifier model performance
print("RandomForestClassifier:\n")
print(roc_auc_score(y_test, rf_pred))
print(confusion_matrix(y_test, rf_pred))

RandomForestClassifier:

0.5013561357346258
[[19013    15]
 [ 1708     6]]


In [16]:
# Step 8: Assess KNeighborsClassifier model performance
print("KNeighborsClassifier:\n")
print(roc_auc_score(y_test, kn_pred))
print(confusion_matrix(y_test, kn_pred))

KNeighborsClassifier:

0.5058979287172204
[[18875   153]
 [ 1680    34]]
