In [105]:
#We import Python's data analysis library
import pandas as pd

#We imported our CSV file data to be processed as data variable
data = pd.read_csv("C:/Users/dekk/Desktop/ml/covid19_dataset.csv")


In [106]:

#We calculated patient height and weight means to missing places in CSV file
height_mean = data['HEIGHT'].mean()
weight_mean = data['WEIGHT'].mean()


In [107]:

#We imputed numerical missing values with their means seperately
data['HEIGHT'] = data['HEIGHT'].fillna(height_mean)
data['WEIGHT'] = data['WEIGHT'].fillna(weight_mean)


In [108]:

#We dropped null clinical values
data.dropna(inplace=True)


In [109]:

#We drop IDs either
data.drop(['ID'], axis=1 ,inplace=True)


In [110]:

#We represented YES/NO values as 1/0
data["INTUBATION"] = data["INTUBATION"].map({'YES': 1, 'NO': 0})


In [111]:

#We started to classify INTUBATION data
#Pandas drop columns using list of column names
X = data.drop(['INTUBATION', 'INTENSIVE CARE'], axis=1)
Y = data["INTUBATION"]


In [112]:

#Printing out our model shapes
print(X.shape)
print(Y.shape)




(1362, 34)
(1362,)


In [113]:


#We imported scikit-learn's train_test_split library to split those each
from sklearn.model_selection import train_test_split



In [114]:
#We assigned values in train_test_split function, Test Group is assigned as %25 percent and the rest is defined as Training Group
X_Train, X_Test, Y_Train, Y_Test = train_test_split(X ,Y, test_size = 0.25, random_state = 0)


In [115]:

#We imported RandomForestClassifier and fitted our Train data
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 200, random_state = 0)
classifier.fit(X_Train,Y_Train)


RandomForestClassifier(n_estimators=200, random_state=0)

In [116]:

#Predicting Test data throughout classifier variable
Y_Pred = classifier.predict(X_Test)


In [117]:

#Printing out the Random Forest accuracy score
from sklearn.metrics import confusion_matrix, accuracy_score
accuracy_score_rf = accuracy_score(Y_Test, Y_Pred)
print(accuracy_score_rf)


0.9560117302052786


In [118]:
print("RANDOM FOREST ALGORITHM ACCURACY SCORE")
print(round(accuracy_score_rf,2,), "%")


RANDOM FOREST ALGORITHM ACCURACY SCORE
0.96 %


In [119]:

#Receiving algorithm performance measure formulas as below
tn, fp, fn, tp = confusion_matrix(Y_Test, Y_Pred).ravel()


In [120]:

#We imported Python's Numpy library and created two empty arrays
import numpy as np
sensitivity_rf = np.empty(0)
specificity_rf = np.empty(0)



In [121]:
#Retrieving Specificity and Sensitivity formulas from 4 variable and assigned those arrays
sensitivity_rf = np.append(sensitivity_rf, np.array([tp / (tp + fn)]))
specificity_rf = np.append(specificity_rf, np.array([tn / (tn + fp)]))



In [122]:
#Printing out the Specificity and Sensitivity variables
print(sensitivity_rf)
print(specificity_rf)




[0.25]
[1.]


In [123]:
#We imported GradientBoostingClassifier and fitted our Train data
from sklearn.ensemble import GradientBoostingClassifier
gradient_boosting = GradientBoostingClassifier(n_estimators = 100, max_depth = 3)
gradient_boosting.fit(X_Train, Y_Train)



GradientBoostingClassifier()

In [124]:
#Printing out the Gradient Boosting accuracy score
acc_gradient = round(gradient_boosting.score(X_Train, Y_Train) * 100, 2)
print(round(acc_gradient,2,), "%")



99.8 %


In [125]:
#Predicting Test data throughout classifier variable and process same operations as we did before
prediction_gb = gradient_boosting.predict(X_Test)
accuracy_score_gb = accuracy_score(prediction_gb, Y_Test)
print(accuracy_score_gb)



0.9413489736070382


In [126]:

tn, fp, fn, tp = confusion_matrix(Y_Test, prediction_gb).ravel()
sensitivity_gb = np.empty(0)
specificity_gb = np.empty(0)
sensitivity_gb = np.append(sensitivity_gb, np.array([tp / (tp + fn)]))
specificity_gb = np.append(specificity_gb, np.array([tn / (tn + fp)]))
print(sensitivity_gb)
print(specificity_gb)


[0.25]
[0.98442368]


In [127]:

#Now, we are about to classify INTENSIVE CARE data
#Same operation proceeded like before
data["INTENSIVE CARE"] = data["INTENSIVE CARE"].map({'YES': 1, 'NO': 0})
X = data.drop(['INTUBATION', 'INTENSIVE CARE'], axis=1)
Y = data["INTENSIVE CARE"]



In [128]:
#Printing out our second model shapes
print(X.shape)
print(Y.shape)


(1362, 34)
(1362,)


In [129]:


#We fitted our Train data for our 2nd classification
classifier = RandomForestClassifier(n_estimators = 200, random_state = 0)
classifier.fit(X_Train,Y_Train)


RandomForestClassifier(n_estimators=200, random_state=0)

In [130]:

#Predicting Test data throughout classifier variable
Y_Pred = classifier.predict(X_Test)


In [131]:

#Printing out the Random Forest accuracy score
accuracy_score_rf = accuracy_score(Y_Test, Y_Pred)
print(accuracy_score_rf)



0.9560117302052786


In [133]:

#Receiving algorithm performance measure formulas as below
tn, fp, fn, tp = confusion_matrix(Y_Test, Y_Pred).ravel()



In [134]:
#We imported Python's Numpy library and created two empty arrays
import numpy as np
sensitivity_rf = np.empty(0)
specificity_rf = np.empty(0)



In [135]:
#Retrieving Specificity and Sensitivity formulas from 4 variable
sensitivity_rf = np.append(sensitivity_rf, np.array([tp / (tp + fn)]))
specificity_rf = np.append(specificity_rf, np.array([tn / (tn + fp)]))



In [136]:
#Printing out the Specificity and Sensitivity variables
print(sensitivity_rf)
print(specificity_rf)



[0.25]
[1.]


In [137]:
#We imported GradientBoostingClassifier and fitted our Train data
from sklearn.ensemble import GradientBoostingClassifier
gradient_boosting = GradientBoostingClassifier(n_estimators = 100, max_depth = 3)
gradient_boosting.fit(X_Train, Y_Train)



GradientBoostingClassifier()

In [138]:
#Printing out the Gradient Boosting accuracy score
acc_gradient = round(gradient_boosting.score(X_Train, Y_Train) * 100, 2)
print(round(acc_gradient,2,), "%")



99.9 %


In [139]:
#Predicting Test data throughout classifier variable and process same operations as we did before
prediction_gb = gradient_boosting.predict(X_Test)
accuracy_score_gb = accuracy_score(prediction_gb, Y_Test)
print(accuracy_score_gb)


0.9384164222873901


In [104]:


tn, fp, fn, tp = confusion_matrix(Y_Test, prediction_gb).ravel()
sensitivity_gb = np.empty(0)
specificity_gb = np.empty(0)
sensitivity_gb = np.append(sensitivity_gb, np.array([tp / (tp + fn)]))
specificity_gb = np.append(specificity_gb, np.array([tn / (tn + fp)]))
print(sensitivity_gb)
print(specificity_gb)

[0.3]
[0.97819315]
