In [45]:
import pandas as pd

dts = pd.read_csv('E:\heart_failure_clinical_records_dataset.csv')

In [46]:
dts.head()


Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [47]:
dts.shape

(299, 13)

In [48]:
#Assign X and Y variable names
import numpy as np

X = dts.iloc[:,:-1]
y = dts.iloc[:,-1]

In [49]:
X[0:5]

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8


In [50]:
y[0:5]

0    1
1    1
2    1
3    1
4    1
Name: DEATH_EVENT, dtype: int64

In [51]:
#Number (1) Data pre-processing

from sklearn.preprocessing import StandardScaler

Scale = StandardScaler()
X = Scale.fit_transform(X)

In [52]:
#Number (2) Train Test split data

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 4)

In [53]:
X_train.shape

(209, 12)

In [54]:
#Number (3) Algorithm setup & Number(4) model fitting

from sklearn.linear_model import LogisticRegression

LR =  LogisticRegression(C=0.01,solver = 'liblinear')
Model = LR.fit(X_train,y_train)

In [55]:
#Number (5) predict the model

yhat = Model.predict(X_test)
yhat

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0], dtype=int64)

In [56]:
#Manual verification on the accuracy of the model

Model.predict([[65.0,1,160,1,20,0,327000.00,2.7,116,0,0,8]])

array([0], dtype=int64)

In [57]:
#Number (6) Numerical Evaluation

from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y_test,yhat))

[[59  7]
 [ 6 18]]


In [58]:
pd.crosstab(y_test,yhat)

col_0,0,1
DEATH_EVENT,Unnamed: 1_level_1,Unnamed: 2_level_1
0,59,7
1,6,18


In [59]:
print(classification_report(y_test,yhat))

              precision    recall  f1-score   support

           0       0.91      0.89      0.90        66
           1       0.72      0.75      0.73        24

    accuracy                           0.86        90
   macro avg       0.81      0.82      0.82        90
weighted avg       0.86      0.86      0.86        90



In [60]:
from sklearn.metrics import jaccard_score
print(jaccard_score(y_test,yhat))  

0.5806451612903226


In [61]:
y_proba = Model.predict_proba(X_test)
y_proba[0:5]

array([[0.53531835, 0.46468165],
       [0.529096  , 0.470904  ],
       [0.56886059, 0.43113941],
       [0.64182001, 0.35817999],
       [0.5025147 , 0.4974853 ]])

In [62]:
# Decrease Recall value

import numpy as np

y_predict_thr3 = np.where(Model.predict_proba(X_test)[:,1]>0.30,1,0) #threshold 0.3

In [63]:
pd.crosstab(y_test,y_predict_thr3)

col_0,0,1
DEATH_EVENT,Unnamed: 1_level_1,Unnamed: 2_level_1
0,8,58
1,0,24


In [64]:
print(classification_report(y_test,y_predict_thr3))

              precision    recall  f1-score   support

           0       1.00      0.12      0.22        66
           1       0.29      1.00      0.45        24

    accuracy                           0.36        90
   macro avg       0.65      0.56      0.33        90
weighted avg       0.81      0.36      0.28        90



In [65]:
def predict_threshold(Model,X_test,thr):
    return np.where(Model.predict_proba(X_test)[:,1]>thr,1,0)

In [66]:
for thr in np.arange(0,1.1,0.1):
    y_predict = predict_threshold(Model,X_test,thr)
    print('threshold:',thr)
    print(confusion_matrix(y_test,y_predict))
    
    

threshold: 0.0
[[ 0 66]
 [ 0 24]]
threshold: 0.1
[[ 0 66]
 [ 0 24]]
threshold: 0.2
[[ 0 66]
 [ 0 24]]
threshold: 0.30000000000000004
[[ 8 58]
 [ 0 24]]
threshold: 0.4
[[36 30]
 [ 2 22]]
threshold: 0.5
[[59  7]
 [ 6 18]]
threshold: 0.6000000000000001
[[65  1]
 [20  4]]
threshold: 0.7000000000000001
[[66  0]
 [24  0]]
threshold: 0.8
[[66  0]
 [24  0]]
threshold: 0.9
[[66  0]
 [24  0]]
threshold: 1.0
[[66  0]
 [24  0]]
