<a href="https://colab.research.google.com/github/visruthcv/Streamlit/blob/main/Heart_failure_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Heart Failure Project 


## Importing libraries and dataset

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

In [None]:
# Colab-only step: expose the user's Google Drive under /content/gdrive.
from google.colab import drive
drive.mount(mountpoint='/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Load the heart-failure clinical records CSV from the mounted Drive folder.
data = pd.read_csv(
    filepath_or_buffer='/content/gdrive/MyDrive/heart_failure_clinical_records_dataset.csv'
)

## Define the problem - Data Inspection

In [None]:
# Preview the first five records (5 is also head()'s default).
data.head(n=5)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [None]:
# Show the column labels of the loaded frame (features plus the target column).
data.columns

Index(['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
       'ejection_fraction', 'high_blood_pressure', 'platelets',
       'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time',
       'DEATH_EVENT'],
      dtype='object')

In [None]:
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line after the summary.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB
None


In [None]:
# Tally how many rows fall into each value of the target column.
class_counts = Counter(data['DEATH_EVENT'])
print('Classes and number of values in the dataset', class_counts)

Classes and number of values in the dataset Counter({0: 203, 1: 96})


The target column `DEATH_EVENT` is imbalanced: class 1 (96 samples) has roughly half as many samples as class 0 (203 samples).

### Creating X and Y, features and target

In [None]:
# Target vector: the DEATH_EVENT label column (values 0 and 1 in this dataset).
y = data.loc[:, 'DEATH_EVENT']

In [None]:
# Feature matrix: every column except the DEATH_EVENT target, in file order.
feature_columns = [
    'age',
    'anaemia',
    'creatinine_phosphokinase',
    'diabetes',
    'ejection_fraction',
    'high_blood_pressure',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'sex',
    'smoking',
    'time',
]
x = data[feature_columns]

### One-hot encoding any categorical features in X

In [None]:
# get_dummies one-hot encodes object/string/category columns only. Every
# selected column is int64/float64 in this dataset, so the features come back
# unchanged; the call is a safeguard in case a text column is added later.
x = pd.get_dummies(data=x)

### Splitting to train and test

In [None]:
# Hold out 30% of the rows for testing. The target is imbalanced (203 vs 96),
# so stratify on it to keep the same class ratio in both partitions instead of
# leaving the ratio to chance; random_state pins the shuffle for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

### Scaling the numeric features using StandardScaler

In [None]:
# Standardise only the continuous columns. remainder='passthrough' keeps the
# binary indicator columns (anaemia, diabetes, high_blood_pressure, sex,
# smoking) in the output unscaled; with the default remainder='drop' they were
# silently discarded and the network was trained on 7 of the 12 features.
# NOTE(review): this widens the model input from 7 to 12 columns, so any code
# outside this notebook that feeds the saved model must apply the same transformer.
numeric_columns = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
ct = ColumnTransformer(
    [("numeric", StandardScaler(), numeric_columns)],
    remainder='passthrough',
)

In [None]:
# Learn the scaling statistics from the training rows only, then apply them.
X_train = ct.fit(X_train).transform(X_train)

In [None]:
# Reuse the statistics fitted on the training set; the test rows are never
# used for fitting.
X_test = ct.transform(X=X_test)

### Preparing the labels for the classification

In [None]:
# Encoder that maps the target labels to consecutive integer class ids.
le = LabelEncoder()

In [None]:
# Fit the label -> integer mapping on the training labels only, then reuse that
# same mapping for the test labels. Re-fitting on the test set (as before)
# could assign different integers if the test split lacked a class.
Y_train = le.fit_transform(Y_train.astype(str))
Y_test = le.transform(Y_test.astype(str))

In [None]:
# One-hot encode the integer class ids, as required by categorical_crossentropy.
Y_train, Y_test = to_categorical(Y_train), to_categorical(Y_test)

### Design the model

In [None]:
# Start with an empty Sequential model; layers are appended in the cells below.
model = Sequential()

Designing an input layer whose size equals the number of features

In [None]:
# The input width follows the number of columns in the prepared training matrix.
n_features = X_train.shape[1]
model.add(InputLayer(input_shape=(n_features,)))

Adding a hidden layer

In [None]:
# Single hidden layer: 12 units with ReLU activation.
hidden_layer = Dense(units=12, activation='relu')
model.add(hidden_layer)

Output layer with the softmax activation function because we have a classification task, with the number of neurons corresponding to the number of classes

In [None]:
# Output layer: one softmax unit per class (2 classes).
output_layer = Dense(units=2, activation='softmax')
model.add(output_layer)

Compiling the model: the loss is categorical cross-entropy, the optimizer is Adam, and the tracked metric is accuracy

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (labels
# are one-hot encoded), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Fitting the model

In [None]:
# Train for 10 epochs in mini-batches of 16. fit() returns a History object
# (the training log), not the model itself, so that is what final_nn holds.
final_nn = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=16,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Calling fit() again on the same model continues from its current weights
# rather than starting over. This run holds out 30% of the training rows as a
# validation set so per-epoch validation metrics are recorded in `history`.
fit_options = dict(validation_split=0.3, epochs=10, batch_size=16)
history = model.fit(X_train, Y_train, **fit_options)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Evaluating the model

In [None]:
# Output path (relative to the working directory) for the pickled training result.
filename = "finalized_model.pkl"


In [None]:
import pickle

# Persist the object returned by the first fit() call. The `with` block
# guarantees the file handle is flushed and closed even if pickling raises;
# the previous bare open() left the handle open.
# NOTE(review): final_nn is the History returned by model.fit, not `model`
# itself -- confirm that whatever loads this file expects that object.
with open(filename, 'wb') as model_file:
    pickle.dump(final_nn, model_file)

INFO:tensorflow:Assets written to: ram://74ae5ea9-e73e-4357-a503-c77372ad0102/assets


In [None]:
# Score the trained model on the held-out test set; with the compile settings
# above, evaluate() yields the loss followed by the accuracy metric.
test_metrics = model.evaluate(x=X_test, y=Y_test, verbose=0)
loss, acc = test_metrics

In [None]:
# Report the test-set loss and accuracy on one line.
report_fields = ("Loss", loss, "Accuracy:", acc)
print(*report_fields)

Loss [0.48763662576675415, 0.47632667422294617, 0.46449515223503113, 0.45506903529167175, 0.4454653561115265, 0.43633782863616943, 0.427408367395401, 0.4193803071975708, 0.41130563616752625, 0.4044201374053955] Accuracy: 0.7555555701255798


### Predictions from the model

In [None]:
# Per-class softmax outputs for every test row (one column per class).
y_estimate = model.predict(x=X_test, verbose=0)

In [None]:
# Collapse each row of class scores to the index of the highest-scoring class.
y_estimate = y_estimate.argmax(axis=1)

In [None]:
# Undo the one-hot encoding of the test labels to recover integer class ids.
y_true = Y_test.argmax(axis=1)

### Printing additional metrics, such as F1_score

In [None]:
# Per-class precision, recall and F1 on the test set, plus overall averages.
report_text = classification_report(y_true=y_true, y_pred=y_estimate)
print(report_text)

              precision    recall  f1-score   support

           0       0.72      0.94      0.82        53
           1       0.86      0.49      0.62        37

    accuracy                           0.76        90
   macro avg       0.79      0.71      0.72        90
weighted avg       0.78      0.76      0.74        90

