<a href="https://colab.research.google.com/github/vaishalivinayaga/Heart-Failure-Detection/blob/main/Detection_of_Heart_Failure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
# Read the clinical-records table into a DataFrame.
# NOTE(review): the path carries no file extension — confirm the file on disk is not named "<this>.csv".
DATA_PATH = "heart_failure_clinical_records_dataset"
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded table.
data.head()

In [None]:
# Show the column labels exactly as they were read from the file.
data.columns

In [None]:
# Lower-case every column label so the rest of the notebook can use one spelling.
data.columns = data.columns.map(str.lower)

In [None]:
# Plot-friendly copy of the table: the 0/1 codes of four columns are replaced by
# readable labels. `data` itself is left untouched (assign returns a new frame).
df_vis = data.assign(
    death_event=data['death_event'].map({0:'Alive',1:'Dead'}),
    diabetes=data['diabetes'].map({0:'No',1:'Yes'}),
    smoking=data['smoking'].map({0:'No',1:'Yes'}),
    sex=data['sex'].map({0:'Female',1:'Male'}),
)

In [None]:

# Share of patients per outcome.
# value_counts() orders classes by frequency, so the wedge labels are derived from
# the counted values themselves instead of being hard-coded in an assumed order.
outcome_counts = data['death_event'].value_counts()
outcome_labels = list(outcome_counts.index.map({0:'Alive',1:'Dead'}))

plt.figure(figsize=(7,7))
plt.pie(outcome_counts,labels=outcome_labels,autopct='%1.1f%%',shadow=True,explode=[0,0.1], colors = ['lightblue','lightgreen'])
plt.title('Death Event',fontsize=20)
plt.show()

In [None]:
# Number of rows per sex label in the plotting copy.
df_vis['sex'].value_counts()

In [None]:
# Outcome counts split by sex. Titled and shown explicitly, like the figures below,
# so the plot can be read on its own.
plt.figure(figsize=(7,7))
sns.countplot(x='sex',hue='death_event',data=df_vis)
plt.title('Sex with Death Event',fontsize=20)
plt.show()

# Age distribution for each outcome.
plt.figure(figsize=(7,7))
sns.violinplot(x='death_event',y='age',data=df_vis, palette='Set3')
plt.title('Age with Death Event',fontsize=20)
plt.show()

# Outcome counts split by diabetes status.
plt.figure(figsize=(7,7))
sns.countplot(x='diabetes',hue='death_event',data=df_vis, palette='Set1')
plt.title('Diabetes with Death Event',fontsize=20)
plt.show()

# Outcome counts split by smoking status.
plt.figure(figsize=(7,7))
sns.countplot(x='smoking',hue='death_event',data=df_vis, palette='Set2')
plt.title('Smoking with Death Event',fontsize=20)
plt.show()


In [None]:
import plotly.express as px

# Interactive violin of creatinine phosphokinase per outcome, with an inner box plot,
# all individual points drawn, and every column available on hover.
fig = px.violin(
    df_vis,
    x="death_event",
    y="creatinine_phosphokinase",
    color="death_event",
    box=True,
    points="all",
    hover_data=df_vis.columns,
)
fig.show()



In [None]:

# Missing values per column. Printed explicitly: a bare expression that is not the
# last line of a cell produces no output.
print(data.isnull().sum())

# Select the plot style before the figure is created; a style chosen afterwards
# only affects figures drawn later.
plt.style.use("ggplot")

# One box plot per non-object column, used to eyeball outliers.
nums = data.select_dtypes(exclude=["object"])
nums.plot(subplots = True , kind ='box', layout = (15,4), figsize = (25,35), patch_artist= True ,color = "#6F266E")
plt.subplots_adjust(wspace = 0.5)
plt.show()

# Remove every row that exceeds any of the three cut-offs below.
is_outlier = (
    (data['platelets'] > 420000)
    | (data['serum_creatinine'] > 2.5)
    | (data['creatinine_phosphokinase'] > 1500)
)
data = data.loc[~is_outlier]


# Pairwise correlation between all columns, with the coefficient written in each cell.
heat_fig, heat_ax = plt.subplots(figsize=(14,10))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', ax=heat_ax)
heat_ax.set_title('Correlation Heatmap', fontsize=20)
plt.show()


In [None]:


from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize']=15,6
sns.set_style("darkgrid")

# Select the target by name (as the modelling cells do) rather than by column position.
x = data.drop(columns='death_event')
y = data['death_event']

# Seeded so the importance ranking is the same on every Restart & Run All.
model = ExtraTreesClassifier(random_state=42)
model.fit(x,y)
print(model.feature_importances_)

# Horizontal bar chart of the (up to) 12 most important features.
feat_importances = pd.Series(model.feature_importances_, index=x.columns)
feat_importances.nlargest(12).plot(kind='barh',color='blue')
plt.show()


from statsmodels.stats.outliers_influence import variance_inflation_factor

# Variance inflation factor of every column, computed once (the block used to be
# pasted twice) and printed, because a bare mid-cell expression shows nothing.
# NOTE(review): vif_data still contains the target column `death_event`; VIF is
# normally computed on predictors only. Left as-is because the later feature count
# depends on the current behaviour — confirm whether this is intended.
vif_data=data
VIF=pd.Series(
    [variance_inflation_factor(vif_data.values,i) for i in range(vif_data.shape[1])],
    index=vif_data.columns,
)
print(VIF)


def MC_remover(data, threshold=13):
    """Drop the one column with the highest variance inflation factor, if it is too high.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are all passed to ``variance_inflation_factor``.
    threshold : float, default 13
        The worst column is removed only when its VIF is strictly greater than
        this value. The default keeps the previously hard-coded cut-off.

    Returns
    -------
    pandas.DataFrame
        ``data`` without the worst column, or ``data`` unchanged when no VIF
        exceeds ``threshold``. A message describing the outcome is printed.
    """
    vif = pd.Series(
        [variance_inflation_factor(data.values, i) for i in range(data.shape[1])],
        index=data.columns,
    )
    if vif.max() > threshold:
        # idxmax returns the label of the first maximum, so the lookup is done once.
        worst = vif.idxmax()
        print(worst,'has been removed')
        return data.drop(columns=[worst])
    print("No multicollinearity present anymore")
    return data


# Remove the most collinear column repeatedly and stop as soon as MC_remover
# leaves the frame unchanged, instead of always calling it a fixed 10 times.
while True:
    n_cols_before = vif_data.shape[1]
    vif_data=MC_remover(vif_data)
    if vif_data.shape[1] == n_cols_before:
        break
vif_data.head()

In [None]:
# VIF of the columns that survived the removal loop. Printed explicitly because
# this is not the last expression of the cell and would otherwise be discarded.
VIF=pd.Series([variance_inflation_factor(vif_data.values,i) for i in range(vif_data.shape[1])],index=vif_data.columns)
print(VIF)
print(len(vif_data.columns), 'columns kept')

# Feature matrix and target taken from the pruned frame.
X = vif_data.drop('death_event',axis=1)
y = vif_data['death_event']


from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X_sm, y_sm = sm.fit_resample(X, y)


In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_sm = scaler.fit_transform(X_sm)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_sm, y_sm, test_size=0.2, random_state=42)

In [None]:
from gc import callbacks
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

In [None]:
# Early-stopping callback: an improvement must be at least 0.001 to count, training
# halts after 20 epochs without one, and the best weights seen are restored.
# `monitor` is not set, so Keras' default monitored quantity is used.
early_stopping = EarlyStopping(min_delta=0.001, patience=20, restore_best_weights=True)

# Start from an empty Sequential model.
model = Sequential()

In [None]:

# Build the whole network in one expression so that re-running this cell creates a
# fresh model instead of stacking a second copy of the layers onto the existing one.
# The input width is read from the training matrix rather than hard-coded, so it
# stays correct if the set of retained features changes.
n_features = X_train.shape[1]

model = Sequential([
    Dense(units = 16, kernel_initializer = 'uniform', activation = 'relu', input_dim = n_features),

    Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'),
    Dropout(0.25),

    Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'),
    Dropout(0.25),

    Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'),
    Dropout(0.01),

    # Single sigmoid unit: one probability per sample for the binary outcome.
    Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'),
])



In [None]:

# Configure training: Adam optimiser, binary cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for up to 200 epochs in batches of 32, holding back 20% of the training
# data for validation; the early-stopping callback may end training sooner.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=[early_stopping],
)

In [None]:
# Loss per epoch for the training and validation portions.
plt.title('Training and Validation loss')
plt.plot(history.history['loss'], label='loss', color='blue')
plt.plot(history.history['val_loss'], label='validation loss', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Accuracy per epoch for the training and validation portions.
plt.title('Training and Validation accuracy')
plt.plot(history.history['accuracy'], label='accuracy', color='green')
plt.plot(history.history['val_accuracy'], label='validation accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Threshold the sigmoid outputs at 0.5 and flatten the (n, 1) boolean result to a
# 1-D array of 0/1 integers, matching the form of y_test for the metric functions.
y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

# Preview a few predictions instead of echoing the whole array.
y_pred[:10]

In [None]:

from sklearn.metrics import classification_report

# Text report of the classification metrics for the test predictions,
# followed by the model's layer summary.
report = classification_report(y_test, y_pred)
print(report)
model.summary()

In [None]:
from sklearn.metrics import confusion_matrix

# confusion_matrix puts the true classes on the rows and the predicted classes on
# the columns; label the axes accordingly so the heatmap can be read on its own.
cm = confusion_matrix(y_test, y_pred)
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion Matrix')
plt.show()