In [329]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [330]:
dataset=pd.read_csv("/content/heart_failure_clinical_records_dataset (1).csv")

In [331]:
dataset.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [332]:
dataset.value_counts('DEATH_EVENT')

Unnamed: 0_level_0,count
DEATH_EVENT,Unnamed: 1_level_1
0,203
1,96


In [333]:
dataset.shape

(299, 13)

In [334]:
X=dataset.drop(['DEATH_EVENT'],axis=1)
Y=dataset['DEATH_EVENT']

In [335]:
scale=StandardScaler()

In [336]:
scaled_X=scale.fit_transform(X)
X_train,X_test,Y_train,Y_test=train_test_split(scaled_X,Y,test_size=0.2,random_state=31)

In [337]:
model=SVC()

In [338]:
model.fit(X_train,Y_train)

In [339]:
prediction_on_training_data = model.predict(X_train)
accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data)
print("The accuracy score on training data: ", accuracy_on_training_data)

The accuracy score on training data:  0.9288702928870293


In [340]:
prediction_on_testing_data = model.predict(X_test)
accuracy_on_testing_data = accuracy_score(Y_test, prediction_on_testing_data)
print("The accuracy score on testing data: ", accuracy_on_testing_data)

The accuracy score on testing data:  0.7166666666666667


In [341]:
param_grids = {
    'C': [0.1, 1, 10],                 # Regularization strength
    'gamma': ['scale', 0.01, 0.001],  # Kernel coefficient
    'kernel': ['rbf', 'linear']       # Kernel type
}

grid_search = GridSearchCV(
    estimator=model,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    cv=4,
    param_grid=param_grids
)
grid_search.fit(X_train,Y_train)

grid_search.best_params_
grid_search.best_score_
best_svc = grid_search.best_estimator_
test_accuracy = best_svc.score(X_test,Y_test)
test_accuracy





Fitting 4 folds for each of 18 candidates, totalling 72 fits


0.7166666666666667

In [342]:
input=[75,0,582,0,20,1,265000,1.9,130,1,4,0]

input_array = np.asarray(input)
input_array_reshaped = input_array.reshape(1, -1)
input_data = scale.transform(input_array_reshaped)

prediction = model.predict(input_data)
print("Prediction:", prediction)
def switch(prediction):
  if (prediction[0]==1):
    return'Dead'
  if (prediction[0]==0):
    return'Alive'

print(switch(prediction))

Prediction: [1]
Dead




In [343]:
import gzip
import pickle

with gzip.open('model.pkl','wb') as f:
  pickle.dump(model,f)