In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the heart-disease CSV into a DataFrame.
heart_dataset = pd.read_csv(filepath_or_buffer='/content/Heart_Disease_Prediction.csv')

In [None]:
# Preview the first five rows of the freshly loaded data.
heart_dataset.head(n=5)

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence


In [None]:
# Separate predictors from the target: the last column is the label,
# every column before it is a feature.
x, y = (
    heart_dataset.iloc[:, :-1].values,
    heart_dataset.iloc[:, -1].values,
)

In [None]:
#Data preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Standardise the feature columns: learn the scaling statistics from the
# full feature matrix `x`, then apply them to that same matrix.
scaler = StandardScaler()
scaler.fit(x)
X_scaled = scaler.transform(x)

In [None]:
# Replace the text labels in the 'Heart Disease' column with integer codes.
# Only the DataFrame column is rewritten here; the `y` array extracted
# earlier is not updated by this assignment.
encoder = LabelEncoder()
encoder.fit(heart_dataset['Heart Disease'])
heart_dataset['Heart Disease'] = encoder.transform(heart_dataset['Heart Disease'])

In [None]:
# Preview the first five rows again to inspect the 'Heart Disease' column.
heart_dataset.head(n=5)

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0


In [None]:
from sklearn.model_selection import train_test_split

# Split the *raw* features first, then fit the scaler on the training rows
# only. Fitting the scaler on the full dataset before splitting lets
# test-set statistics leak into preprocessing and inflates the scores.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Re-bind `scaler` to the train-only fit so later cells that scale new
# inputs use exactly the transformation the models were trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Instantiate the four candidate classifiers (not yet fitted).
logreg_model = LogisticRegression()
knn_model = KNeighborsClassifier()
naive_bayes_model = GaussianNB()
svm_model = SVC(kernel='linear')  # SVM with a linear kernel

In [None]:
# Fit the linear SVM on the training split.
svm_model.fit(X=X_train, y=y_train)

In [None]:
# Fit the Gaussian Naive Bayes classifier on the training split.
naive_bayes_model.fit(X=X_train, y=y_train)

In [None]:
# Fit the k-nearest-neighbours classifier on the training split.
knn_model.fit(X=X_train, y=y_train)

In [None]:
# Fit the logistic-regression classifier on the training split.
logreg_model.fit(X=X_train, y=y_train)

In [None]:
#Model Evaluation
from sklearn.metrics import accuracy_score, classification_report,precision_score,recall_score,f1_score

In [None]:
def evaluate_model(model, X_test, y_test, pos_label="Presence"):
    """Score a fitted classifier on held-out data.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X_test
        Feature matrix passed to ``model.predict``.
    y_test
        True labels corresponding to ``X_test``.
    pos_label
        Label treated as the positive class for precision, recall and F1.
        Defaults to ``"Presence"``.

    Returns
    -------
    tuple
        ``(accuracy, report, precision, recall, f1)`` where ``report`` is the
        text produced by ``classification_report``.
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    precision = precision_score(y_test, y_pred, pos_label=pos_label)
    recall = recall_score(y_test, y_pred, pos_label=pos_label)
    # f1_score defaults to pos_label=1, which is not a valid label when the
    # targets are strings; pass the same positive label used above.
    score = f1_score(y_test, y_pred, average='binary', pos_label=pos_label)
    return accuracy, report, precision, recall, score

In [None]:
# Score the SVM on the held-out split; `svmf` receives the F1 value.
svm_accuracy, svm_report, svm_p, svm_r, svmf = evaluate_model(
    svm_model, X_test, y_test
)
print("SVM Accuracy:", svm_accuracy)
print("SVM Precision :", svm_p)

SVM Accuracy: 0.8395061728395061
SVM Precision : 0.8518518518518519


In [None]:
# evaluate_model returns five values (accuracy, report, precision, recall,
# F1); unpack all of them so the assignment does not raise ValueError.
naive_bayes_accuracy, naive_bayes_report, nvp, nvr, nvf = evaluate_model(
    naive_bayes_model, X_test, y_test
)
print("Naive Bayes Accuracy:", naive_bayes_accuracy)


Naive Bayes Accuracy: 0.8395061728395061


In [None]:
# evaluate_model returns five values (accuracy, report, precision, recall,
# F1); unpack all of them so the assignment does not raise ValueError.
knn_accuracy, knn_report, knn_p, knn_r, knn_f = evaluate_model(
    knn_model, X_test, y_test
)
print("KNN Accuracy:", knn_accuracy)

KNN Accuracy: 0.7901234567901234


In [None]:
# evaluate_model returns five values (accuracy, report, precision, recall,
# F1); unpack all of them so the assignment does not raise ValueError.
logreg_accuracy, logreg_report, log_reg_p, lgr, log_reg_f = evaluate_model(
    logreg_model, X_test, y_test
)
print("logistic regression Accuracy:", logreg_accuracy)

logistic regression Accuracy: 0.8271604938271605


In [None]:
# Accuracies in the order: SVM, Naive Bayes, KNN, logistic regression.
accuracy = [
    svm_accuracy,
    naive_bayes_accuracy,
    knn_accuracy,
    logreg_accuracy,
]
print(accuracy)

[0.8395061728395061, 0.8395061728395061, 0.7901234567901234, 0.8271604938271605]


In [None]:
# Precisions in the order: SVM, Naive Bayes, KNN, logistic regression.
precision = [
    svm_p,
    nvp,
    knn_p,
    log_reg_p,
]
print(precision)

[0.8518518518518519, 0.88, 0.8, 0.875]


In [None]:
# Recalls in the order: SVM, Naive Bayes, KNN, logistic regression.
recall = [
    svm_r,
    nvr,
    knn_r,
    lgr,
]
print(recall)

[0.71875, 0.6875, 0.625, 0.65625]


In [None]:
# `best_model` holds the highest accuracy value (not an estimator).
best_model = max(svm_accuracy, naive_bayes_accuracy, knn_accuracy, logreg_accuracy)

# Pick the first model, in priority order SVM -> Naive Bayes -> KNN, whose
# accuracy equals that maximum; logistic regression is the fallback.
selected_model = next(
    (
        candidate
        for candidate_accuracy, candidate in (
            (svm_accuracy, svm_model),
            (naive_bayes_accuracy, naive_bayes_model),
            (knn_accuracy, knn_model),
        )
        if best_model == candidate_accuracy
    ),
    logreg_model,
)

In [None]:
# Show which estimator was chosen (prints the estimator's repr).
print("Selected Model:", selected_model)

Selected Model: SVC(kernel='linear')


In [None]:
# Per-class precision / recall / F1 of the selected model on the test split.
print(
    "Classification Report:\n",
    classification_report(y_test, selected_model.predict(X_test)),
)

Classification Report:
               precision    recall  f1-score   support

     Absence       0.83      0.92      0.87        49
    Presence       0.85      0.72      0.78        32

    accuracy                           0.84        81
   macro avg       0.84      0.82      0.83        81
weighted avg       0.84      0.84      0.84        81



In [None]:
import pickle

In [None]:
# Persist the selected model to disk.
filename = "trainedmodel1.sav"
# A context manager guarantees the file is flushed and closed, even if
# pickling raises; the bare open() call left the handle dangling.
with open(filename, 'wb') as model_file:
    pickle.dump(selected_model, model_file)

In [None]:
# Load the saved model.
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook or another trusted source.
with open('trainedmodel1.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Predict for a single user-supplied record using the loaded model.
# The 13 values are presumably in the dataset's feature-column order
# (Age ... Thallium) -- confirm against the CSV header.
input_data = (70,1,4,130,340,0,2,109,0,2.4,2,3,3)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# The models were trained on standardised features, so the new record must
# go through the same fitted scaler before prediction.
input_data_scaled = scaler.transform(input_data_reshaped)

prediction = loaded_model.predict(input_data_scaled)
print(prediction)

# The model was trained on the string labels ('Absence' / 'Presence'), so
# compare against the label text; comparing with 0 never matches.
if str(prediction[0]) == 'Absence':
  print('The person is not having heart disease')
else:
  print('The person is having heart disease')

['Presence']
The person is having lung cancer
