In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import xgboost as xgb
import pickle

In [None]:
# Load the dataset from the CSV file that sits next to the notebook
heart_data = pd.read_csv('heart.csv')

# Print first 5 rows of the dataset
print(heart_data.head())

# Print last 5 rows of the dataset
print(heart_data.tail())

# Get the number of rows and columns in the dataset
print(heart_data.shape)

# Get some info about the data.
# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line after the report).
heart_data.info()

# Check for missing values (count of nulls per column)
print(heart_data.isnull().sum())

# Get statistical measures about the data
print(heart_data.describe())

# Check the distribution of Target Variable (number of rows per class)
print(heart_data['target'].value_counts())


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
3

In [None]:
# Separate the label column from the predictor columns
Y = heart_data['target']
X = heart_data.drop(columns=['target'])

# Hold out 20% of the rows for testing; stratifying on Y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

# Learn the scaling statistics from the training partition only, then apply
# that same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Build the XGBoost classifier, scoring training rounds with log loss
model = xgb.XGBClassifier(
    eval_metric='logloss',
    use_label_encoder=False,
)

# Fit the classifier on the standardized training partition
model.fit(X_train, Y_train)


In [None]:
# Score the fitted model on the held-out partition:
# predict labels for X_test, then compare them with the true labels.
Y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print('Accuracy score on test data:', test_accuracy)


Accuracy score on test data: 0.7540983606557377


In [None]:
def predict_heart_disease(input_data, clf=None):
    """Print whether one patient record is predicted to have heart disease.

    Parameters
    ----------
    input_data : sequence of numbers
        Feature values for a single record, in the same column order the
        notebook-level ``scaler`` was fitted on.
    clf : fitted classifier, optional
        Estimator used for the prediction. Defaults to the notebook-level
        ``model``; pass another fitted estimator (for example one reloaded
        from disk) to check that one instead.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    classifier = model if clf is None else clf

    # A single record must be presented as a one-row, two-dimensional array
    input_data_reshaped = np.asarray(input_data).reshape(1, -1)

    # The scaler was fitted on a DataFrame, so it remembers the column names.
    # Re-attaching them avoids scikit-learn's feature-name mismatch warning
    # that a bare array would trigger. Scalers without stored names (fitted
    # on plain arrays, or older scikit-learn) take the array unchanged.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        input_data_reshaped = pd.DataFrame(input_data_reshaped, columns=feature_names)

    # Standardize the input data with the statistics learned during training
    input_data_reshaped = scaler.transform(input_data_reshaped)

    # Make prediction
    prediction = classifier.predict(input_data_reshaped)
    if prediction[0] == 0:
        print('The Person does not have a Heart Disease')
    else:
        print('The Person has Heart Disease')

# Try the predictive system on one hand-written record.
# Values follow the dataset's feature column order (all columns but 'target').
input_data = (
    62,    # age
    0,     # sex
    0,     # cp
    140,   # trestbps
    268,   # chol
    0,     # fbs
    0,     # restecg
    160,   # thalach
    0,     # exang
    3.6,   # oldpeak
    0,     # slope
    2,     # ca
    2,     # thal
)
predict_heart_disease(input_data)


The Person does not have a Heart Disease




In [None]:
# Save the model. The `with` block guarantees the file handle is flushed and
# closed even if pickling fails.
# NOTE: only the classifier is written here; the fitted `scaler` is not
# persisted, so a consumer of this file needs the scaler from this session
# (or must refit one) before calling predict on raw feature values.
filename = 'heart_disease_gbm_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the model.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Test the loaded model: it must reproduce the in-memory model's predictions
# on the held-out set. (predict_heart_disease below uses the in-memory
# `model`, so on its own it would not exercise `loaded_model` at all.)
assert np.array_equal(loaded_model.predict(X_test), model.predict(X_test)), \
    'Reloaded model disagrees with the in-memory model'
predict_heart_disease(input_data)


The Person does not have a Heart Disease


