Importing the Dependencies

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


Load and explore the dataset

In [6]:
# Location of the heart-disease CSV.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs on the original author's machine. Point it at a project-relative
# location (e.g. a data/ folder next to the notebook) before sharing.
DATA_PATH = r'D:\C Drive files\desktop\All Projects\Heart Disease Prediction\data.csv'

# Load the dataset
heart_data = pd.read_csv(DATA_PATH)

# Display basic information
print(heart_data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
heart_data.info()
print(heart_data.describe())


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5 

Splitting the Features and Target

In [7]:
# Separate the label from the predictors:
# Y is the 'target' column, X is every remaining column of heart_data.
Y = heart_data['target']
X = heart_data.drop(columns=['target'])


In [8]:
# Show the feature matrix X (heart_data with the 'target' column dropped).
print(X)

     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   3       145   233    1        0      150      0      2.3   
1     37    1   2       130   250    0        1      187      0      3.5   
2     41    0   1       130   204    0        0      172      0      1.4   
3     56    1   1       120   236    0        1      178      0      0.8   
4     57    0   0       120   354    0        1      163      1      0.6   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
302   57    0   1       130   236    0        0      174      0      0.0   

     slope  ca  thal  
0        0   0     1  
1        0   0     2  
2        2   0    

In [9]:
# Show the label series Y (the 'target' column of heart_data).
print(Y)

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64


Splitting the Data into Training Data & Test Data

In [10]:
# Hold out 20% of the rows as a test set. Stratifying on Y keeps the class
# proportions the same in both partitions, and the fixed random_state makes
# the split reproducible from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

# Sanity check: full / train / test shapes
print(X.shape, X_train.shape, X_test.shape)


(303, 13) (242, 13) (61, 13)


Scale the Features

In [11]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied to both splits, so nothing about the
# test set influences the transformation.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


Model Training

Logistic Regression

In [13]:
# Train a logistic-regression classifier on the standardized training split.
# max_iter is raised to 1000 (above scikit-learn's default of 100) to give the
# solver ample room to converge.
#
# Deliberately no global warnings filter here: ignoring every UserWarning for
# the rest of the session would also hide scikit-learn's ConvergenceWarning
# (a UserWarning subclass) and its feature-name mismatch warnings, both of
# which signal real problems that should be fixed rather than silenced.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, Y_train)

print("Model training completed successfully!")


Model training completed successfully!


Model Evaluation

In [14]:
# accuracy_score's signature is (y_true, y_pred): ground-truth labels go
# first, predictions second. Accuracy happens to be symmetric, but keeping the
# documented order avoids silently wrong numbers if this cell is later adapted
# to an asymmetric metric (precision, recall, ...).

# Evaluate on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data : ', training_data_accuracy)

# Evaluate on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on Test data : ', test_data_accuracy)


Accuracy on Training data :  0.8471074380165289
Accuracy on Test data :  0.7868852459016393


Building a Predictive System

In [15]:
# Feature values for a single patient, listed in the same order as the
# columns of X.
input_data = (52, 1, 2, 172, 199, 1, 1, 162, 0, 0.5, 2, 0, 3)

# Wrap the sample in a one-row DataFrame that carries X's column names.
# The scaler was fitted on a DataFrame, so it remembers the feature names;
# passing a bare NumPy array makes scikit-learn warn that the input "does not
# have valid feature names". Building the frame also fails fast if the number
# of values does not match the number of feature columns.
input_frame = pd.DataFrame([input_data], columns=X.columns)

# Scale the input with the statistics learned from the training split
input_data_scaled = scaler.transform(input_frame)

# Make prediction (class 0 = no heart disease, anything else = heart disease)
prediction = model.predict(input_data_scaled)
if prediction[0] == 0:
    print('The Person does not have a Heart Disease')
else:
    print('The Person has Heart Disease')


The Person has Heart Disease
