# Library Imports


In [1]:
# Import necessary libraries
import numpy as np
import pandas  as pd
from sklearn.model_selection import train_test_split
import pickle
from sklearn.linear_model import LogisticRegression
import warnings
# NOTE(review): this silences every warning category for the rest of the run,
# including pandas FutureWarnings and scikit-learn convergence warnings.
warnings.filterwarnings('ignore')

# Loading the Dataset

In [2]:
# Load the dataset
# Reads heart_2020_cleaned.csv (path is relative to the current working directory)
# into the DataFrame that every later cell transforms in place.
df=pd.read_csv("heart_2020_cleaned.csv")

In [None]:
# Preview the first 89 rows of the raw data
df.head(89)

In [None]:
# Summary statistics for the columns pandas treats as numeric
df.describe()

In [None]:
# Column names, dtypes and non-null counts
df.info()

In [None]:
# (number of rows, number of columns)
print(df.shape)

# Data Preprocessing

In [7]:
# Data mapping transforms different types of data into a common numeric format
# that machine learning models can consume, which can improve the model's
# performance. For example, BMI, a continuous variable, can be mapped to four
# categories: Underweight, Normal weight, Overweight, and Obese. This mapping
# can help the model pick up patterns in the data and make more accurate
# predictions.


In [8]:
# Separate the label column from the features: keep HeartDisease as the
# prediction target and remove it from the feature frame in place.
target = df["HeartDisease"]
df.drop(columns=["HeartDisease"], inplace=True)


### Categorical Features Conversion

In [9]:
# Binary answers become 1/0 everywhere they appear in the feature frame.
yes_no_codes = {"Yes": 1, "No": 0}
df.replace(yes_no_codes, inplace=True)

In [None]:
# Distinct AgeCategory labels that need a numeric code
df["AgeCategory"].unique()

In [11]:
# Ordinal-encode AgeCategory with consecutive codes 0..12 in ascending age order.
# The previous mapping assigned 13 to "80 or older" and never used 12, leaving a
# gap in an otherwise evenly spaced ordinal scale.
# NOTE(review): any inference code that feeds the saved model must use this
# same mapping ("80 or older" -> 12).
age_brackets = [
    "18-24",
    "25-29",
    "30-34",
    "35-39",
    "40-44",
    "45-49",
    "50-54",
    "55-59",
    "60-64",
    "65-69",
    "70-74",
    "75-79",
    "80 or older",
]
age_codes = {bracket: code for code, bracket in enumerate(age_brackets)}

# Restrict the replacement to the AgeCategory column so identical strings in
# other columns can never be rewritten by accident. Labels that are not in the
# mapping are left untouched, and re-running the cell is a no-op.
df["AgeCategory"] = df["AgeCategory"].replace(age_codes)

In [None]:
# Distinct Diabetic values remaining after the Yes/No encoding
df["Diabetic"].unique()

In [13]:
# Give the two remaining free-text diabetic answers their own codes
# (plain "Yes"/"No" are already handled by the binary encoding).
diabetic_codes = {
    "No, borderline diabetes": 2,
    "Yes (during pregnancy)": 3,
}
df.replace(diabetic_codes, inplace=True)

In [None]:
# Distinct GenHealth labels that need a numeric code
df["GenHealth"].unique()


In [15]:
# Ordinal-encode GenHealth so the code rises as self-reported health worsens:
# Excellent (0) -> Very good (1) -> Good (2) -> Fair (3) -> Poor (4).
# The previous mapping (Excellent 0, Good 1, Fair 2, Very good 3, Poor 4) was
# not monotonic, so a linear model such as logistic regression could not treat
# the column as an ordered scale.
# NOTE(review): any inference code that feeds the saved model must use this
# same mapping.
gen_health_levels = ["Excellent", "Very good", "Good", "Fair", "Poor"]
gen_health_codes = {level: code for code, level in enumerate(gen_health_levels)}

# Restrict the replacement to the GenHealth column; labels that are not in the
# mapping are left untouched, and re-running the cell is a no-op.
df["GenHealth"] = df["GenHealth"].replace(gen_health_codes)

In [None]:
# Distinct Race labels that need a numeric code
df["Race"].unique()

In [17]:
# Integer codes for the race labels (arbitrary identifiers, not an ordering).
race_codes = {
    "White": 0,
    "Other": 1,
    "Black": 2,
    "Hispanic": 3,
    "Asian": 4,
    "American Indian/Alaskan Native": 5,
}
df.replace(race_codes, inplace=True)

In [None]:
# Distinct Sex labels that need a numeric code
df["Sex"].unique()

In [19]:

# Binary code for sex: Female -> 0, Male -> 1.
sex_codes = {"Female": 0, "Male": 1}
df.replace(sex_codes, inplace=True)

###  BMI Categorization

In [None]:
# Distinct raw BMI values before bucketing
df["BMI"].unique()

In [21]:
# Bucket the continuous BMI value into four weight classes:
#   0 = underweight (BMI < 18.5)
#   1 = normal      (18.5 <= BMI <= 25)
#   2 = overweight  (25 < BMI <= 30)
#   3 = obese       (BMI > 30)
# The boundaries match the previous chained masks (18.5 and 25 -> 1, 30 -> 2).
#
# The result is assigned back to the column explicitly: calling
# ``df['BMI'].mask(..., inplace=True)`` mutates a temporary Series, which under
# pandas Copy-on-Write leaves ``df`` itself unchanged.
bmi = df['BMI']
bmi_class = np.select(
    [bmi < 18.5, bmi <= 25, bmi <= 30],  # first matching condition wins
    [0.0, 1.0, 2.0],
    default=3.0,
)
# np.select sends NaN to the default branch; keep missing BMI values missing,
# as the original masks did.
df['BMI'] = pd.Series(bmi_class, index=bmi.index).where(bmi.notna())

In [None]:
# Preview the first rows after encoding
df.head()

# Train-Test Split

In [23]:
# Split the data into training and testing sets (80% train / 20% test).
# test_size must be a float to mean a fraction: an integer is interpreted as an
# absolute number of rows, so the previous ``test_size=20`` held out only 20
# samples. random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    target,
    test_size=0.2,
    random_state=2,
)

# Training the Logistic Regression Model

In [24]:
# Train a logistic regression model on the training set
# (this cell only constructs the estimator with scikit-learn's default settings)
LogRegModel=LogisticRegression()

In [None]:
# Fit the classifier on the training features and labels
LogRegModel.fit(X_train, y_train)

In [26]:
# Make predictions on the test data
# (y_pred is compared against y_test in the evaluation cell)
y_pred = LogRegModel.predict(X_test)


# Evaluate Model Performance

In [None]:
from sklearn.metrics import accuracy_score

# Calculate accuracy
# (share of test rows whose predicted label equals the true label)
# NOTE(review): if the HeartDisease classes are imbalanced, accuracy alone can
# look high for a model that rarely predicts the minority class — confirm.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


# Saving the Model with Pickle

In [28]:
# Serialize the trained classifier to LogRegModel.pkl in the working directory
# so it can be reloaded later without retraining.
with open('LogRegModel.pkl', 'wb') as model_file:
    pickle.dump(LogRegModel, model_file)