<a href="https://colab.research.google.com/github/zeyneppniisa/Heart-Disease-Prediction/blob/main/heart_disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so that files
# stored on Drive (the dataset CSV is read from /content/drive/MyDrive later)
# are reachable through ordinary file paths.
# NOTE(review): `google.colab` is presumably only importable when running in
# Colab - confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

## What is in the Data

In [None]:
import pandas as pd

# Location of the dataset CSV under the Drive mount point
# ("dosya_yolu" is Turkish for "file path").
dosya_yolu = '/content/drive/MyDrive/dataset_heart.csv'

# Load the CSV into the DataFrame that every later cell reads as `df`.
df = pd.read_csv(filepath_or_buffer=dosya_yolu)

### Quick Information About the Data

In [None]:
# Print a concise summary of the DataFrame: its columns, their non-null
# counts and dtypes. Useful for spotting missing values before modelling.
df.info()

## Statistical Description of Data

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# DataFrame's numeric columns; shown as this cell's output.
df.describe()

### Presence of Heart Disease by Age

In [None]:
# For every age, the share (in %) of each 'heart disease' label among the
# rows with that age. The result has one row per (age, label) pair with the
# share stored in a 'Percentage' column.
heart_disease_percentage = (
    df.groupby('age')['heart disease']
    .value_counts(normalize=True)  # within-age proportions
    .mul(100)                      # proportions -> percentages
    .rename('Percentage')
    .reset_index()
)

# Keep only the rows for label 2, which this notebook plots as
# "presence of heart disease".
heart_disease_present = heart_disease_percentage.loc[
    lambda table: table['heart disease'] == 2
]


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart with one bar per age; the bar height is the percentage of rows
# of that age whose 'heart disease' label is 2.
plt.figure(figsize=(12, 6))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` - confirm against the installed version before changing the call.
sns.barplot(x='age', y='Percentage', data=heart_disease_present, palette='viridis')
plt.title('Presence of Heart Disease by Age (%)')
plt.xlabel('Age')
plt.ylabel('Presence of Heart Disease (%)')
# Render the figure explicitly so the cell does not echo the Text(...) repr
# returned by plt.ylabel; the pie-chart cell also ends with plt.show().
plt.show()

## Presence of Heart Disease by Fasting Blood Sugar

In [None]:
# For every 'fasting blood sugar' value, the share (in %) of each
# 'heart disease' label among the rows in that group. The result has one row
# per (fasting blood sugar, label) pair with the share in 'Percentage'.
heart_disease_percentage2 = (
    df.groupby('fasting blood sugar')['heart disease']
    .value_counts(normalize=True)  # within-group proportions
    .mul(100)                      # proportions -> percentages
    .rename('Percentage')
    .reset_index()
)

# Keep only the rows for label 2, which this notebook plots as
# "presence of heart disease".
heart_disease_present2 = heart_disease_percentage2.loc[
    lambda table: table['heart disease'] == 2
]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pie chart with one wedge per 'fasting blood sugar' group; each wedge is
# sized by that group's heart-disease percentage (label 2 rows only).
group_percentages = heart_disease_present2['Percentage']
group_values = heart_disease_present2['fasting blood sugar']
percentage_total = group_percentages.sum()

plt.figure(figsize=(12, 6))
plt.pie(
    group_percentages,
    labels=group_values,
    # matplotlib passes autopct each wedge's share of the whole pie (wedges
    # are normalised to sum to 100). The group percentages do not sum to 100,
    # so rescale by their total to print each group's actual percentage.
    autopct=lambda wedge_share: '{:.1f}%'.format(wedge_share * percentage_total / 100),
    startangle=90,
    colors=sns.color_palette('viridis'),
)
plt.title('Presence of Heart Disease by Fasting Blood Sugar (%)')
# The wedges are fasting-blood-sugar groups, not presence/absence, so the
# legend names the group each wedge stands for.
plt.legend(['Fasting blood sugar = {}'.format(value) for value in group_values])
plt.show()

## Splitting the Dataset into Independent Variables (X) and Dependent Variable (y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Target vector: the 'heart disease' label column.
y = df['heart disease']

# Feature matrix: every remaining column. `columns=` already identifies the
# axis, so no separate axis argument is needed.
x = df.drop(columns=['heart disease'])

In [None]:
# Preview the first rows of the feature matrix. A bare expression gives the
# notebook's rich table rendering instead of the plain-text dump of print().
x.head()

In [None]:
# Preview the first target labels as the cell's output instead of printing
# the whole Series as plain text.
y.head()

## Splitting the Dataset into Training and Test Subsets

In [None]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the label
# proportions the same in both subsets, and the fixed `random_state` makes
# the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2,
)


## Creating a Logistic Regression Model







In [None]:
# Logistic-regression classifier used for the rest of the notebook.
# scikit-learn's default iteration cap is 100; the features are fed to the
# model without any scaling step, so the solver may stop before converging
# (ConvergenceWarning). A higher cap only lets the optimiser finish.
model = LogisticRegression(max_iter=1000)

## Training the Model

In [None]:
# Fit the logistic regression model on the training subset only; the test
# subset stays unseen until evaluation.
model.fit(X=x_train, y=y_train)

## Make Predictions and Calculate Accuracy Score

In [None]:
# Labels the fitted model predicts for the rows it was trained on.
x_train_prediction = model.predict(x_train)

# accuracy_score expects (y_true, y_pred): ground-truth labels first, then
# predictions. The variable name keeps its original spelling because the
# cell that prints the score reads it under this name.
trainig_data_accuracy = accuracy_score(y_train, x_train_prediction)


In [None]:
# Report the fraction of training rows the model labels correctly.
training_report = ('Accuracy on the Training data: ', trainig_data_accuracy)
print(*training_report)


In [None]:
# Labels the fitted model predicts for the held-out test rows.
x_test_prediction = model.predict(x_test)

# accuracy_score expects (y_true, y_pred): ground-truth labels first, then
# predictions.
test_data_accuracy = accuracy_score(y_test, x_test_prediction)

In [None]:
# Report the fraction of held-out test rows the model labels correctly.
test_report = ('Accuracy on the test data: ', test_data_accuracy)
print(*test_report)

## Testing

In [None]:
# Feature values for a single patient.
# NOTE(review): assumes the values are listed in the same order as the
# columns of `x` - confirm against the dataset. A wrong number of values
# makes the DataFrame constructor below raise instead of predicting silently.
input_data = (66, 1, 4, 128, 528, 0, 2, 132, 0, 2.8, 2, 3, 7)

# Convert input data to a list
input_data_as_list = list(input_data)

# Wrap the single instance as a one-row DataFrame carrying the training
# column names, so the model receives the same named features it was fitted
# on rather than an unnamed nested list.
input_data_reshaped = pd.DataFrame([input_data_as_list], columns=x.columns)

# Make a prediction using the trained model
prediction = model.predict(input_data_reshaped)

# The notebook treats label 2 as "heart disease present" (the analysis cells
# filter on `== 2`), so the check uses that label; any other label is
# reported as absence.
if prediction[0] == 2:
    print('Has heart disease')
else:
    print('Does not have a heart disease')
