<a href="https://colab.research.google.com/github/dirwolf/Heart-Attack-prediction-/blob/main/Heart_atack_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pandas scikit-learn tensorflow keras



In [1]:
from google.colab import files
uploaded = files.upload()

Saving CVD_cleaned.csv to CVD_cleaned.csv


In [58]:
import io
import pandas as pd

data = pd.read_csv(io.BytesIO(uploaded['CVD_cleaned.csv']))

In [46]:
print(data.head())

  General_Health                  Checkup Exercise Heart_Disease Skin_Cancer  \
0           Poor  Within the past 2 years       No            No          No   
1      Very Good     Within the past year       No           Yes          No   
2      Very Good     Within the past year      Yes            No          No   
3           Poor     Within the past year      Yes           Yes          No   
4           Good     Within the past year       No            No          No   

  Other_Cancer Depression Diabetes Arthritis     Sex Age_Category  \
0           No         No       No       Yes  Female        70-74   
1           No         No      Yes        No  Female        70-74   
2           No         No      Yes        No  Female        60-64   
3           No         No      Yes        No    Male        75-79   
4           No         No       No        No    Male          80+   

   Height_(cm)  Weight_(kg)    BMI Smoking_History  Alcohol_Consumption  \
0        150.0        32.66  

In [59]:
# Convert the 'Heart_Disease' column to numeric
data['Heart_Disease'] = data['Heart_Disease'].map({'No': 0, 'Yes': 1})


In [60]:
# Replace age ranges with their midpoints
def age_to_midpoint(age_range):
    if age_range == '80+':
        return 80  # Assign 80 for this case
    lower, upper = map(int, age_range.split('-'))
    return (lower + upper) // 2

data['Age_Category'] = data['Age_Category'].apply(age_to_midpoint)


In [62]:
# Convert 'Smoking_History' column to numeric
data['Smoking_History'] = data['Smoking_History'].map({'No': 0, 'Yes': 1})


In [63]:
# Convert 'Exercise', 'Heart_Disease', 'Depression', and 'Diabetes' columns to numeric (Yes=1, No=0)
columns_to_convert = ['Exercise', 'Depression', 'Diabetes']

for column in columns_to_convert:
    data[column] = data[column].map({'No': 0, 'Yes': 1})


In [64]:
# Map 'General_Health' values to numeric values
health_mapping = {
    'Poor': 0,
    'Fair': 5,
    'Good': 10,
    'Very Good': 15,
    'Excellent': 20
}

data['General_Health'] = data['General_Health'].map(health_mapping)


In [65]:
# Map 'Sex' column to numeric values (Male = 0, Female = 1)
data['Sex'] = data['Sex'].map({'Male': 0, 'Female': 1})


In [66]:
print(data.head())

   General_Health                  Checkup  Exercise  Heart_Disease  \
0               0  Within the past 2 years         0              0   
1              15     Within the past year         0              1   
2              15     Within the past year         1              0   
3               0     Within the past year         1              1   
4              10     Within the past year         0              0   

  Skin_Cancer Other_Cancer  Depression  Diabetes Arthritis  Sex  Age_Category  \
0          No           No           0       0.0       Yes    1            72   
1          No           No           0       1.0        No    1            72   
2          No           No           0       1.0        No    1            62   
3          No           No           0       1.0        No    0            77   
4          No           No           0       0.0        No    0            80   

   Height_(cm)  Weight_(kg)    BMI  Smoking_History  Alcohol_Consumption  \
0        1

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
# Ensure 'data' is loaded correctly before running the rest of the code
# Example: data = pd.read_csv('CVD_cleaned.csv')
print("Dataset loaded successfully.")

# Step 2: Select specified parameters
columns_to_use = [
    'General_Health', 'Exercise', 'Heart_Disease', 'Depression', 'Diabetes',
    'Sex', 'Age_Category', 'Height_(cm)', 'Weight_(kg)', 'Smoking_History',
    'Alcohol_Consumption', 'Green_Vegetables_Consumption'
]
data = data[columns_to_use]  # Subset the dataset with selected columns
print("Subset of data:")
print(data.head())  # Verify the subset

# Step 3: Handle missing values
# Impute missing values for numerical columns
imputer = SimpleImputer(strategy='mean')  # Replace 'mean' with 'median' if needed
for col in data.select_dtypes(include=['float64', 'int64']).columns:
    data[col] = imputer.fit_transform(data[[col]])

# Impute missing values for categorical columns (if any)
for col in data.select_dtypes(include='object').columns:
    data[col] = data[col].fillna("Unknown")  # Or impute with the most frequent value

# Step 4: Separate features and target
X = data.drop(columns=['Heart_Disease'])  # Replace 'Heart_Disease' with your target column name
y = data['Heart_Disease']

# Step 5: Encode categorical variables
encoder = LabelEncoder()
for col in X.select_dtypes(include='object').columns:
    X[col] = encoder.fit_transform(X[col])

# Step 6: Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 8: Define models
models = {
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Machine': SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
}

# Step 9: Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy of {name}: {accuracy:.2f}")


Dataset loaded successfully.
Subset of data:
   General_Health  Exercise  Heart_Disease  Depression  Diabetes  Sex  \
0               0         0              0           0       0.0    1   
1              15         0              1           0       1.0    1   
2              15         1              0           0       1.0    1   
3               0         1              1           0       1.0    0   
4              10         0              0           0       0.0    0   

   Age_Category  Height_(cm)  Weight_(kg)  Smoking_History  \
0            72        150.0        32.66                1   
1            72        165.0        77.11                0   
2            62        163.0        88.45                0   
3            77        180.0        93.44                0   
4            80        191.0        88.45                1   

   Alcohol_Consumption  Green_Vegetables_Consumption  
0                  0.0                          16.0  
1                  0.0           