In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; later cells read and write
# files underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
from ucimlrepo import fetch_ucirepo

# Download dataset id 45 from the UCI repository
heart_disease = fetch_ucirepo(id=45)

# Feature table and target table (both pandas DataFrames)
dataset_tables = heart_disease.data
X = dataset_tables.features
y = dataset_tables.targets

# Show the dataset-level metadata first, then the per-variable description
for description in (heart_disease.metadata, heart_disease.variables):
    print(description)


{'uci_id': 45, 'name': 'Heart Disease', 'repository_url': 'https://archive.ics.uci.edu/dataset/45/heart+disease', 'data_url': 'https://archive.ics.uci.edu/static/public/45/data.csv', 'abstract': '4 databases: Cleveland, Hungary, Switzerland, and the VA Long Beach', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 303, 'num_features': 13, 'feature_types': ['Categorical', 'Integer', 'Real'], 'demographics': ['Age', 'Sex'], 'target_col': ['num'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1989, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C52P4X', 'creators': ['Andras Janosi', 'William Steinbrunn', 'Matthias Pfisterer', 'Robert Detrano'], 'intro_paper': {'ID': 231, 'type': 'NATIVE', 'title': 'International application of a new probability algorithm for the diagnosis of coronary artery disease.', 'authors': 'R. Detrano, A. Jánosi, W. Steinbrunn, M

In [10]:
import os

# Project folder on the mounted Drive; relative paths used in later cells
# resolve against it once it becomes the working directory.
project_path = '/content/drive/MyDrive/AIML_Project'
os.chdir(project_path)

current_dir = os.getcwd()
print("📁 Current working directory:", current_dir)


📁 Current working directory: /content/drive/MyDrive/AIML_Project


In [11]:
# Install ucimlrepo if not installed
!pip install ucimlrepo --quiet

from ucimlrepo import fetch_ucirepo

# Fetch dataset
heart_disease = fetch_ucirepo(id=45)

# Data (as pandas DataFrames)
X = heart_disease.data.features
y = heart_disease.data.targets

print("✅ Dataset loaded successfully!")
print("Shape of features:", X.shape)
print("Shape of target:", y.shape)


✅ Dataset loaded successfully!
Shape of features: (303, 13)
Shape of target: (303, 1)


In [12]:
import pandas as pd
import numpy as np

# Build the modelling table in one chain so every step returns a fresh frame
# (no in-place edits, so the cell can be re-run safely):
#   1. attach the label: `y` is a one-column DataFrame, so take that column as
#      a Series (aligned with X on the shared index);
#   2. drop incomplete rows BEFORE binarising: a missing label compares as
#      "not > 0" and would otherwise be silently turned into class 0;
#   3. binarise the label: 0 = no disease, 1 = disease present (any value > 0).
data = (
    X.assign(target=y.iloc[:, 0])
    .dropna()
    .assign(target=lambda frame: (frame['target'] > 0).astype(int))
)

print("✅ Cleaned dataset shape:", data.shape)
data.head()


✅ Cleaned dataset shape: (297, 14)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0


In [13]:
from sklearn.model_selection import train_test_split

# Separate the label column from the predictor columns
y_clean = data['target']
X_clean = data.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    random_state=42,
)

train_shape = X_train.shape
test_shape = X_test.shape
print("Training set:", train_shape)
print("Testing set:", test_shape)


Training set: (237, 13)
Testing set: (60, 13)


In [14]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("✅ Data scaling complete!")


✅ Data scaling complete!


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fit a 100-tree random forest on the scaled training split (seeded)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model = model.fit(X_train_scaled, y_train)

# Predict labels for the held-out split
y_pred = model.predict(X_test_scaled)

# Compute each metric once, then report
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("🎯 Model Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


🎯 Model Accuracy: 0.8833333333333333

Confusion Matrix:
 [[32  4]
 [ 3 21]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60



In [16]:
import joblib

# Build the destination paths once and write to exactly those paths, so the
# locations printed below are where the files really are, whatever the
# current working directory happens to be.
model_path = os.path.join(project_path, 'heart_disease_model.pkl')
scaler_path = os.path.join(project_path, 'scaler.pkl')

# Persist the fitted classifier and the fitted scaler together: predictions
# on new data need both.
joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)

print("✅ Model and Scaler saved to Drive folder:")
print(model_path)
print(scaler_path)


✅ Model and Scaler saved to Drive folder:
/content/drive/MyDrive/AIML_Project/heart_disease_model.pkl
/content/drive/MyDrive/AIML_Project/scaler.pkl


In [17]:
# Example patient record. It is built as a DataFrame keyed by the training
# columns so that the feature order is explicit and the scaler (fitted on a
# DataFrame) receives matching column names.
# The values use the same category encoding as the training data; they are
# the first dataset row displayed by data.head() above. The earlier sample
# used slope=0 and thal=1, codes that do not occur in the displayed training rows.
sample = pd.DataFrame(
    [[63, 1, 1, 145, 233, 1, 2, 150, 0, 2.3, 3, 0, 6]],
    columns=X_train.columns,
)
sample_scaled = scaler.transform(sample)
pred = model.predict(sample_scaled)

print("🩺 Predicted Risk:", "Heart Disease" if pred[0] == 1 else "No Heart Disease")


🩺 Predicted Risk: No Heart Disease




In [18]:
from google.colab import files

# Trigger a browser download for each saved artifact, model first
for artifact_name in ('heart_disease_model.pkl', 'scaler.pkl'):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>