# Import Required Libraries
Import the necessary libraries, including pandas, NumPy, and scikit-learn.

In [None]:
# Import the necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load and Explore the Dataset
Load the coronary artery disease dataset and perform exploratory data analysis (EDA) to understand the data.

In [None]:
# Load and Explore the Dataset

# Plotting libraries used by the charts at the end of this cell.
import matplotlib.pyplot as plt
import seaborn as sns

# Load the coronary artery disease dataset into a DataFrame.
# NOTE(review): the rest of the notebook assumes the CSV has a column named
# 'target' -- confirm against the file header.
url = './heart_cleveland_upload.csv'  # Replace with the actual URL or file path
data = pd.read_csv(url)

# Only the last expression of a notebook cell is rendered (and nothing is when
# run as a script), so every intermediate summary is printed explicitly.

# First few rows of the dataset
print(data.head())

# Column dtypes and non-null counts (info() prints by itself)
data.info()

# Number of missing values per column
print(data.isnull().sum())

# Summary statistics of the dataset
print(data.describe())

# Visualize the distribution of the target variable
plt.figure(figsize=(8, 6))
sns.countplot(x='target', data=data)
plt.title('Distribution of Target Variable')
plt.xlabel('Target')
plt.ylabel('Count')
plt.show()

# Visualize the correlation matrix. Correlation is only defined for numeric
# columns; selecting them first keeps this working if the raw data contains
# text columns (newer pandas raises instead of silently dropping them).
plt.figure(figsize=(12, 10))
correlation_matrix = data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()

# Preprocess the Data
Handle missing values, encode categorical variables, and scale the features if necessary.

In [None]:
# Preprocess the Data

from sklearn.preprocessing import StandardScaler

# Handle missing values by filling them with the median of each numeric column.
# numeric_only=True keeps this working while text columns are still present
# (they are only encoded in the next step).
data.fillna(data.median(numeric_only=True), inplace=True)

# Encode categorical variables using one-hot encoding
data = pd.get_dummies(data, drop_first=True)

# Separate the feature columns once so the scaled values and their column
# labels always come from the same frame, wherever 'target' sits in `data`.
feature_frame = data.drop('target', axis=1)

# Scale the features using StandardScaler
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics influence the scaling -- consider fitting on
# the training split only.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)

# Create a DataFrame with the scaled features, labelled with the feature names
scaled_data = pd.DataFrame(scaled_features, columns=feature_frame.columns)

# Add the (unscaled) target variable back to the DataFrame
scaled_data['target'] = data['target'].values

# Display the first few rows of the preprocessed dataset
scaled_data.head()

# Split the Data into Training and Testing Sets
Split the dataset into training and testing sets using train_test_split from scikit-learn.

In [None]:
# Split the Data into Training and Testing Sets

# Separate the label column (y) from the feature columns (X)
y = scaled_data['target']
X = scaled_data.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the shape of every resulting piece
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# Train the Logistic Regression Model
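Train a logistic regression model using the training data, then predict on the test set and report accuracy, the confusion matrix, and the classification report.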

In [None]:
# Train the Logistic Regression Model

# Create a logistic regression classifier with default settings and fit it on
# the training split (fit() returns the fitted estimator itself)
logistic_model = LogisticRegression().fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred = logistic_model.predict(X_test)

# Score the predictions against the true test labels
class_report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the evaluation metrics, each heading followed by its value
print(f'Accuracy: {accuracy}')
print('Confusion Matrix:', conf_matrix, sep='\n')
print('Classification Report:', class_report, sep='\n')

# Evaluate the Model
Evaluate the model's performance using metrics such as accuracy, precision, recall, and the ROC curve.

In [None]:
# Evaluate the Model

# Import additional necessary libraries for evaluation
from sklearn.metrics import precision_score, recall_score, roc_auc_score, roc_curve

# Calculate precision and recall from the hard class predictions
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Predicted probability of the positive class (second column of predict_proba)
y_pred_prob = logistic_model.predict_proba(X_test)[:, 1]

# Calculate the ROC AUC score from the probabilities, not from the 0/1 labels:
# hard labels collapse the curve to a single threshold, so the reported area
# would not match the ROC curve plotted below.
roc_auc = roc_auc_score(y_test, y_pred_prob)

# Print precision, recall, and ROC AUC score
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'ROC AUC Score: {roc_auc}')

# Plot the ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Make Predictions
Use the trained model to make predictions on new data.

In [None]:
# Make Predictions

# Define new data for prediction (example data). The values must be listed in
# the same order as the feature columns used for training.
new_data = np.array([[63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]])

# The scaler and the model were both fitted on DataFrames, so the new sample is
# wrapped with the matching column names. This avoids scikit-learn's
# "X does not have valid feature names" warnings, and the DataFrame constructor
# raises immediately if the number of values does not match the feature count.
new_data_frame = pd.DataFrame(new_data, columns=data.columns.drop('target'))

# Scale the new data using the same scaler used for training data
new_data_scaled = scaler.transform(new_data_frame)

# Make predictions using the trained logistic regression model
new_predictions = logistic_model.predict(
    pd.DataFrame(new_data_scaled, columns=X.columns)
)

# Print the predictions
print(f'Predictions for the new data: {new_predictions}')