In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import AdaBoostClassifier , GradientBoostingClassifier ,StackingClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the raw dataset.
df = pd.read_csv('heart_disease_uci.csv')

# Column groups are read from the raw dtypes, before encoding changes them.
# The target 'num' is excluded from the numeric feature list so it is never
# imputed or scaled together with the features.
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
numerical_cols = [
    col for col in df.select_dtypes(include=['float64', 'int64']).columns
    if col != 'num'
]
# NOTE(review): if the CSV carries a row-identifier column (e.g. 'id') it is
# still used as a feature here — confirm whether it should be dropped.

# Encode categorical columns as integer codes while keeping missing entries as
# NaN. Encoding the whole column (NaN included) would turn NaN into a category
# of its own and leave nothing for the categorical imputer below to fill.
label_encoder = LabelEncoder()
for col in categorical_cols:
    observed = df[col].notna()
    codes = pd.Series(float('nan'), index=df.index)
    codes[observed] = label_encoder.fit_transform(df.loc[observed, col])
    df[col] = codes

# Binarize the target: 0 stays 0, every other value becomes 1.
df['num'] = df['num'].apply(lambda x: 0 if x == 0 else 1)

X = df.drop('num', axis=1)
y = df['num']

# Split BEFORE fitting any imputer/scaler so that no statistic computed from
# the test rows leaks into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test = X_train.copy(), X_test.copy()

# Missing values for numeric columns: mean learned from the training split only.
imputer_numeric = SimpleImputer(strategy='mean')
X_train[numerical_cols] = imputer_numeric.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = imputer_numeric.transform(X_test[numerical_cols])

# Missing values for categorical columns: most frequent code from the training
# split. A mean/median of category codes could yield a code that corresponds to
# no real category, so the mode is used for every categorical column.
imputer_categorical = SimpleImputer(strategy='most_frequent')
X_train[categorical_cols] = imputer_categorical.fit_transform(X_train[categorical_cols])
X_test[categorical_cols] = imputer_categorical.transform(X_test[categorical_cols])

# Normalize: fit the scaler on the training split, apply it to both splits.
# The original row index is kept so X_* stays aligned with y_*.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)


# Only the number of boosting rounds is tuned; all other AdaBoost settings
# stay at their defaults.
param_grid = {'n_estimators': [10, 15, 18, 25, 50, 100, 200]}
adaboost_classifier = AdaBoostClassifier()

# GridSearchCV: 5-fold cross-validated accuracy on the training split.
grid_search = GridSearchCV(
    estimator=adaboost_classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)

# Score the refit best estimator on the held-out test split.
best_adaboost = grid_search.best_estimator_
test_accuracy = best_adaboost.score(X_test, y_test)
print("Test Set Accuracy:", test_accuracy)


Best Parameters: {'n_estimators': 15}
Best Accuracy: 0.8736440522154808
Test Set Accuracy: 0.842391304347826


In [None]:
# Cross-Validation: check the 15-estimator AdaBoost model under several fold counts.
adaboost_classifier = AdaBoostClassifier(n_estimators=15)
k_values = [3, 5, 7]

for k in k_values:
    cross_val_scores = cross_val_score(
        estimator=adaboost_classifier,
        X=X_train,
        y=y_train,
        scoring='accuracy',
        cv=k,
    )
    mean_accuracy = cross_val_scores.mean()
    print(f'Cross-Validation Accuracy (k={k}): {mean_accuracy}')

# Fit on the whole training split, then report accuracy on the test split.
test_accuracy = adaboost_classifier.fit(X_train, y_train).score(X_test, y_test)
print("Test Set Accuracy:", test_accuracy)


Cross-Validation Accuracy (k=3): 0.86548310381063
Cross-Validation Accuracy (k=5): 0.8736440522154808
Cross-Validation Accuracy (k=7): 0.8696187909125914
Test Set Accuracy: 0.842391304347826


In [None]:
#Step 1: Prepare the Data => first cell (X_train, X_test, y_train, y_test)

#Step 2: Model Selection
base_models = [
    ('svm', SVC(kernel='linear', probability=True)),
    ('decision_tree', DecisionTreeClassifier()),
    ('gradient_boosting', GradientBoostingClassifier())
]

#Step 3: Developing a Meta Model
meta_model = LogisticRegression()

#Step 4: Training the Base Models and the Meta Model
# The meta-model must never see the test labels. StackingClassifier trains it
# on out-of-fold predictions of the base models over the TRAINING split
# (cv=5), then refits the base models on the full training split. The
# estimators passed in are cloned, so `base_models` and `meta_model`
# themselves stay unfitted.
stacking_classifier = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)
stacking_classifier.fit(X_train, y_train)

#Step 5: Making Test Set Predictions
# The test split is used only here, for the final evaluation.
stacking_predictions = stacking_classifier.predict(X_test)

#Step 6: Model Evaluation
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
stacking_classification_report = classification_report(y_test, stacking_predictions)

print("Accuracy:", stacking_accuracy)
print("Classification:\n", stacking_classification_report)


Accuracy: 0.8695652173913043
Classification:
               precision    recall  f1-score   support

           0       0.83      0.85      0.84        75
           1       0.90      0.88      0.89       109

    accuracy                           0.87       184
   macro avg       0.86      0.87      0.87       184
weighted avg       0.87      0.87      0.87       184



In [None]:
from xgboost import XGBClassifier
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Reload the raw data: this cell deliberately skips the imputation done in the
# first cell and passes missing numeric values through to XGBoost as NaN.
df = pd.read_csv('heart_disease_uci.csv')
#cat col
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
#cat col to num (a missing value ends up as a label of its own)
label_encoder = LabelEncoder()
df[categorical_cols] = df[categorical_cols].apply(label_encoder.fit_transform)
# Binarize the target: 0 stays 0, every other value becomes 1.
df['num'] = df['num'].apply(lambda x: 0 if x == 0 else 1)
X = df.drop('num', axis=1)
y = df['num']
#split first, so the scaler below is fitted on training rows only
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalize: statistics come from the training split and are applied to both
# splits; the row index is kept so X_* stays aligned with y_*.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Tune n_estimators with 5-fold CV; `param_grid` is the grid defined in the
# first cell.
xgb_model = XGBClassifier()
grid_search = GridSearchCV(xgb_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_n_estimators = grid_search.best_params_['n_estimators']

k_values = [3, 5, 7]
# sklearn scorer name -> label used in the printed report
cv_metrics = {
    'accuracy': 'Accuracy',
    'precision': 'Precision',
    'recall': 'Recall',
    'f1': 'F1-Score',
}

# k-fold cross-validation on the training split. `k` is the number of folds, so
# each iteration reports a different estimate instead of repeating one
# test-set score under three different labels.
for k in k_values:
    xgb_model = XGBClassifier(n_estimators=best_n_estimators)

    print(f'\nEvaluation Metrics with k={k}:')
    for scorer_name, label in cv_metrics.items():
        fold_scores = cross_val_score(xgb_model, X_train, y_train, cv=k, scoring=scorer_name)
        print(f'{label}: {fold_scores.mean():.4f}')

# Final hold-out evaluation: independent of k, so it is done exactly once.
xgb_model = XGBClassifier(n_estimators=best_n_estimators)
xgb_model.fit(X_train, y_train)

y_pred = xgb_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('\nTest Set Metrics:')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')


Evaluation Metrics with k=3:
Accuracy: 0.8587
Precision: 0.9109
Recall: 0.8440
F1-Score: 0.8762

Evaluation Metrics with k=5:
Accuracy: 0.8587
Precision: 0.9109
Recall: 0.8440
F1-Score: 0.8762

Evaluation Metrics with k=7:
Accuracy: 0.8587
Precision: 0.9109
Recall: 0.8440
F1-Score: 0.8762
