Hyperparameter Tuning with GridSearchCV and RandomizedSearchCV
Step 1: Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler


Step 2: Load and Preprocess the Dataset

In [2]:
# Load the dataset
df = pd.read_csv('heart.csv')

# Handle missing values (if any): fill numeric columns with their median.
# numeric_only=True keeps this working when the file contains text columns,
# where a plain df.median() raises TypeError on pandas >= 2.0. The result is
# reassigned rather than mutating the frame in place.
df = df.fillna(df.median(numeric_only=True))

# Encode categorical variables (if any)
df = pd.get_dummies(df, drop_first=True)

# Feature scaling (standardization) of every column except the label.
# NOTE(review): the scaler is fit on all rows before the train/test split,
# so test-set statistics influence the scaled features; consider fitting it
# on the training portion only (e.g. inside a Pipeline).
feature_frame = df.drop('target', axis=1)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(feature_frame)

# Convert scaled data back to a DataFrame. Reusing df's index keeps the label
# column assigned below aligned row-for-row with the scaled features.
df_scaled = pd.DataFrame(scaled_data, columns=feature_frame.columns, index=df.index)
df_scaled['target'] = df['target']


Step 3: Split the Data into Training and Test Sets

In [3]:
# Separate the label column from the feature matrix
y = df_scaled['target']
X = df_scaled.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


Step 4: Train Baseline Models (Random Forest and SVM)
Random Forest Classifier

In [4]:
# Baseline Random Forest: default hyperparameters, fixed seed
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Score the baseline on the held-out test set
y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

# Report overall accuracy followed by the per-class metrics
print(f"Accuracy of Random Forest (before tuning): {accuracy_rf:.2f}")
print(classification_report(y_test, y_pred_rf))


Accuracy of Random Forest (before tuning): 0.99
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



Support Vector Machine (SVM)

In [5]:
# Baseline SVM: default hyperparameters, fixed seed
svm_model = SVC(random_state=42)
svm_model.fit(X_train, y_train)

# Score the baseline on the held-out test set
y_pred_svm = svm_model.predict(X_test)
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)

# Report overall accuracy followed by the per-class metrics
print(f"Accuracy of SVM (before tuning): {accuracy_svm:.2f}")
print(classification_report(y_test, y_pred_svm))


Accuracy of SVM (before tuning): 0.89
              precision    recall  f1-score   support

           0       0.93      0.83      0.88       102
           1       0.85      0.94      0.89       103

    accuracy                           0.89       205
   macro avg       0.89      0.89      0.89       205
weighted avg       0.89      0.89      0.89       205



Step 5: Hyperparameter Tuning with GridSearchCV
Random Forest Hyperparameter Tuning with GridSearchCV

In [6]:
# Candidate values for each Random Forest hyperparameter (4 * 4 * 3 * 3 = 144 combinations)
rf_param_grid = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores
grid_search_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search_rf.fit(X_train, y_train)

# Keep the estimator the search selected as best
best_rf_model = grid_search_rf.best_estimator_

# Score the selected model on the held-out test set
y_pred_rf_tuned = best_rf_model.predict(X_test)
accuracy_rf_tuned = accuracy_score(y_true=y_test, y_pred=y_pred_rf_tuned)

# Report overall accuracy followed by the per-class metrics
print(f"Accuracy of Random Forest (after tuning): {accuracy_rf_tuned:.2f}")
print(classification_report(y_test, y_pred_rf_tuned))


Fitting 5 folds for each of 144 candidates, totalling 720 fits
Accuracy of Random Forest (after tuning): 0.99
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



SVM Hyperparameter Tuning with GridSearchCV

In [7]:
# Candidate values for each SVM hyperparameter (3 * 2 * 2 = 12 combinations)
svm_param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores
grid_search_svm = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=svm_param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid_search_svm.fit(X_train, y_train)

# Keep the estimator the search selected as best
best_svm_model = grid_search_svm.best_estimator_

# Score the selected model on the held-out test set
y_pred_svm_tuned = best_svm_model.predict(X_test)
accuracy_svm_tuned = accuracy_score(y_true=y_test, y_pred=y_pred_svm_tuned)

# Report overall accuracy followed by the per-class metrics
print(f"Accuracy of SVM (after tuning): {accuracy_svm_tuned:.2f}")
print(classification_report(y_test, y_pred_svm_tuned))


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Accuracy of SVM (after tuning): 0.98
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       102
           1       0.98      0.97      0.98       103

    accuracy                           0.98       205
   macro avg       0.98      0.98      0.98       205
weighted avg       0.98      0.98      0.98       205



Step 6: Hyperparameter Tuning with RandomizedSearchCV (Alternative)
Random Forest with RandomizedSearchCV

In [8]:
# RandomizedSearchCV is already imported in the notebook's import cell, so no
# additional import is needed here.

# Define hyperparameter distribution for Random Forest
rf_param_dist = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize RandomizedSearchCV for Random Forest.
# Only n_iter=10 parameter combinations are sampled from the candidates above;
# random_state fixes which ones are drawn so the search is reproducible.
random_search_rf = RandomizedSearchCV(estimator=RandomForestClassifier(random_state=42),
                                      param_distributions=rf_param_dist,
                                      n_iter=10,
                                      cv=5,
                                      n_jobs=-1,
                                      verbose=2,
                                      random_state=42)

# Fit RandomizedSearchCV
random_search_rf.fit(X_train, y_train)

# Keep the estimator the search selected as best
best_rf_model_random = random_search_rf.best_estimator_

# Predict with the best model
y_pred_rf_random = best_rf_model_random.predict(X_test)

# Evaluate the tuned model
accuracy_rf_random = accuracy_score(y_test, y_pred_rf_random)
print(f"Accuracy of Random Forest (after RandomizedSearchCV): {accuracy_rf_random:.2f}")
print(classification_report(y_test, y_pred_rf_random))


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Accuracy of Random Forest (after RandomizedSearchCV): 0.99
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205



SVM with RandomizedSearchCV

In [9]:
# Define hyperparameter distribution for SVM
svm_param_dist = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# Initialize RandomizedSearchCV for SVM.
# n_iter=10 samples 10 of the 12 possible combinations; random_state fixes
# which ones are drawn so the search is reproducible.
random_search_svm = RandomizedSearchCV(estimator=SVC(random_state=42),
                                       param_distributions=svm_param_dist,
                                       n_iter=10,
                                       cv=5,
                                       n_jobs=-1,
                                       verbose=2,
                                       random_state=42)

# Fit RandomizedSearchCV
random_search_svm.fit(X_train, y_train)

# Keep the estimator the search selected as best
best_svm_model_random = random_search_svm.best_estimator_

# Predict with the best model
y_pred_svm_random = best_svm_model_random.predict(X_test)

# Evaluate the tuned model
accuracy_svm_random = accuracy_score(y_test, y_pred_svm_random)
print(f"Accuracy of SVM (after RandomizedSearchCV): {accuracy_svm_random:.2f}")
print(classification_report(y_test, y_pred_svm_random))


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Accuracy of SVM (after RandomizedSearchCV): 0.98
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       102
           1       0.98      0.97      0.98       103

    accuracy                           0.98       205
   macro avg       0.98      0.98      0.98       205
weighted avg       0.98      0.98      0.98       205



Step 7: Compare Performance
Comparison of Accuracy:

In [10]:
# Random Forest: baseline vs. grid search vs. randomized search (test-set accuracy)
print(
    f"Accuracy of Random Forest (before tuning): {accuracy_rf:.2f}",
    f"Accuracy of Random Forest (after tuning): {accuracy_rf_tuned:.2f}",
    f"Accuracy of Random Forest (after RandomizedSearchCV): {accuracy_rf_random:.2f}",
    sep="\n",
)

# SVM: baseline vs. grid search vs. randomized search (test-set accuracy)
print(
    f"Accuracy of SVM (before tuning): {accuracy_svm:.2f}",
    f"Accuracy of SVM (after tuning): {accuracy_svm_tuned:.2f}",
    f"Accuracy of SVM (after RandomizedSearchCV): {accuracy_svm_random:.2f}",
    sep="\n",
)


Accuracy of Random Forest (before tuning): 0.99
Accuracy of Random Forest (after tuning): 0.99
Accuracy of Random Forest (after RandomizedSearchCV): 0.99
Accuracy of SVM (before tuning): 0.89
Accuracy of SVM (after tuning): 0.98
Accuracy of SVM (after RandomizedSearchCV): 0.98
