In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Load data
df = pd.read_csv('/content/heart.csv')
print(df.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [None]:
# Separate features and target variable
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")


Training set shape: (242, 13)
Testing set shape: (61, 13)


In [None]:
# Initialize classifiers
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
svc = SVC()
lr = LogisticRegression()

# Train and evaluate RandomForestClassifier
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print(f"RandomForestClassifier accuracy: {accuracy_score(y_test, y_pred)}")

# Train and evaluate GradientBoostingClassifier
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)
print(f"GradientBoostingClassifier accuracy: {accuracy_score(y_test, y_pred)}")

# Train and evaluate SVC
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
print(f"SVC accuracy: {accuracy_score(y_test, y_pred)}")

# Train and evaluate LogisticRegression
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print(f"LogisticRegression accuracy: {accuracy_score(y_test, y_pred)}")

RandomForestClassifier accuracy: 0.8360655737704918
GradientBoostingClassifier accuracy: 0.7704918032786885
SVC accuracy: 0.7049180327868853
LogisticRegression accuracy: 0.8852459016393442


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Reduced parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [20, 60],
    'max_features': [0.2, 0.6],
    'max_depth': [2, 8],
    'max_samples': [0.5, 0.75]
}

# Initialize GridSearchCV
rf_grid = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)

# Fit GridSearchCV
rf_grid.fit(X_train, y_train)

# Print best parameters and best score
print(f"Best parameters found by GridSearchCV: {rf_grid.best_params_}")
print(f"Best score found by GridSearchCV: {rf_grid.best_score_}")


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best parameters found by GridSearchCV: {'max_depth': 2, 'max_features': 0.2, 'max_samples': 0.75, 'n_estimators': 60}
Best score found by GridSearchCV: 0.8304421768707483


In [None]:
# Define parameter distributions for RandomizedSearchCV
param_dist = {
    'n_estimators': [20, 60, 100, 120],
    'max_features': [0.2, 0.6, 1.0],
    'max_depth': [2, 8, None],
    'max_samples': [0.5, 0.75, 1.0],
    'bootstrap': [True, False],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

# Initialize RandomizedSearchCV
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=param_dist, n_iter=10, cv=5, verbose=2, n_jobs=-1, random_state=42)

# Fit RandomizedSearchCV
rf_random.fit(X_train, y_train)

# Print best parameters and best score
print(f"Best parameters found by RandomizedSearchCV: {rf_random.best_params_}")
print(f"Best score found by RandomizedSearchCV: {rf_random.best_score_}")


Fitting 5 folds for each of 10 candidates, totalling 50 fits


25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/ensemble/_forest.py", line 397, in fit
    raise ValueError(
ValueError: `max_sample` cannot be set if `bootstrap=False`. Either switch to `bootstrap=True` or set `max_sample=None`.

        nan 0.81802721        nan        nan]


Best parameters found by RandomizedSearchCV: {'n_estimators': 120, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_samples': 0.75, 'max_features': 0.2, 'max_depth': 2, 'bootstrap': True}
Best score found by RandomizedSearchCV: 0.8346088435374149


In [None]:
# Initialize GridSearchCV with fewer folds
rf_grid = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, verbose=2, n_jobs=-1)
