In [None]:
#AdaBoost classifier and regressor

In [9]:
#adaboost classifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings 
# Silence only FutureWarning (the category scikit-learn uses for deprecation
# notices) instead of every warning: a blanket 'ignore' would also hide
# warnings that point at real problems, e.g. failed fits during a grid search.
warnings.filterwarnings('ignore', category=FutureWarning)

In [2]:
# Synthetic binary-classification dataset: 1000 samples, 20 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [5]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [6]:
# Baseline AdaBoost classifier with default hyperparameters.
# random_state seeds the weak learners so the metrics reported below are the
# same on every Restart & Run All (the data and the split are already seeded).
classifier = AdaBoostClassifier(random_state=42)
classifier.fit(X_train, y_train)



In [7]:
# Predict labels for the held-out test split.
y_pred = classifier.predict(X_test)

# Report accuracy, the per-class report and the confusion matrix, one item per line.
print(
    "Current model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

Current model performance:
Accuracy: 0.806060606060606
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.80      0.82       176
           1       0.78      0.81      0.80       154

    accuracy                           0.81       330
   macro avg       0.81      0.81      0.81       330
weighted avg       0.81      0.81      0.81       330

Confusion Matrix:
[[141  35]
 [ 29 125]]


In [10]:
from sklearn.model_selection import GridSearchCV
import sklearn

# Hyperparameter search space for the AdaBoost classifier.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 1.5, 2.0],
}

# scikit-learn 1.6 removed the 'SAMME.R' option (and deprecated the `algorithm`
# parameter itself), so the two algorithms are only compared on releases that
# still accept both. On newer releases the parameter is left out and the
# estimator uses SAMME; otherwise every 'SAMME.R' candidate would fail to fit.
sklearn_version = tuple(int(part) for part in sklearn.__version__.split('.')[:2])
if sklearn_version < (1, 6):
    param_grid['algorithm'] = ['SAMME', 'SAMME.R']

# Seed the estimator so the search selects the same best_params_ on every run.
ada = AdaBoostClassifier(random_state=42)

# 5-fold cross-validated grid search. With n_jobs=-1 the candidates are fitted
# in parallel and the per-fit verbose lines are typically not shown in the
# notebook output; only the "Fitting ... folds" summary appears.
clf = GridSearchCV(estimator=ada, param_grid=param_grid, cv=5, verbose=3, n_jobs=-1)

In [11]:
# Run the cross-validated search on the training split only.
clf.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [12]:
# Display the hyperparameter combination selected by the grid search.
clf.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 200}

In [15]:
# Keep the winning estimator from the search; with GridSearchCV's default
# refit=True this is the best configuration refit on the data passed to fit().
best_model = clf.best_estimator_

In [16]:
# Predict with the tuned estimator on the same held-out test split.
y_pred_tuned = best_model.predict(X_test)

# Same report as for the baseline model, so the two runs can be compared directly.
print(
    "Tuned model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}",
    "Classification Report:",
    classification_report(y_test, y_pred_tuned),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred_tuned),
    sep="\n",
)

Tuned model performance:
Accuracy: 0.8484848484848485
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.85      0.86       176
           1       0.83      0.85      0.84       154

    accuracy                           0.85       330
   macro avg       0.85      0.85      0.85       330
weighted avg       0.85      0.85      0.85       330

Confusion Matrix:
[[149  27]
 [ 23 131]]


In [17]:
#adaboost regressor

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [18]:
# Synthetic regression dataset: 1000 samples, 2 features, noise=10, fixed seed.
# NOTE: this rebinds X, y and the train/test variables used by the
# classification cells above.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

# Hold out 33% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [19]:
# Baseline AdaBoost regressor with default hyperparameters.
# random_state makes the boosting procedure repeatable, so the R2 / MAE / MSE
# reported below do not change from one run of the notebook to the next.
regressor = AdaBoostRegressor(random_state=42)
regressor.fit(X_train, y_train)

In [20]:
# Predict targets for the held-out test split.
y_pred = regressor.predict(X_test)

# Report R2, MAE and MSE for the untuned regressor, one metric per line.
print(
    "Current model performance:",
    f"R2 score: {r2_score(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    sep="\n",
)

Current model performance:
R2 score: 0.9641705203324348
Mean Absolute Error: 12.554931647413753
Mean Squared Error: 280.68588886138036


In [21]:
# Hyperparameter tuning for the AdaBoost regressor
from sklearn.model_selection import GridSearchCV

# Search space: ensemble size, shrinkage applied to each weak learner, and the
# loss used to update the sample weights after each boosting iteration.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5, 1.0, 1.5],
    'loss': ['linear', 'square', 'exponential']
}

# Use a seeded estimator with otherwise default settings (same configuration as
# `regressor`): without a fixed random_state the cross-validation scores, and
# therefore best_params_, can differ between runs.
grid_search = GridSearchCV(
    estimator=AdaBoostRegressor(random_state=42),
    param_grid=param_grid,
    cv=5,
    verbose=3,
)

In [22]:
# Display the configured (not yet fitted) search object.
grid_search

In [23]:
# Run the cross-validated search on the regression training split.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.918 total time=   0.0s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.926 total time=   0.0s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.940 total time=   0.0s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.944 total time=   0.0s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=50;, score=0.937 total time=   0.0s
[CV 1/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.925 total time=   0.0s
[CV 2/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.929 total time=   0.0s
[CV 3/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.940 total time=   0.0s
[CV 4/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.942 total time=   0.0s
[CV 5/5] END learning_rate=0.01, loss=linear, n_estimators=100;, score=0.936 to

[CV 1/5] END learning_rate=0.1, loss=exponential, n_estimators=200;, score=0.947 total time=   0.2s
[CV 2/5] END learning_rate=0.1, loss=exponential, n_estimators=200;, score=0.955 total time=   0.2s
[CV 3/5] END learning_rate=0.1, loss=exponential, n_estimators=200;, score=0.963 total time=   0.2s
[CV 4/5] END learning_rate=0.1, loss=exponential, n_estimators=200;, score=0.964 total time=   0.2s
[CV 5/5] END learning_rate=0.1, loss=exponential, n_estimators=200;, score=0.955 total time=   0.2s
[CV 1/5] END learning_rate=0.5, loss=linear, n_estimators=50;, score=0.949 total time=   0.0s
[CV 2/5] END learning_rate=0.5, loss=linear, n_estimators=50;, score=0.958 total time=   0.0s
[CV 3/5] END learning_rate=0.5, loss=linear, n_estimators=50;, score=0.963 total time=   0.0s
[CV 4/5] END learning_rate=0.5, loss=linear, n_estimators=50;, score=0.966 total time=   0.0s
[CV 5/5] END learning_rate=0.5, loss=linear, n_estimators=50;, score=0.958 total time=   0.0s
[CV 1/5] END learning_rate=0.5

[CV 3/5] END learning_rate=1.0, loss=exponential, n_estimators=100;, score=0.971 total time=   0.0s
[CV 4/5] END learning_rate=1.0, loss=exponential, n_estimators=100;, score=0.973 total time=   0.0s
[CV 5/5] END learning_rate=1.0, loss=exponential, n_estimators=100;, score=0.967 total time=   0.0s
[CV 1/5] END learning_rate=1.0, loss=exponential, n_estimators=200;, score=0.964 total time=   0.2s
[CV 2/5] END learning_rate=1.0, loss=exponential, n_estimators=200;, score=0.961 total time=   0.2s
[CV 3/5] END learning_rate=1.0, loss=exponential, n_estimators=200;, score=0.972 total time=   0.2s
[CV 4/5] END learning_rate=1.0, loss=exponential, n_estimators=200;, score=0.976 total time=   0.2s
[CV 5/5] END learning_rate=1.0, loss=exponential, n_estimators=200;, score=0.966 total time=   0.2s
[CV 1/5] END learning_rate=1.5, loss=linear, n_estimators=50;, score=0.958 total time=   0.0s
[CV 2/5] END learning_rate=1.5, loss=linear, n_estimators=50;, score=0.959 total time=   0.0s
[CV 3/5] END

In [24]:
# Display the hyperparameter combination selected by the regressor grid search.
grid_search.best_params_

{'learning_rate': 1.5, 'loss': 'square', 'n_estimators': 100}

In [25]:
# Take the best estimator found by the search and predict on the held-out split.
# NOTE: this rebinds best_model, which previously held the tuned classifier.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

# Same metrics as for the baseline regressor, so the two runs can be compared directly.
print(
    "Tuned model performance:",
    f"R2 score: {r2_score(y_test, y_pred_tuned)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)}",
    sep="\n",
)

Tuned model performance:
R2 score: 0.9683238022323749
Mean Absolute Error: 11.926031150871824
Mean Squared Error: 248.14933983547002
