# AdaBoost Classifier
AdaBoost (Adaptive Boosting) is an ensemble learning technique that combines multiple weak classifiers to create a strong classifier. It works by iteratively training weak classifiers on different distributions of the data and then combining their predictions through a weighted majority vote.

## Advantages:
- Improved Accuracy: By combining weak learners, AdaBoost can significantly improve prediction accuracy.
- Versatility: Can be used with various base classifiers.
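- Resistance to Overfitting: On clean data, AdaBoost is often less prone to overfitting than a single complex model, although noisy labels and outliers can still cause it to overfit.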
- Simple and Fast: Easy to implement and relatively fast to train.

## Disadvantages:
- Sensitivity to Noisy Data: Performance can degrade with noisy data and outliers.
- Computationally Intensive: Requires multiple iterations which can be computationally expensive.
- Requires Careful Tuning: The choice of base classifier and its parameters significantly affect performance.

## Use Cases:
- Image Recognition: Used for face detection and other image classification tasks.
- Spam Detection: Helps in distinguishing between spam and non-spam emails.
- Customer Churn Prediction: Predicts whether a customer will stop using a service.
- Credit Scoring: Assesses the risk of lending money to potential borrowers.

## Scaling (depends on the base estimator)
AdaBoost itself does not require scaling, but it depends on the base estimator used. For example, SVMs require scaling, while decision trees do not.

## Encoding (necessary)
Categorical features must be encoded as numerical values before training.

# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from scipy.stats import uniform, loguniform
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
# sklearn.datasets exposes no `make_Classifier`; the synthetic classification
# dataset generator is named `make_classification`.
from sklearn.datasets import make_classification

In [None]:
# Load the table; the 'diagnosis' column is the label and every other
# column is used as a feature.
df = pd.read_csv('Breast_Cancer.csv')
y = df['diagnosis']
x = df.drop(columns='diagnosis')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# 1. AdaBoost with the Default Estimator (Decision Tree)

## Grid Search

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# AdaBoost classifier boosting decision trees. The n_estimators value set
# here is only a starting point; the search below overrides it.
AdaBoost_clas = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
)

# Candidate values to try. The "estimator__" prefix routes a parameter to the
# wrapped decision tree instead of to AdaBoost itself.
param_grid = dict(
    n_estimators=[10, 50, 100, 200],
    estimator__max_depth=[None, 10, 20, 30],
    estimator__min_samples_split=[2, 5, 10],
)

# Exhaustive search over the grid with 5-fold cross-validation (n_jobs=-1).
grid_search = GridSearchCV(
    estimator=AdaBoost_clas,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)

# Fit every candidate on the training split.
grid_search.fit(x_train, y_train)

In [None]:
# Report the winning candidate of the decision-tree grid search.
for label, value in (
    ("Best Hyperparameter Index:", grid_search.best_index_),
    ("Best Hyperparameters:", grid_search.best_params_),
    ("Best Cross-Validated Score:", grid_search.best_score_),
):
    print(label, value)

In [None]:
# Keep the estimator of the best-scoring grid-search candidate as `model`.
model = grid_search.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Randomized Search

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

# AdaBoost classifier boosting decision trees. The n_estimators value set
# here is only a starting point; the search below overrides it.
AdaBoost_clas = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
)

# Value lists to sample from. The "estimator__" prefix routes a parameter to
# the wrapped decision tree.
param_dist = dict(
    n_estimators=[10, 50, 100, 200],
    estimator__max_depth=[None, 10, 20, 30, 40, 50],
    estimator__min_samples_split=[2, 5, 10, 15],
)

# Sample 50 candidates and score each with 5-fold cross-validation; the seed
# makes the sampled candidates repeatable.
random_search = RandomizedSearchCV(
    estimator=AdaBoost_clas,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Fit the randomized search on the training split.
random_search.fit(x_train, y_train)

In [None]:
# Report the winning candidate of the decision-tree randomized search.
for label, value in (
    ("Best Hyperparameter Index:", random_search.best_index_),
    ("Best Hyperparameters:", random_search.best_params_),
    ("Best Cross-Validated Score:", random_search.best_score_),
):
    print(label, value)

In [None]:
# Keep the estimator of the best-scoring randomized-search candidate as `model`.
model = random_search.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Train AdaBoostClassifier without search

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# Fixed configuration, no hyperparameter search: 50 boosting rounds over
# decision trees limited to depth 10 that need at least 5 samples to split.
model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=10, min_samples_split=5),
    n_estimators=50,
    random_state=42,
)
# Training is left disabled here:
# model.fit(x_train, y_train)

# 2. AdaBoost with a Single Estimator (Support Vector Classifier)

## Grid Search

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# AdaBoost classifier boosting support vector classifiers. The SVC is built
# with probability=True so that it exposes class-probability estimates.
AdaBoost_clas_SVC = AdaBoostClassifier(
    estimator=SVC(probability=True),
    random_state=42,
)

# Candidate values to try. The "estimator__" prefix routes a parameter to the
# wrapped SVC. gamma covers the two named heuristics plus 13 log-spaced
# values from 1e-9 to 1e3.
param_grid = dict(
    n_estimators=[10, 50, 100, 200],
    estimator__C=[0.1, 1, 10, 100],
    estimator__gamma=['scale', 'auto', *np.logspace(-9, 3, 13)],
    estimator__kernel=['linear', 'poly', 'rbf'],
)

# Exhaustive search over the grid with 5-fold cross-validation (n_jobs=-1).
grid_search = GridSearchCV(
    estimator=AdaBoost_clas_SVC,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)

# Fit every candidate on the training split.
grid_search.fit(x_train, y_train)

In [None]:
# Report the winning candidate of the SVC grid search.
for label, value in (
    ("Best Hyperparameter Index:", grid_search.best_index_),
    ("Best Hyperparameters:", grid_search.best_params_),
    ("Best Cross-Validated Score:", grid_search.best_score_),
):
    print(label, value)

In [None]:
# Keep the estimator of the best-scoring SVC grid-search candidate as `model`.
model = grid_search.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Randomized Search

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

# AdaBoost classifier boosting support vector classifiers. The SVC is built
# with probability=True so that it exposes class-probability estimates.
AdaBoost_clas_SVC = AdaBoostClassifier(
    estimator=SVC(probability=True),
    random_state=42,
)

# Value lists to sample from. The "estimator__" prefix routes a parameter to
# the wrapped SVC. gamma covers the two named heuristics plus 13 log-spaced
# values from 1e-9 to 1e3.
param_dist = dict(
    n_estimators=[10, 50, 100, 200],
    estimator__C=[0.1, 1, 10, 100],
    estimator__gamma=['scale', 'auto', *np.logspace(-9, 3, 13)],
    estimator__kernel=['linear', 'poly', 'rbf'],
)

# Sample 50 candidates and score each with 5-fold cross-validation; the seed
# makes the sampled candidates repeatable.
random_search = RandomizedSearchCV(
    estimator=AdaBoost_clas_SVC,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Fit the randomized search on the training split.
random_search.fit(x_train, y_train)

In [None]:
# Report the winning candidate of the SVC randomized search.
for label, value in (
    ("Best Hyperparameter Index:", random_search.best_index_),
    ("Best Hyperparameters:", random_search.best_params_),
    ("Best Cross-Validated Score:", random_search.best_score_),
):
    print(label, value)

In [None]:
# Keep the estimator of the best-scoring SVC randomized-search candidate as `model`.
model = random_search.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Train AdaBoostClassifier without search

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC

# Fixed configuration, no hyperparameter search: 50 boosting rounds over a
# linear-kernel SVC.
# probability=True matches the SVC search cells in this section. Without it
# SVC has no predict_proba, which AdaBoost's probability-based SAMME.R
# algorithm (the default in scikit-learn releases that still ship it) needs.
model = AdaBoostClassifier(
    estimator=SVC(kernel='linear', gamma=1, C=100, probability=True),
    n_estimators=50,
    random_state=42,
)
# Training is left disabled here:
# model.fit(x_train, y_train)

# 3. AdaBoost with Multiple Estimators (SVC, Decision Tree, GaussianNB)

## Grid Search

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Individual classifiers, used directly (no scaling pipeline around them).
svc = SVC(probability=True)
decision_tree = DecisionTreeClassifier()
gaussian_NB = GaussianNB()

# Combine the three classifiers; VotingClassifier defaults to hard
# (majority) voting.
voting_classifier = VotingClassifier(estimators=[
    ('SVC', svc),
    ('decision_tree', decision_tree),
    ('gaussian_NB', gaussian_NB)
])

# AdaBoost classifier boosting the voting ensemble. A hard-voting
# VotingClassifier has no predict_proba, so the probability-based SAMME.R
# algorithm cannot use it; select the discrete SAMME algorithm, as the
# randomized-search cell for this ensemble does.
# NOTE(review): newer scikit-learn releases deprecate the `algorithm`
# argument (SAMME becomes the only option) - drop it there.
AdaBoost_clas_voting = AdaBoostClassifier(
    estimator=voting_classifier,
    algorithm='SAMME',
    random_state=42,
)

# Candidate values to try. "estimator__<name>__<param>" routes a parameter
# through AdaBoost and the voting ensemble to the named member.
# NOTE(review): this grid has 4*3*3*3*4*3*10 = 12,960 combinations, each
# fitted 5 times - consider shrinking it before running.
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'estimator__SVC__C': [0.1, 1, 10],
    'estimator__SVC__gamma': [0.1, 0.5, 1],
    'estimator__SVC__kernel': ['linear', 'poly', 'rbf'],
    'estimator__decision_tree__max_depth': [None, 10, 20, 30],
    'estimator__decision_tree__min_samples_split': [2, 5, 10],
    'estimator__gaussian_NB__var_smoothing': np.logspace(-9, 0, 10)
}

# Exhaustive search over the grid with 5-fold cross-validation (n_jobs=-1).
grid_search_voting = GridSearchCV(AdaBoost_clas_voting, param_grid, cv=5, n_jobs=-1)

# Fit every candidate on the training split.
grid_search_voting.fit(x_train, y_train)

In [None]:
# Report the voting-ensemble grid search (grid_search_voting). The plain
# `grid_search` name still refers to the earlier SVC search.
print("Best Hyperparameter Index:", grid_search_voting.best_index_)
print("Best Hyperparameters:", grid_search_voting.best_params_)
print("Best Cross-Validated Score:", grid_search_voting.best_score_)

In [None]:
# Keep the best voting-ensemble candidate as `model`. It must come from
# grid_search_voting; `grid_search` still holds the earlier SVC search.
model = grid_search_voting.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Randomized Search

In [None]:
import numpy as np
from sklearn.ensemble import AdaBoostClassifier, VotingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# The three member classifiers of the voting ensemble.
svc = SVC(probability=True)
decision_tree = DecisionTreeClassifier()
gaussian_NB = GaussianNB()

# Combine the members under the names used as parameter prefixes below.
voting_classifier = VotingClassifier(
    estimators=[
        ('SVC', svc),
        ('decision_tree', decision_tree),
        ('gaussian_NB', gaussian_NB),
    ],
)

# AdaBoost classifier boosting the voting ensemble with the SAMME algorithm.
AdaBoost_clas_voting = AdaBoostClassifier(
    estimator=voting_classifier,
    algorithm='SAMME',
    random_state=42,
)

# Value lists to sample from. "estimator__<name>__<param>" routes a parameter
# through AdaBoost and the voting ensemble to the named member.
param_dist = dict(
    n_estimators=[10, 50, 100, 200],
    estimator__SVC__C=[0.1, 1, 10, 100],
    estimator__SVC__gamma=[0.1, 0.2, 0.5, 1.0],
    estimator__SVC__kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    estimator__decision_tree__max_depth=[None, 10, 20, 30, 40, 50],
    estimator__decision_tree__min_samples_split=[2, 5, 10, 15],
    estimator__gaussian_NB__var_smoothing=np.logspace(-9, 0, 10),
)

# Sample 50 candidates and score each with 5-fold cross-validation; the seed
# makes the sampled candidates repeatable.
random_search_voting = RandomizedSearchCV(
    estimator=AdaBoost_clas_voting,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Fit the randomized search on the training split.
random_search_voting.fit(x_train, y_train)


In [None]:
# Report the voting-ensemble randomized search (random_search_voting). The
# plain `random_search` name still refers to the earlier SVC search.
print("Best Hyperparameter Index:", random_search_voting.best_index_)
print("Best Hyperparameters:", random_search_voting.best_params_)
print("Best Cross-Validated Score:", random_search_voting.best_score_)

In [None]:
# Keep the best voting-ensemble candidate as `model`. It must come from
# random_search_voting; `random_search` still holds the earlier SVC search.
model = random_search_voting.best_estimator_
# Example follow-up (left disabled): predict on the held-out split.
# y_pred = model.predict(x_test)

## Train AdaBoostClassifier without search

In [None]:
# Import everything this cell uses so it does not rely on names imported by
# earlier cells.
from sklearn.ensemble import AdaBoostClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Individual classifiers with fixed hyperparameters, used directly (no
# scaling pipeline around them).
svc = SVC(kernel='linear', gamma=1, C=1)
decision_tree = DecisionTreeClassifier(max_depth=5, min_samples_split=2)
gaussian_NB = GaussianNB(var_smoothing=0.001)

# Combine the three classifiers; VotingClassifier defaults to hard
# (majority) voting.
voting_classifier = VotingClassifier(estimators=[
    ('SVC', svc),
    ('decision_tree', decision_tree),
    ('gaussian_NB', gaussian_NB)
])

# Fixed configuration, no hyperparameter search: 50 boosting rounds over the
# voting ensemble. A hard-voting VotingClassifier has no predict_proba, so
# select the discrete SAMME algorithm, as the randomized-search cell for this
# ensemble does.
# NOTE(review): newer scikit-learn releases deprecate the `algorithm`
# argument (SAMME becomes the only option) - drop it there.
model = AdaBoostClassifier(
    estimator=voting_classifier,
    algorithm='SAMME',
    n_estimators=50,
    random_state=42,
)
# Training is left disabled here:
# model.fit(x_train, y_train)

