# Load important library  

In [2]:
#load important lib ...
#load_breast_cancer: built-in dataset (features X, labels y)...
from sklearn.datasets import load_breast_cancer
#train_test_split: splits data into training and test sets...
from sklearn.model_selection import train_test_split
#StandardScaler: standaries feature to mean 0 and the variance 1 ...
from sklearn.preprocessing import StandardScaler
#main ensemble class for Bagging ,Boosting,Stacking ...
from sklearn.ensemble import BaggingClassifier ,StackingClassifier,AdaBoostClassifier

In [3]:
#RandomForestClassifier ,GradientBoostingClassifier: extra ensemble models used inside stacking ...
from sklearn.ensemble import RandomForestClassifier ,GradientBoostingClassifier
#LogisticRegression :use as meta model (final estimator) inside stacking ...
from sklearn.linear_model import LogisticRegression
#DecisionTreeClassifier: (base learner/weak learner) use for Bagging and Boosting ...
from sklearn.tree import DecisionTreeClassifier
#accuracy_score: to count the accuracy ...
from sklearn.metrics import accuracy_score

# Load Dataset

In [4]:
#load data ...
data=load_breast_cancer()
#matrix of features (n_samples,n_features)
X=data.data
#target labels (0=malignant,1=bengin)
Y=data.target

# Splitting data into Training and Testing sets

In [5]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

# Feature Scaling

In [6]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

# Bagging Classifier

In [13]:
#each base model is decision tree ...
#n_estimators: number of trees (more trees = more stable) "if you incease the number of trees the accuracy will increase but the training will be slow"...
#max_samples: each trees sees 80% from training example (bootstrap example) ...
#bootstrap: sampling with replacement ...
bag=BaggingClassifier(estimator=DecisionTreeClassifier(),n_estimators=20,max_samples=0.8,bootstrap=True,random_state=42)
bag.fit(X_train,Y_train)
#bag.predict(X_test): each tree predicts on X_test and baggingclassifier combines them using majority vote ...
bag_pred=bag.predict(X_test)
bag_acc=accuracy_score(Y_test,bag_pred)
print ('The Accuracy For Bagging Classifier: ',bag_acc)

The Accuracy For Bagging Classifier:  0.956140350877193


# Boosting Classifier

In [14]:
#DecisionTreeClassifier(max_depth=1): very shallow tree (decision stump) ...
#learning_rate: controls the contribution (weights) of each weak new learner(how much each tree will effect in the final decision )if the value near to 1 that mean that will learn quikly and the effect of this tree will be more strong ...
boost=AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1),n_estimators=50,learning_rate=0.8,random_state=42)
boost.fit(X_train,Y_train)
boost_pred=boost.predict(X_test)
boost_acc=accuracy_score(Y_test,boost_pred)
print('The Accuracy For Boosting Classifier: ',bag_acc)

The Accuracy For Boosting Classifier:  0.956140350877193


# Stacking Classifier

In [15]:
base_models=[('rf',RandomForestClassifier(n_estimators=50,random_state=42)),('gb',GradientBoostingClassifier(random_state=42))]
#passthrough: if (True->take Baseline model predictions + raw data) ,(False->Keep things simple and prevent overfitting) ...
stack=StackingClassifier(estimators=base_models,final_estimator=LogisticRegression(),passthrough=False)
stack.fit(X_train,Y_train)
stack_pred=stack.predict(X_test)
stack_acc=accuracy_score(Y_test,stack_pred)
print('The Accuracy For Stacking Classifier: ',stack_acc)

The Accuracy For Stacking Classifier:  0.956140350877193
