In [28]:
import numpy as np
import pandas as pd 


from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier,BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

In [24]:
# Synthetic make_moons toy dataset: 500 noisy points, seeded so the data is reproducible.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)

# Hold out a test split (default split proportions), seeded the same way.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [25]:
# Three different base learners that the voting ensemble below combines.
log_clf = LogisticRegression()
# Random forests are stochastic: fix the seed (42, same as the data cells) so
# that a Restart & Run All reproduces the same scores.
rdn_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

In [26]:
# Hard-voting ensemble: the predicted class is the majority vote of the
# three named base classifiers.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rdn_clf),
        ('svc', svm_clf),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

In [27]:
# Fit each individual model and the ensemble on the same training split, then
# print its class name next to its accuracy on the test split.
for clf in (log_clf, rdn_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.904


## How bagging works as an ensemble method

In [31]:
# Bagging ensemble of 500 decision-tree classifiers.
# Each tree is trained on 100 training instances randomly sampled from the
# training set with replacement (bootstrap=True).

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,  # -1 = use all available CPU cores (this setting does not involve GPUs)
    random_state=42,  # seed the resampling so reruns give the same ensemble
)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

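# Bagging performs soft voting by default instead of hard voting when the base classifier can estimate class probabilities (i.e. has predict_proba), as DecisionTreeClassifier does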

## Taking advantage of out-of-bag (OOB) evaluation instead of a separate validation set

In [32]:
# Same bagging ensemble as before, now with oob_score=True so that fitting
# also computes an out-of-bag accuracy estimate on the training set.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,  # -1 = use all available CPU cores
    oob_score=True,  # evaluate each instance with the trees that did not sample it
    random_state=42,  # seed the resampling so the OOB score is reproducible
)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.9226666666666666

## So, according to the OOB evaluation, this bagging classifier is likely to achieve about 92% accuracy on the test set (based on the OOB score recorded above)

In [33]:
# Compare the OOB estimate with the accuracy measured on the held-out test split.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.912

In [35]:
# Out-of-bag class probabilities: one row per training instance, one column per class.
# Columns follow the order of bag_clf.classes_, so (assuming make_moons' 0/1 labels)
# the first column is the negative class (0) and the second is the positive class (1).
bag_clf.oob_decision_function_

array([[0.33773087, 0.66226913],
       [0.44502618, 0.55497382],
       [1.        , 0.        ],
       [0.00265252, 0.99734748],
       [0.0156658 , 0.9843342 ],
       [0.09067358, 0.90932642],
       [0.34010152, 0.65989848],
       [0.05      , 0.95      ],
       [0.96658098, 0.03341902],
       [0.84031414, 0.15968586],
       [0.56188119, 0.43811881],
       [0.03551913, 0.96448087],
       [0.74798928, 0.25201072],
       [0.83641161, 0.16358839],
       [0.92227979, 0.07772021],
       [0.08695652, 0.91304348],
       [0.03589744, 0.96410256],
       [0.94344473, 0.05655527],
       [0.58575198, 0.41424802],
       [0.94936709, 0.05063291],
       [0.06084656, 0.93915344],
       [0.22811671, 0.77188329],
       [0.88020833, 0.11979167],
       [0.9895288 , 0.0104712 ],
       [0.9525    , 0.0475    ],
       [0.0027027 , 0.9972973 ],
       [0.93931398, 0.06068602],
       [1.        , 0.        ],
       [0.01240695, 0.98759305],
       [0.70923913, 0.29076087],
       [0.