In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC


In [4]:
# Synthetic binary-classification data: 10,000 rows, 10 features, 3 of them informative.
# random_state pins the generated dataset so a fresh Restart & Run All yields
# the same data (and therefore comparable scores) every time.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)

In [5]:
# Hold out 20% of the rows for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Baseline model: a single, seeded decision tree with default hyper-parameters.
dt = DecisionTreeClassifier(random_state=42)

In [7]:
# Train the baseline tree and predict labels for the held-out split
# (fit returns the fitted estimator, so the two calls can be chained).
y_pred = dt.fit(X_train, y_train).predict(X_test)

In [10]:
# Test-set accuracy of the single decision tree.
print(
    "Decision Tree accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Decision Tree accuracy: 0.948


# Bagging

In [11]:
# Bagging: 100 decision trees, each trained on a bootstrap sample (drawn with
# replacement) half the size of the training set.
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=True,
    random_state=42,
)

In [12]:
# Train the bagging ensemble; as the cell's last expression, the fitted estimator is displayed.
bag.fit(X=X_train, y=y_train)



In [13]:
# Predict labels for the held-out split with the fitted bagging ensemble.
y_pred = bag.predict(X=X_test)

In [14]:
# Test-set accuracy of the bagged decision trees.
print(
    "Accuracy of Bagging:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy of Bagging: 0.965


In [17]:
# Shape of the sample selection recorded for the first base estimator.
first_estimator_samples = bag.estimators_samples_[0]
first_estimator_samples.shape

(4000,)

In [20]:
# Shape of the feature selection recorded for the first base estimator.
first_estimator_features = bag.estimators_features_[0]
first_estimator_features.shape

(10,)

# Bagging with SVC

In [21]:
# Bagging with a support-vector classifier as the base estimator:
# 100 SVCs, each trained on a bootstrap sample half the size of the training set.
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    SVC(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=True,
    random_state=42,
)

In [22]:
# Fit the bagged SVC ensemble, predict on the held-out split and report accuracy
# (fit returns the fitted estimator, so fit and predict are chained).
y_pred = bag.fit(X_train, y_train).predict(X_test)
print(
    "Accuracy of Bagging using SVC:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)



Accuracy of Bagging using SVC: 0.9555


# Pasting

In [26]:
# Pasting: like bagging, but each tree's 25% sample is drawn WITHOUT
# replacement (bootstrap=False). n_jobs=-1 trains on all available cores.
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    n_jobs=-1,
)

In [27]:
# Fit the pasting ensemble, predict on the held-out split and report accuracy
# (fit returns the fitted estimator, so fit and predict are chained).
y_pred = bag.fit(X_train, y_train).predict(X_test)
print(
    "Accuracy of Pasting:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)



Accuracy of Pasting: 0.9625


# Random Subspaces

In [28]:
# Random Subspaces: every tree sees all training rows (max_samples=1.0,
# bootstrap=False) but only a random half of the features, drawn with
# replacement (max_features=0.5, bootstrap_features=True).
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
    n_jobs=-1,
)

In [29]:
# Fit the random-subspaces ensemble, predict on the held-out split and report accuracy
# (fit returns the fitted estimator, so fit and predict are chained).
y_pred = bag.fit(X_train, y_train).predict(X_test)
print(
    "Accuracy of Random Subspaces:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)



Accuracy of Random Subspaces: 0.958


In [30]:
# Shape of the sample selection recorded for the first base estimator.
first_estimator_samples = bag.estimators_samples_[0]
first_estimator_samples.shape

(8000,)

In [31]:
# Shape of the feature selection recorded for the first base estimator.
first_estimator_features = bag.estimators_features_[0]
first_estimator_features.shape

(5,)

# Random Patches

In [32]:
# Random Patches: each tree is trained on a random subset of both rows
# (25%, with replacement) and features (50%, with replacement).
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.25,
    bootstrap=True,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
    n_jobs=-1,
)

In [33]:
# Fit the random-patches ensemble, predict on the held-out split and report accuracy
# (fit returns the fitted estimator, so fit and predict are chained).
y_pred = bag.fit(X_train, y_train).predict(X_test)
print(
    "Accuracy of Random Patches:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)



Accuracy of Random Patches: 0.955


# OOB score

In [34]:
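# During bagging (bootstrap=True, i.e. sampling with replacement) not all the training samples are
# selected for a given estimator -- and likewise, in random subspaces with bootstrap_features=True,
# not all the features are selected. The samples an estimator never saw are its out-of-bag (OOB)
# samples and can be used to score it.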

In [36]:
# Bagging with out-of-bag evaluation: oob_score=True makes the fitted
# ensemble expose `oob_score_`; it requires bootstrap=True.
# The base estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (the old name was
# removed in 1.4); the positional form works on both old and new versions.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.25,
    bootstrap=True,
    oob_score=True,
    random_state=42,
    n_jobs=-1,
)

In [37]:
# Train the OOB-enabled ensemble; as the cell's last expression, the fitted estimator is displayed.
bag.fit(X=X_train, y=y_train)




In [38]:
# Out-of-bag score computed during fit (available because oob_score=True).
oob_accuracy = bag.oob_score_
oob_accuracy

0.9635

In [39]:
# Compare against accuracy on the held-out test split.
y_pred = bag.predict(X=X_test)
print(
    "Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.961


# Applying GridSearchCV

In [40]:
from sklearn.model_selection import GridSearchCV

In [41]:
# Hyper-parameter grid for the bagging ensemble: every combination of
# ensemble size, row fraction, row bootstrapping and feature fraction.
parameters = dict(
    n_estimators=[50, 100, 250],
    max_samples=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1.0],
    bootstrap=[True, False],
    max_features=[0.1, 0.4, 0.7, 1.0],
)
     

In [44]:
# 5-fold cross-validated grid search over `parameters`.
# - random_state seeds the bagging estimator so the search result is
#   reproducible (an unseeded ensemble gives different scores on every run).
# - n_jobs=-1 spreads the fits across all cores; the grid has
#   3 * 8 * 2 * 4 = 192 candidates, i.e. 960 fits with cv=5.
search = GridSearchCV(
    BaggingClassifier(random_state=42),
    parameters,
    cv=5,
    n_jobs=-1,
)

In [45]:
# Run the grid search on the training split only; the test split stays untouched.
search.fit(X=X_train, y=y_train)

In [46]:
# Mean cross-validated score of the best parameter combination.
best_cv_score = search.best_score_
best_cv_score


0.9646250000000001

In [47]:
# Parameter combination that achieved the best cross-validated score.
best_params = search.best_params_
best_params

{'bootstrap': True,
 'max_features': 0.7,
 'max_samples': 1.0,
 'n_estimators': 250}