## Make the dataset

In [2]:
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [3]:
X, y = make_moons(n_samples= 5000, noise= 0.4, random_state= 42) # make the data set

In [5]:
# perform data splitting
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state= 42) 

## Make Voting Classifier

In [6]:
from sklearn.ensemble import VotingClassifier

In [17]:
voting_clf = VotingClassifier(
    estimators = [
        ('logistic', LogisticRegression(random_state= 42)),
        ('Decision', DecisionTreeClassifier(max_depth= 15)),
        ('SVC', SVC(random_state= 42))
    ]
)
voting_clf.fit(X_train, y_train)

In [23]:
print("\tAccuracy Table:" , "\nFor training: ",
        voting_clf.score(X_train, y_train), "\nFor testing: ", 
             voting_clf.score(X_test, y_test))

	Accuracy Table: 
For training:  0.8754666666666666 
For testing:  0.8552


## Bagging Classifier

In [24]:
from sklearn.ensemble import BaggingClassifier

In [41]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), max_samples= 100, oob_score= True)
bag_clf.fit(X_train, y_train)

In [45]:
# note the smaal difference between testing accuracy and the oob score
print("\tAccuracy Table:" , "\nFor training: ",
        bag_clf.score(X_train, y_train), "\nFor testing: ", 
             bag_clf.score(X_test, y_test), 
                "\nOOB score: ", bag_clf.oob_score_)

	Accuracy Table: 
For training:  0.8562666666666666 
For testing:  0.8624 
OOB score:  0.8512


## Random Patches

In [53]:
bag_clf_2 = BaggingClassifier(DecisionTreeClassifier(max_features= "sqrt", max_depth= 10, min_samples_leaf= 5),oob_score= True, )
bag_clf_2.fit(X_train, y_train)

  warn(
  oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]


In [58]:
# note the smaal difference between testing accuracy and the oob score
print("\tAccuracy Table:" , "\nFor training: ",
        bag_clf_2.score(X_train, y_train), "\nFor testing: ", 
             bag_clf_2.score(X_test, y_test), 
                "\nOOB score: ", bag_clf_2.oob_score_)

	Accuracy Table: 
For training:  0.8901333333333333 
For testing:  0.8592 
OOB score:  0.8378666666666666


## Random Forest

In [55]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV as RSCV
from scipy.stats import randint as sp_randint

In [77]:
rdf_clf = RandomForestClassifier(max_depth=10,random_state= 42)

In [81]:
hyper_pars = {
                "max_depth": [3, 5], 
                "max_features": sp_randint(1, 2), 
                "min_samples_split": sp_randint(2, 20), 
                "max_depth": sp_randint(3,20),
                "bootstrap": [True, False], 
                "criterion": ["gini", "entropy"]
            } 
random_search = RSCV(rdf_clf, param_distributions=hyper_pars, 
    n_iter=5, cv=5) 

In [82]:
random_search.fit(X_train, y_train)

In [83]:
# note the smaal difference between testing accuracy
print("\tAccuracy Table:" , "\nFor training: ",
        random_search.score(X_train, y_train), "\nFor testing: ", 
             random_search.score(X_test, y_test))

	Accuracy Table: 
For training:  0.8594666666666667 
For testing:  0.8728
