# **Ensemble Learning and Random Forest**
In this study, I use three ensemble classification strategies (voting, bagging, and pasting) together with the random forest algorithm. The notebook also covers boosting (AdaBoost) and stacking.

In [2]:
import sys
import sklearn
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import StackingClassifier

# **Voting Classifier**

In [5]:
# Build a noisy two-moons toy dataset and hold out a test split
# (no test_size is given, so train_test_split uses its default proportions).
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three different model families vote on each prediction. `voting` is left
# at its default, and the SVC is created without probability=True.
voting_members = [
    ("lr", LogisticRegression(random_state=42)),
    ("rf", RandomForestClassifier(random_state=42)),
    ("svc", SVC(random_state=42)),
]
voting_classifier = VotingClassifier(estimators=voting_members)
voting_classifier.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

In [6]:
# Report the test-set score of every fitted base estimator, one per line,
# so each can be compared against the ensemble as a whole.
for label in voting_classifier.named_estimators_:
    fitted_member = voting_classifier.named_estimators_[label]
    print(label, "=", fitted_member.score(X_test, y_test))

lr = 0.864
rf = 0.896
svc = 0.896


In [7]:
# Ensemble prediction for the first test sample (sliced, so it stays 2-D).
first_test_sample = X_test[:1]
voting_classifier.predict(first_test_sample)

array([1])

In [8]:
# Collect each fitted base estimator's own prediction for the first test
# sample, in the order the estimators are stored on the ensemble.
first_sample = X_test[:1]
individual_votes = []
for fitted_member in voting_classifier.estimators_:
    individual_votes.append(fitted_member.predict(first_sample))
individual_votes

[array([1]), array([1]), array([0])]

In [9]:
# Test-set score of the voting ensemble as a whole.
voting_test_score = voting_classifier.score(X_test, y_test)
voting_test_score

0.912

# **Bagging and Pasting**

In [10]:
# 500 decision trees, each trained on 100 samples drawn from the training set.
# `bootstrap` is left at its default (sampling with replacement), so this cell
# performs bagging; passing bootstrap=False would give pasting instead.
base_tree = DecisionTreeClassifier()
bag_classifier = BaggingClassifier(
    base_tree,  # positional: the keyword name differs between sklearn versions
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,  # use all available cores
    random_state=42,
)
bag_classifier.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1, random_state=42)

In [11]:
# Accuracy of the bagging ensemble on the held-out test set.
y_pred = bag_classifier.predict(X_test)
bagging_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
bagging_test_accuracy

0.904

# **Random Forests**

In [12]:
# Random forest of 500 trees, each limited to at most 16 leaf nodes.
random_classifier = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,  # use all available cores
    random_state=42,
)
random_classifier.fit(X_train, y_train)

# Accuracy of the forest on the held-out test set.
y_pred_rf = random_classifier.predict(X_test)
forest_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
forest_test_accuracy

0.912

# **Boosting**
# **AdaBoost**

In [13]:
# AdaBoost over 30 depth-1 decision trees ("stumps") with a learning rate of 0.5.
decision_stump = DecisionTreeClassifier(max_depth=1)
ada_classifier = AdaBoostClassifier(
    decision_stump,  # positional: the keyword name differs between sklearn versions
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)
ada_classifier.fit(X_train, y_train)

# Accuracy of the boosted ensemble on the held-out test set.
y_pred_ada = ada_classifier.predict(X_test)
ada_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_ada)
ada_test_accuracy

0.904

# **Stacking**

In [14]:
# Base learners whose cross-validated (cv=5) predictions become the inputs of
# the final estimator. The SVC is built with probability=True here, unlike the
# one used in the voting ensemble.
stacking_members = [
    ("lr", LogisticRegression(random_state=42)),
    ("rf", RandomForestClassifier(random_state=42)),
    ("svc", SVC(probability=True, random_state=42)),
]

# NOTE(review): the meta-learner is seeded with 43 while every other model in
# this notebook uses 42 — confirm this is intentional.
meta_learner = RandomForestClassifier(random_state=43)

stacking_classifier = StackingClassifier(
    estimators=stacking_members,
    final_estimator=meta_learner,
    cv=5,
)
stacking_classifier.fit(X_train, y_train)

StackingClassifier(cv=5,
                   estimators=[('lr', LogisticRegression(random_state=42)),
                               ('rf', RandomForestClassifier(random_state=42)),
                               ('svc', SVC(probability=True, random_state=42))],
                   final_estimator=RandomForestClassifier(random_state=43))

In [15]:
# Test-set score of the stacked ensemble, via the estimator's own score method.
stacking_test_score = stacking_classifier.score(X_test, y_test)
stacking_test_score

0.928

In [16]:
# Same check done explicitly: predict on the test set, then compute accuracy.
y_pred_stacking = stacking_classifier.predict(X_test)
stacking_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_stacking)
stacking_test_accuracy

0.928