In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [86]:
# Three different model families to combine in a voting ensemble.
# They share one seed so that re-running the cell rebuilds identical models.
seed_kwargs = {"random_state": 42}
log_clf = LogisticRegression(**seed_kwargs)
rnd_clf = RandomForestClassifier(**seed_kwargs)
# probability=True turns on SVC's class-probability estimates.
svm_clf = SVC(probability=True, **seed_kwargs)

In [110]:
# Synthetic two-class "moons" dataset: 500 noisy points, seeded for reproducibility.
X, y = make_moons(n_samples=500, noise=0.3, random_state=42)

In [111]:
# Seeded train/test split; no test_size is given, so scikit-learn's default
# hold-out fraction applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

In [89]:
# Soft-voting ensemble over the three base learners defined earlier.
# Each (name, estimator) pair registers one voter under a short label.
base_learners = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)]
voting_clf = VotingClassifier(estimators=base_learners, voting='soft')

In [29]:
# Train the voting ensemble on the training split; the fitted estimator is the
# cell's value, so its repr is displayed as the output.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()),
                             ('svc', SVC(probability=True))],
                 voting='soft')

In [35]:
from sklearn.metrics import accuracy_score

In [90]:
# Fit every base learner and then the ensemble on the same training split,
# printing each model's class name next to its accuracy on the test split.
for estimator in (log_clf, rnd_clf, svm_clf, voting_clf):
    test_pred = estimator.fit(X_train, y_train).predict(X_test)
    print(type(estimator).__name__, accuracy_score(y_test, test_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.92


In [91]:
# IPython help lookup for train_test_split (not plain Python syntax).
# NOTE(review): looks like leftover interactive introspection; consider removing
# it from the finished notebook.
?train_test_split

In [100]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [95]:
# Bagging ensemble of 500 decision trees. Each tree is trained on 100 instances
# drawn with replacement (bootstrap=True) from the training set; n_jobs=-1 uses
# all available CPU cores.
# random_state pins the bootstrap draws (and the seeds handed to the individual
# trees) so the fitted ensemble is reproducible, matching the seed used for the
# other models on this dataset.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, random_state=42
)

In [96]:
# Train the bagging ensemble on the training split; the fitted estimator's repr
# is shown as the cell output.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1)

In [97]:
# Predicted labels from the bagging ensemble for the held-out test split.
# NOTE(review): y_pred is not read by any later cell shown here, so this model's
# accuracy is never reported.
y_pred = bag_clf.predict(X_test)

In [8]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

In [64]:
# AdaBoost with 50 boosting rounds and a shrunken learning rate (0.4).
# random_state is fixed, as for the other seeded models on this dataset, so the
# cross-validation and test scores computed from this estimator are repeatable.
ada_clf = AdaBoostClassifier(n_estimators=50, learning_rate=0.4, random_state=42)

In [65]:
# 10-fold cross-validated accuracy of the AdaBoost model on the training split;
# the cell displays the mean over the folds.
ada_cv_scores = cross_val_score(ada_clf, X_train, y_train, cv=10, scoring='accuracy')
ada_cv_scores.mean()

0.9197724039829304

In [67]:
# Fit AdaBoost on the whole training split so it can be scored on the test
# split in the next cell.
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(learning_rate=0.4)

In [81]:
# Test accuracy: the fraction of AdaBoost predictions that equal the true labels.
ada_test_pred = ada_clf.predict(X_test)
(ada_test_pred == y_test).mean()

0.888

In [84]:
from sklearn.ensemble import GradientBoostingClassifier

In [87]:
import xgboost

In [124]:
from sklearn.metrics import accuracy_score

In [166]:
# Gradient-boosted classifier with early stopping: training halts once the
# validation metric has failed to improve for 2 consecutive rounds.
# NOTE(review): despite the `_reg` suffix this is an XGBClassifier, not a regressor.
xgb_reg  = xgboost.XGBClassifier(early_stopping_rounds=2)

In [167]:
# Fit on the first 300 training rows and hand the remaining training rows to
# eval_set, the validation data whose log-loss is printed each round.
n_fit = 300
X_fit, y_fit = X_train[:n_fit], y_train[:n_fit]
X_es, y_es = X_train[n_fit:], y_train[n_fit:]
xgb_reg.fit(X_fit, y_fit, eval_set=[(X_es, y_es)])

[0]	validation_0-logloss:0.52785
[1]	validation_0-logloss:0.44419
[2]	validation_0-logloss:0.38910
[3]	validation_0-logloss:0.36080
[4]	validation_0-logloss:0.34360
[5]	validation_0-logloss:0.33733
[6]	validation_0-logloss:0.33863
[7]	validation_0-logloss:0.33106
[8]	validation_0-logloss:0.32443
[9]	validation_0-logloss:0.33641
[10]	validation_0-logloss:0.33907


XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=2, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [172]:
# Test accuracy of the early-stopped XGBoost model: share of matching labels.
xgb_test_pred = xgb_reg.predict(X_test)
(xgb_test_pred == y_test).mean()

0.904

In [6]:
from sklearn.datasets import fetch_openml

In [7]:
# Fetch the MNIST dataset (OpenML name 'mnist_784', version 1).
# as_frame=True pins the returned data/target to pandas objects, which the
# split cell further down relies on when it calls `.values`; without it the
# container type is left to the installed scikit-learn version's default.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)

In [9]:
# List the fields available on the fetched dataset object.
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [10]:
# Pull the feature table and the labels out of the dataset object.
# NOTE: this rebinds X and y, which held the moons data in earlier cells.
X = mnist['data']
y = mnist['target']

In [11]:
# Positional three-way split: rows [0, 50000) for training, [50000, 60000) for
# validation, and everything from row 60000 on for testing.
# .values converts the pandas objects to arrays; labels are cast to int.
train_rows = slice(None, 50000)
val_rows = slice(50000, 60000)
test_rows = slice(60000, None)

X_train, y_train = X[train_rows].values, y[train_rows].values.astype(int)
X_val, y_val = X[val_rows].values, y[val_rows].values.astype(int)
X_test, y_test = X[test_rows].values, y[test_rows].values.astype(int)

In [None]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

In [234]:
# Hard-voting (majority vote) ensemble over four classifiers.
# Hard voting only uses each model's predicted labels, so SVC is left at its
# default probability=False: enabling probability estimates adds an internal
# cross-validated calibration step to fit(), which is very expensive on a
# training set of this size and is never read by a majority vote.
# NOTE: switching this ensemble to voting='soft' would require
# SVC(probability=True) again.
clf = VotingClassifier(estimators=[('rf',RandomForestClassifier(n_jobs=-1)),
                                   ('ex',ExtraTreesClassifier(n_jobs=-1)),
                                   ('lgr',LogisticRegression(n_jobs=-1, max_iter=10000)),
                                   ('svc',SVC())],
                      voting='hard')

In [None]:
# Standardise the features with statistics learned from the training rows only,
# then apply that same transform to the validation rows.
scal = StandardScaler()
X_train_tf = scal.fit_transform(X_train)
X_val_tf = scal.transform(X_val)

# Fit a freshly constructed model of each type, followed by the voting ensemble
# `clf`, and print each one's accuracy on the validation set.
candidates = [
    RandomForestClassifier(n_jobs=-1),
    ExtraTreesClassifier(n_jobs=-1),
    LogisticRegression(n_jobs=-1, max_iter=10000),
    SVC(),
    clf,
]
for estimator in candidates:
    estimator.fit(X_train_tf, y_train)
    val_acc = np.mean(estimator.predict(X_val_tf) == y_val)
    print(type(estimator).__name__, ": ", val_acc)

RandomForestClassifier :  0.973
ExtraTreesClassifier :  0.9753
LogisticRegression :  0.9223
SVC :  0.9687


In [16]:
# Soft-voting ensemble over three classifiers: random forest, extra-trees and
# logistic regression.
soft_voters = [
    ('rf', RandomForestClassifier(n_jobs=-1)),
    ('ex', ExtraTreesClassifier(n_jobs=-1)),
    ('lgr', LogisticRegression(n_jobs=-1, max_iter=10000)),
]
clf = VotingClassifier(estimators=soft_voters, voting='soft')

In [17]:
# Learn the scaling statistics from the training rows, then standardise both
# the training and the validation features with them.
scal = StandardScaler().fit(X_train)
X_train_tf, X_val_tf = scal.transform(X_train), scal.transform(X_val)

In [18]:
# Fit the soft-voting ensemble on the standardised training features; the
# fitted estimator's repr is displayed as the cell output.
clf.fit(X_train_tf, y_train)

VotingClassifier(estimators=[('rf', RandomForestClassifier(n_jobs=-1)),
                             ('ex', ExtraTreesClassifier(n_jobs=-1)),
                             ('lgr',
                              LogisticRegression(max_iter=10000, n_jobs=-1))],
                 voting='soft')

In [20]:
# Validation accuracy of the soft-voting ensemble: share of matching labels.
val_pred = clf.predict(X_val_tf)
(val_pred == y_val).mean()

0.9605

In [21]:
# Switch the ensemble's voting mode to majority vote by overwriting the
# attribute directly; no refit is performed in the cells shown here, so the
# already-fitted sub-models are reused as they are.
clf.voting = 'hard'

In [23]:
# Score the ensemble on the validation set after the voting attribute was set
# to 'hard', for comparison with the soft-voting figure computed earlier.
clf.score(X_val_tf, y_val)

0.9727

In [31]:
# Validation accuracy of the logistic-regression member on its own.
# It is looked up by the name it was registered under ('lgr') rather than by
# its position in the estimator list, so reordering the voters cannot silently
# pick a different model.
# VotingClassifier fits its sub-estimators on label-encoded targets, so the
# validation labels are passed through the same encoder (clf.le_) before
# scoring. For labels that are already 0..9 this is a no-op, but it keeps the
# comparison correct for any label set.
clf.named_estimators_['lgr'].score(X_val_tf, clf.le_.transform(y_val))

0.9223