In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(estimators=[('lr', log_clf),('rf',rnd_clf),('svc',svm_clf)],voting='hard')
voting_clf.fit(X_train,y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

In [3]:
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test,y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.904


In [6]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(),n_estimators=500,max_samples=100,bootstrap=True,
                            n_jobs = -1,oob_score=True)
bag_clf.fit(X_train,y_train)
y_pred = bag_clf.predict(X_test)

In [7]:
accuracy_score(y_test,y_pred)

0.912

In [8]:
bag_clf.oob_score_

0.928

In [9]:
bag_clf.oob_decision_function_

array([[0.31313131, 0.68686869],
       [0.36923077, 0.63076923],
       [1.        , 0.        ],
       [0.0129199 , 0.9870801 ],
       [0.02362205, 0.97637795],
       [0.08743169, 0.91256831],
       [0.40350877, 0.59649123],
       [0.08375635, 0.91624365],
       [0.94385027, 0.05614973],
       [0.83835616, 0.16164384],
       [0.58005249, 0.41994751],
       [0.05277045, 0.94722955],
       [0.75765306, 0.24234694],
       [0.83423913, 0.16576087],
       [0.9204244 , 0.0795756 ],
       [0.10761155, 0.89238845],
       [0.0237467 , 0.9762533 ],
       [0.9197861 , 0.0802139 ],
       [0.65957447, 0.34042553],
       [0.96103896, 0.03896104],
       [0.05277045, 0.94722955],
       [0.22311828, 0.77688172],
       [0.92167102, 0.07832898],
       [0.98644986, 0.01355014],
       [0.94459103, 0.05540897],
       [0.00763359, 0.99236641],
       [0.9602122 , 0.0397878 ],
       [1.        , 0.        ],
       [0.01842105, 0.98157895],
       [0.75255102, 0.24744898],
       [0.

In [10]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train,y_train)
y_pred_rf = rnd_clf.predict(X_test)

In [11]:
accuracy_score(y_test, y_pred_rf)

0.912

In [12]:
from sklearn.datasets import load_iris

iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500,n_jobs=-1)
rnd_clf.fit(iris['data'],iris['target'])
for name, score in zip(iris['feature_names'],rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.08177021147953226
sepal width (cm) 0.023045465489952992
petal length (cm) 0.43230372002668377
petal width (cm) 0.46288060300383105


In [14]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor
X_train, X_val, y_train, y_val = train_test_split(X,y)

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(X_train,y_train)

errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]

bst_n_estimators = np.argmin(errors) + 1

gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators = bst_n_estimators)
gbrt_best.fit(X_train,y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=52)

In [15]:
gbrt = GradientBoostingRegressor(max_depth=2,warm_start=True)

min_val_error = float('inf')
error_going_up=0

for n_estimators in range(1,120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train,y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0 
        
    else:
        error_going_up += 1
        if error_going_up == 5:
            break

In [None]:
import xgboost
xgb_reg = xgboost.XGRegressor()
xgb_reg.fit(X_train,y_train)
y_pred = xgb_reg.predict(X_val)