In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.cross_validation import train_test_split
from sklearn.metrics import precision_score, confusion_matrix, classification_report

%matplotlib inline

In [47]:
X, y = make_classification(100000, n_features=20, n_informative=5, n_redundant=2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [56]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(precision_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.912566148013585
             precision    recall  f1-score   support

          0       0.92      0.91      0.91     12419
          1       0.91      0.92      0.92     12581

avg / total       0.91      0.91      0.91     25000



In [57]:
model_ens = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=50, 
    max_features=0.5,
    max_samples=0.5,
    random_state=0
)
model_ens.fit(X_train, y_train)
y_pred = model_ens.predict(X_test)
print(precision_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.9276067839195979
             precision    recall  f1-score   support

          0       0.94      0.93      0.93     12419
          1       0.93      0.94      0.93     12581

avg / total       0.93      0.93      0.93     25000



In [58]:
model_ada = AdaBoostClassifier(
    DecisionTreeClassifier(),
    n_estimators=50, 
    random_state=0
)
model_ada.fit(X_train, y_train)
y_pred = model_ada.predict(X_test)
print(precision_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.9140210592985512
             precision    recall  f1-score   support

          0       0.92      0.91      0.91     12419
          1       0.91      0.92      0.92     12581

avg / total       0.92      0.92      0.92     25000



In [61]:
model_rf = RandomForestClassifier(
    n_estimators=30, 
    random_state=0
)
model_rf.fit(X_train, y_train)
y_pred = model_rf.predict(X_test)
print(precision_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.9414676304211188
             precision    recall  f1-score   support

          0       0.95      0.94      0.95     12419
          1       0.94      0.95      0.95     12581

avg / total       0.95      0.95      0.95     25000

