In [4]:

from sklearn.datasets import load_breast_cancer, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import sys


from stacking_transformer import ClassificationStackingTransformer

In [5]:
# Load demo data
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target
# Make train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=0)

In [6]:
estimators_L1 = [
    ('et', ExtraTreesClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('rf', RandomForestClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('knn',KNeighborsClassifier(n_neighbors=10))
]

# Stacking
n_folds = 5
stack = ClassificationStackingTransformer(
    estimators=estimators_L1,
    shuffle=True,
    random_state=0,
    verbose=1,
    n_folds=n_folds,
)

stack.fit(X_train, y_train)
S_train = stack.transform(X_train)
S_test = stack.transform(X_test)

# # Use 2nd level estimator to get final prediction
estimator_L2 = GradientBoostingClassifier(random_state=0,
                                learning_rate=0.1,
                                n_estimators=100,
                                max_depth=3
                                )

estimator_L2 = estimator_L2.fit(S_train, y_train)
y_pred = estimator_L2.predict(S_test)

# Final prediction score
print('Final score: [%.8f]' % accuracy_score(y_test, y_pred))

metric:  accuracy_score 
 n_estimators:  3 

Estimator: [et: ExtraTreesClassifier]
Mean Scores: [0.94725275]  -  Std Scrores: [0.01758242]

Estimator: [rf: RandomForestClassifier]
Mean Scores: [0.95384615]  -  Std Scrores: [0.01076699]

Estimator: [knn: KNeighborsClassifier]
Mean Scores: [0.92747253]  -  Std Scrores: [0.02262776]

Train set was detected.
Final score: [0.95614035]


In [7]:
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_informative=10)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=0)

In [8]:
estimators_L1 = [
    ('et', ExtraTreesClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('rf', RandomForestClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('knn',KNeighborsClassifier(n_neighbors=10))
]

# Stacking
n_folds = 5
stack = ClassificationStackingTransformer(
    estimators=estimators_L1,
    shuffle=True,
    random_state=0,
    verbose=1,
    n_folds=n_folds,
)

stack.fit(X_train, y_train)
S_train = stack.transform(X_train)
S_test = stack.transform(X_test)

# # Use 2nd level estimator to get final prediction
estimator_L2 = GradientBoostingClassifier(random_state=0,
                                learning_rate=0.1,
                                n_estimators=100,
                                max_depth=3
                                )

estimator_L2 = estimator_L2.fit(S_train, y_train)
y_pred = estimator_L2.predict(S_test)

# Final prediction score
print('Final score: [%.8f]' % accuracy_score(y_test, y_pred))

metric:  accuracy_score 
 n_estimators:  3 

Estimator: [et: ExtraTreesClassifier]
Mean Scores: [0.66250000]  -  Std Scrores: [0.05902859]

Estimator: [rf: RandomForestClassifier]
Mean Scores: [0.65625000]  -  Std Scrores: [0.05258921]

Estimator: [knn: KNeighborsClassifier]
Mean Scores: [0.80625000]  -  Std Scrores: [0.01895719]

Train set was detected.
Final score: [0.82000000]


In [13]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

# Init 1st level estimators
estimators_l1 = [
    ('et', ExtraTreesClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('rf', RandomForestClassifier(
            random_state=0,
            n_jobs=-1,
            n_estimators=100,
            max_depth=3
    )),
    ('knn',KNeighborsClassifier(n_neighbors=10))
]
# Stacking
stack_l1 = ClassificationStackingTransformer(
        estimators=estimators_l1,
        shuffle=True,
        random_state=0,
        verbose=1,
        n_jobs=-1
)


pipeline = Pipeline([
        ('stack_l1',stack_l1), 
        ("final_pred", LogisticRegression())
])

In [14]:
pipeline.fit(X_train, y_train)

metric:  accuracy_score 
 n_estimators:  3 

Estimator: [et: ExtraTreesClassifier]
Mean Scores: [0.65375000]  -  Std Scrores: [0.03542157]

Estimator: [rf: RandomForestClassifier]
Mean Scores: [0.67375000]  -  Std Scrores: [0.04407026]

Estimator: [knn: KNeighborsClassifier]
Mean Scores: [0.80500000]  -  Std Scrores: [0.02958040]

Train set was detected.
metric:  accuracy_score 
 n_estimators:  3 

Estimator: [et: ExtraTreesClassifier]
Mean Scores: [0.82750000]  -  Std Scrores: [0.02561738]

Estimator: [rf: RandomForestClassifier]
Mean Scores: [0.81875000]  -  Std Scrores: [0.02161452]

Estimator: [knn: KNeighborsClassifier]
Mean Scores: [0.83125000]  -  Std Scrores: [0.02218530]

Train set was detected.


In [15]:
y_pred_test = pipeline.predict(X_test)

In [16]:
from sklearn.metrics import accuracy_score

lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred_test_lr = lr.predict(X_test)


print(f"Accuracy with stacking: % {100*accuracy_score(y_test, y_pred_test):.2f}" )
print(f"Accuracy without stacking: % {100*accuracy_score(y_test, y_pred_test_lr):.2f}")

Accuracy with stacking: % 78.50
Accuracy without stacking: % 60.00
