Skip to content

Commit

Permalink
added tests for folding
Browse files Browse the repository at this point in the history
  • Loading branch information
arogozhnikov committed Jul 22, 2015
1 parent 9dd8eee commit 9959297
Showing 1 changed file with 38 additions and 11 deletions.
49 changes: 38 additions & 11 deletions tests/test_folding.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import division, print_function, absolute_import

import numpy
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingRegressor
from sklearn.svm import SVC
from sklearn.metrics.metrics import accuracy_score, roc_auc_score
from sklearn.metrics import accuracy_score, roc_auc_score

from rep.estimators import SklearnClassifier
from rep.metaml import FoldingClassifier
from rep.estimators import SklearnClassifier, SklearnRegressor
from rep.metaml import FoldingRegressor, FoldingClassifier
from rep.test.test_estimators import generate_classification_data, check_classification_model


__author__ = 'antares'


Expand All @@ -19,7 +20,7 @@ def check_folding(classifier, check_instance=True, has_staged_pp=True, has_impor
assert list(classifier.features) == list(X.columns)

check_classification_model(classifier, X, y, check_instance=check_instance, has_staged_pp=has_staged_pp,
has_importances=has_importances)
has_importances=has_importances)

def mean_vote(x):
return numpy.mean(x, axis=0)
Expand All @@ -44,11 +45,37 @@ def mean_vote(x):
assert numpy.all(p == proba)


def test_folding():
# base_ada = SklearnClassifier(AdaBoostClassifier())
# folding_str = FoldingClassifier(base_ada, n_folds=2)
# check_folding(folding_str, True, False, False)
def test_folding_regressor(n_samples=100, n_features=3):
    """
    Exercise FoldingRegressor on pure-noise data.

    Features and targets are drawn independently from a normal distribution,
    so the targets carry no information about the features. The test compares
    the mean squared error of the default folding predictions against three
    reference values: predictions scored against all-zero targets, predictions
    obtained for a shuffled copy of the data, and predictions combined with a
    mean vote function.

    :param n_samples: number of rows in the generated dataset
    :param n_features: number of columns in the generated dataset
    """
    from sklearn.metrics import mean_squared_error

    def average_votes(votes):
        # combine the stacked per-estimator outputs by averaging along axis 0
        return numpy.mean(votes, axis=0)

    features = numpy.random.normal(size=[n_samples, n_features])
    targets = numpy.random.normal(size=n_samples)

    base_regressor = SklearnRegressor(GradientBoostingRegressor())
    folding = FoldingRegressor(base_regressor, n_folds=2)
    folding.fit(features, targets)

    default_preds = folding.predict(features)
    default_mse = mean_squared_error(targets, default_preds)

    # Error against the real targets must exceed half of the error against zeros.
    # NOTE(review): presumably this guards against training targets leaking into
    # the default predictions - confirm against FoldingRegressor.predict.
    zero_targets = targets * 0.
    assert default_mse > mean_squared_error(zero_targets, default_preds) * 0.5

    # Predict on a row-shuffled copy, then undo the shuffle so that position i
    # again corresponds to sample i.
    order = numpy.random.permutation(n_samples)
    restore = numpy.argsort(order)
    shuffled_preds = folding.predict(features[order])[restore]

    # Default predictions must not be more than twice as accurate as shuffled ones.
    assert default_mse > mean_squared_error(targets, shuffled_preds) * 0.5

    # Mean-vote predictions must achieve a strictly lower error than default ones.
    # NOTE(review): presumably because the mean vote includes estimators that
    # saw the sample during training - confirm.
    voted_preds = folding.predict(features, vote_function=average_votes)
    assert default_mse > mean_squared_error(targets, voted_preds)


def test_folding_classifier():
    """
    Run the shared check_folding checks on FoldingClassifier with two base models.

    The positional flags passed to check_folding are, in order,
    check_instance, has_staged_pp and has_importances.
    """
    # AdaBoost base estimator, 2 folds; checked with has_staged_pp=True.
    base_ada = SklearnClassifier(AdaBoostClassifier())
    folding_str = FoldingClassifier(base_ada, n_folds=2)
    check_folding(folding_str, True, True, False)

    # SVC base estimator, 4 folds; checked with has_staged_pp=False.
    # A duplicated base_ada/folding_str pair that was overwritten before being
    # used has been dropped here.
    base_svm = SklearnClassifier(SVC())
    folding_str = FoldingClassifier(base_svm, n_folds=4)
    check_folding(folding_str, True, False, False)

0 comments on commit 9959297

Please sign in to comment.