Skip to content

Commit

Permalink
Merge 25de6f3 into cd7f6ef
Browse files Browse the repository at this point in the history
  • Loading branch information
vecxoz committed Jun 23, 2019
2 parents cd7f6ef + 25de6f3 commit 416388d
Show file tree
Hide file tree
Showing 7 changed files with 318 additions and 165 deletions.
102 changes: 61 additions & 41 deletions tests/test_func_api_classification_binary.py

Large diffs are not rendered by default.

102 changes: 61 additions & 41 deletions tests/test_func_api_classification_multiclass.py

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions tests/test_func_api_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from scipy.sparse import coo_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.datasets import load_boston
from sklearn.metrics import mean_absolute_error
Expand All @@ -39,7 +39,27 @@

boston = load_boston()
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)


# Make train/test split by hand to avoid strange errors probably related to the test suite:
# https://github.com/scikit-learn/scikit-learn/issues/1684
# https://github.com/scikit-learn/scikit-learn/issues/1704
# Note: Python 2.7, 3.4 - OK, but 3.5, 3.6 - error

# Fixed seed keeps the hand-made split reproducible between runs.
np.random.seed(0)
ind = np.arange(500)
np.random.shuffle(ind)

# First 400 shuffled indices form the train part, the remaining 100 the test part.
# NOTE(review): only row indices 0..499 are ever selected - confirm that
# leaving out any rows of X past index 499 is intentional.
ind_train, ind_test = ind[:400], ind[400:]

X_train, y_train = X[ind_train], y[ind_train]
X_test, y_test = X[ind_test], y[ind_test]


#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
Expand Down
98 changes: 59 additions & 39 deletions tests/test_sklearn_api_classification_binary.py

Large diffs are not rendered by default.

98 changes: 59 additions & 39 deletions tests/test_sklearn_api_classification_multiclass.py

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions tests/test_sklearn_api_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from sklearn.base import RegressorMixin
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
Expand All @@ -50,7 +50,27 @@

boston = load_boston()
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)


# Make train/test split by hand to avoid strange errors probably related to the test suite:
# https://github.com/scikit-learn/scikit-learn/issues/1684
# https://github.com/scikit-learn/scikit-learn/issues/1704
# Note: Python 2.7, 3.4 - OK, but 3.5, 3.6 - error

# Fixed seed keeps the hand-made split reproducible between runs.
np.random.seed(0)
ind = np.arange(500)
np.random.shuffle(ind)

# First 400 shuffled indices form the train part, the remaining 100 the test part.
# NOTE(review): only row indices 0..499 are ever selected - confirm that
# leaving out any rows of X past index 499 is intentional.
ind_train, ind_test = ind[:400], ind[400:]

X_train, y_train = X[ind_train], y[ind_train]
X_test, y_test = X[ind_test], y[ind_test]


# -----------------------------------------------------------------------------
# Scikit-learn INcompatible estimator
Expand Down
35 changes: 34 additions & 1 deletion vecstack/coresk.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,35 @@ def _estimator_action(self, estimator, X_train, y_train, X_test,
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------

def _random_choice(self, n, size, bound=2**30):
    """
    Memory efficient (but slower) version of np.random.choice
    that draws unique values (i.e. without replacement).

    Values are drawn one at a time with ``np.random.randint`` and
    duplicates are rejected, so no array of length ``n`` is built.

    Parameters:
    ===========
    n : int
        Upper value for range to choose from: [0, n).
        This parameter is bounded (see bound): values are actually
        drawn from [0, min(bound, n)).
    size : int
        Number of unique values to choose.
        Must not exceed min(bound, n).
    bound : int
        Upper random int for backward compatibility
        with some older numpy versions

    Returns:
    ========
    ids : 1d numpy array of shape (size, ) dtype=np.int32
        Unique values in the order they were drawn.

    Raises:
    =======
    ValueError
        If ``size`` is larger than the number of distinct values
        available, i.e. min(bound, n).
    """
    upper = min(bound, n)
    if size > upper:
        # Rejection sampling below could never collect enough unique
        # values, so fail loudly instead of looping forever.
        raise ValueError(
            'Cannot choose %d unique values from a range of %d values'
            % (size, upper))

    ids = []
    # Set mirrors ``ids`` to make the duplicate check O(1);
    # the list keeps the order in which values were drawn.
    seen = set()
    while len(ids) < size:
        rnd = np.random.randint(upper)
        if rnd not in seen:
            seen.add(rnd)
            ids.append(rnd)
    return np.array(ids, dtype=np.int32)

# -------------------------------------------------------------------------
# -------------------------------------------------------------------------

def _get_footprint(self, X, n_items=1000):
"""Selects ``n_items`` random elements from 2d numpy array or
sparse matrix (or all elements if their number is less or equal
Expand All @@ -861,7 +890,11 @@ def _get_footprint(self, X, n_items=1000):
r, c = X.shape
n = r * c
# np.random.seed(0) # for development
ids = np.random.choice(n, min(n_items, n), replace=False)

# OOM with large arrays (see #29)
# ids = np.random.choice(n, min(n_items, n), replace=False)

ids = self._random_choice(n, min(n_items, n))

for i in ids:
row = i // c
Expand Down

0 comments on commit 416388d

Please sign in to comment.