Skip to content

Commit

Permalink
Fill gaps in doc (X_test can be None). Add some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vecxoz committed Oct 30, 2019
1 parent 4dd0566 commit 1bc58f4
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 7 deletions.
82 changes: 80 additions & 2 deletions tests/test_func_api_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,9 +828,13 @@ def test_exceptions(self):
X_train, y_train, X_test, verbose=25)

# Internal function model_action
assert_raises(ValueError, model_action, LinearRegression(),
X_train, y_train, X_test, sample_weight=None,
assert_raises(ValueError, model_action, LinearRegression(),
X_train, y_train, X_test, sample_weight=None,
action='abc', transform=None)

# X_test is None when mode != 'oof'
assert_raises(ValueError, stacking, [LinearRegression()],
X_train, y_train, None, mode='oof_pred_bag')

#---------------------------------------------------------------------------
# Testing parameter warnings
Expand Down Expand Up @@ -940,6 +944,80 @@ def test_small_input(self):
assert_array_equal(S_train_1, S_train_3)
assert_array_equal(S_test_1, S_test_3)

#---------------------------------------------------------------------------
# Mode 'oof', X_test=None
#---------------------------------------------------------------------------

def test_oof_mode_with_none(self):
    # Scenario: mode='oof' with None passed in place of X_test.
    # The OOF part must match cross_val_predict and the test part must be None,
    # both in the returned values and in the array loaded from save_dir.

    # Reference result: plain cross-validated predictions as a column,
    # and no test predictions at all
    reference_model = LinearRegression()
    expected_oof = cross_val_predict(
        reference_model, X_train, y=y_train, cv=n_folds,
        n_jobs=1, verbose=0, method='predict')
    expected_oof = expected_oof.reshape(-1, 1)
    expected_test = None

    # Result under test: same estimator and folds, X_test given as None
    returned_oof, returned_test = stacking(
        [LinearRegression()], X_train, y_train, None,
        regression=True, n_folds=n_folds, shuffle=False,
        save_dir=temp_dir, mode='oof', random_state=0, verbose=0)

    # Result loaded from file.
    # With cleaning there is normally a single .npy file at any given moment;
    # without cleaning there may be more than one, so the last name
    # in sorted order is taken as the latest file
    npy_paths = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))
    saved = np.load(npy_paths[-1], allow_pickle=True)
    saved_oof, saved_test = saved[0], saved[1]

    # Returned values against the reference
    assert_array_equal(expected_oof, returned_oof)
    assert_array_equal(expected_test, returned_test)

    # Saved values against the reference
    assert_array_equal(expected_oof, saved_oof)
    assert_array_equal(expected_test, saved_test)

#---------------------------------------------------------------------------
# All default values (mode='oof_pred_bag')
#---------------------------------------------------------------------------

def test_all_defaults(self):
    # Scenario: stacking called with all default parameters
    # (apart from save_dir), i.e. mode='oof_pred_bag'.
    # Expected OOF comes from cross_val_predict; expected test prediction
    # is the mean over per-fold models (bagging).

    # Override global n_folds=5, because default value in stacking function is 4
    n_folds = 4

    # One column of test predictions per fold, averaged afterwards.
    # random_state is deliberately NOT passed to KFold: it has no effect
    # when shuffle=False, and recent scikit-learn releases raise ValueError
    # for that combination. The folds are identical without it.
    S_test_temp = np.zeros((X_test.shape[0], n_folds))
    kf = KFold(n_splits=n_folds, shuffle=False)
    for fold_counter, (tr_index, _) in enumerate(kf.split(X_train, y_train)):
        # Fit on the training part of the fold only;
        # the held-out part is not needed to predict X_test
        model = LinearRegression()
        model.fit(X_train[tr_index], y_train[tr_index])
        S_test_temp[:, fold_counter] = model.predict(X_test)
    S_test_1 = np.mean(S_test_temp, axis=1).reshape(-1, 1)

    # Expected OOF predictions as a column
    model = LinearRegression()
    S_train_1 = cross_val_predict(model, X_train, y=y_train, cv=n_folds,
                                  n_jobs=1, verbose=0,
                                  method='predict').reshape(-1, 1)

    # Result under test: everything except save_dir left at default values
    models = [LinearRegression()]
    S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
                                   save_dir=temp_dir)

    # Load OOF from file.
    # Normally if cleaning is performed there is only one .npy file at given
    # moment, but if we have no cleaning there may be more than one file,
    # so we take the latest (last in sorted order)
    file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1]
    S = np.load(file_name, allow_pickle=True)
    S_train_3 = S[0]
    S_test_3 = S[1]

    # Returned values against the reference
    assert_array_equal(S_train_1, S_train_2)
    assert_array_equal(S_test_1, S_test_2)

    # Saved values against the reference
    assert_array_equal(S_train_1, S_train_3)
    assert_array_equal(S_test_1, S_test_3)


#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------

Expand Down
14 changes: 9 additions & 5 deletions vecstack/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,14 @@ def stacking(models, X_train, y_train, X_test,
y_train : numpy 1d array
Target values
X_test : numpy array or sparse matrix of N-dim shape, e.g. 2-dim [n_test_samples, n_features]
X_test : numpy array or sparse matrix of N-dim shape, e.g. 2-dim [n_test_samples, n_features], or None
Test data
Note: X_test can be set to None when mode='oof'
sample_weight : numpy array of shape [n_train_samples]
sample_weight : numpy array of shape [n_train_samples], default None
Individual weights for each sample (passed to fit method of the model).
Note: sample_weight has length of full training set X_train and it would be
split automatically for each fold.
Note: sample_weight must have the same length as full training set X_train.
It will be split automatically for each fold.
regression : boolean, default True
If True - perform stacking for regression task,
Expand Down Expand Up @@ -188,7 +189,7 @@ def stacking(models, X_train, y_train, X_test,
mode: str, default 'oof_pred_bag' (alias 'A')
Note: for details see terminology below
'oof' - return only oof
'oof' - return only oof. X_test can be set to None
'oof_pred' (alias 'B') - return oof and pred
'oof_pred_bag' (alias 'A') - return oof and bagged pred
'pred' - return pred only
Expand Down Expand Up @@ -406,6 +407,9 @@ def your_metric(y_true, y_pred):
# If empty <models> list
if 0 == len(models):
raise ValueError('List of models is empty')
# X_test can be None only if mode='oof'
if X_test is None and mode != 'oof':
raise ValueError("X_test can be None only if mode='oof'")
# Check arrays
# y_train and sample_weight must be 1d ndarrays (i.e. row, not column)
X_train, y_train = check_X_y(X_train,
Expand Down

0 comments on commit 1bc58f4

Please sign in to comment.