-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathtest_regression_regresssklearn.py
132 lines (103 loc) · 4.51 KB
/
test_regression_regresssklearn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import pytest
# Record whether scikit-learn can be imported; the skipif markers on the
# tests in this module read this flag.
try:
    import sklearn
except ImportError:
    SKLEARN_NOT_AVAIL = True
else:
    SKLEARN_NOT_AVAIL = False

# Test inputs are resolved relative to this file: <this dir>/data and
# <this dir>/data/regression.
DATA_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "data",
)
REGRESS_DATA_DIR = os.path.join(DATA_DIR, "regression")
@pytest.mark.skipif(SKLEARN_NOT_AVAIL, reason="scikit-learn dependency not available")
def test_get_obj_params():
    """Smoke-test the ``get_*_obj_params`` helpers of ``regresssklearn``.

    Each helper is called once with ``1`` as its only argument. Nothing is
    asserted on the return values; the test passes when no call raises.
    """
    import rsgislib.regression.regresssklearn as regress_skl

    # NOTE(review): the meaning of the ``1`` argument is not visible from this
    # file -- confirm against the regresssklearn API before changing it.
    param_getters = (
        regress_skl.get_ann_obj_params,
        regress_skl.get_en_obj_params,
        regress_skl.get_knn_obj_params,
        regress_skl.get_kr_obj_params,
        regress_skl.get_et_obj_params,
        regress_skl.get_pls_obj_params,
    )
    for get_params in param_getters:
        get_params(1)
@pytest.mark.skipif(SKLEARN_NOT_AVAIL, reason="scikit-learn dependency not available")
def test_create_search_obj():
    """Check that ``create_search_obj`` accepts an estimator and a grid.

    An ``ExtraTreesRegressor`` and a parameter grid with one candidate value
    per key are passed in. The return value is not inspected; the test passes
    when the call does not raise.
    """
    import rsgislib.regression.regresssklearn as regress_skl
    import numpy
    from sklearn.ensemble import ExtraTreesRegressor

    # Each arange spans exactly one step, so every list holds a single value:
    # [100] for n_estimators and [1] for the other two keys.
    n_estimators_opts = numpy.arange(100, 200, 100, dtype="uint16").tolist()
    max_features_opts = numpy.arange(1, 2, 1, dtype="uint8").tolist()
    max_depth_opts = numpy.arange(1, 2, 1, dtype="uint8").tolist()
    param_grid = {
        "n_estimators": n_estimators_opts,
        "max_features": max_features_opts,
        "max_depth": max_depth_opts,
    }

    regressor = ExtraTreesRegressor()
    regress_skl.create_search_obj(regressor, param_grid, n_runs=25, n_cv=2, n_cores=1)
@pytest.mark.skipif(SKLEARN_NOT_AVAIL, reason="scikit-learn dependency not available")
def test_perform_search_param_opt(tmp_path):
    """Run ``perform_search_param_opt`` and check its output file exists.

    A search object is built for an ``ExtraTreesRegressor`` with a
    one-candidate-per-key grid, the response and predictor columns are read
    from ``sample_pts_test.geojson``, and the function is asked to write to
    ``out.json`` inside ``tmp_path``.

    :param tmp_path: pytest's per-test temporary directory fixture.
    """
    import rsgislib.regression.regresssklearn as regress_skl
    import rsgislib.vectorattrs
    import numpy
    from sklearn.ensemble import ExtraTreesRegressor

    # Each arange spans exactly one step, so every list holds a single value.
    n_estimators_opts = numpy.arange(100, 200, 100, dtype="uint16").tolist()
    max_features_opts = numpy.arange(1, 2, 1, dtype="uint8").tolist()
    max_depth_opts = numpy.arange(1, 2, 1, dtype="uint8").tolist()
    param_grid = {
        "n_estimators": n_estimators_opts,
        "max_features": max_features_opts,
        "max_depth": max_depth_opts,
    }

    regressor = ExtraTreesRegressor()
    search_obj = regress_skl.create_search_obj(
        regressor, param_grid, n_runs=25, n_cv=2, n_cores=1
    )

    pts_file = os.path.join(REGRESS_DATA_DIR, "sample_pts_test.geojson")
    pts_lyr = "sample_pts_test"
    predictor_cols = ["re_b4", "re_b5", "re_b6", "nir_b7", "nir_b8"]

    # Response column first, then the predictor columns, from the same layer.
    y = rsgislib.vectorattrs.get_vec_cols_as_array(pts_file, pts_lyr, cols=["value"])
    x = rsgislib.vectorattrs.get_vec_cols_as_array(
        pts_file, pts_lyr, cols=predictor_cols
    )

    params_json = os.path.join(tmp_path, "out.json")
    regress_skl.perform_search_param_opt(
        params_json, x, y, search_obj, data_scaler=None
    )

    assert os.path.exists(params_json)
@pytest.mark.skipif(SKLEARN_NOT_AVAIL, reason="scikit-learn dependency not available")
def test_perform_kfold_fit():
    """Smoke-test ``perform_kfold_fit`` with an ``ExtraTreesRegressor``.

    The response and predictor columns are read from
    ``sample_pts_train.geojson`` and passed in with ``n_splits=2``,
    ``repeats=2`` and ``shuffle=True``. The return value is not inspected;
    the test passes when the call does not raise.
    """
    import rsgislib.regression.regresssklearn as regress_skl
    import rsgislib.vectorattrs
    from sklearn.ensemble import ExtraTreesRegressor

    regressor = ExtraTreesRegressor()

    pts_file = os.path.join(REGRESS_DATA_DIR, "sample_pts_train.geojson")
    pts_lyr = "sample_pts_train"
    predictor_cols = ["re_b4", "re_b5", "re_b6", "nir_b7", "nir_b8"]

    # Response column first, then the predictor columns, from the same layer.
    y = rsgislib.vectorattrs.get_vec_cols_as_array(pts_file, pts_lyr, cols=["value"])
    x = rsgislib.vectorattrs.get_vec_cols_as_array(
        pts_file, pts_lyr, cols=predictor_cols
    )

    regress_skl.perform_kfold_fit(
        regressor, x, y, n_splits=2, repeats=2, shuffle=True, data_scaler=None
    )
@pytest.mark.skipif(SKLEARN_NOT_AVAIL, reason="scikit-learn dependency not available")
def test_apply_regress_sklearn_mdl(tmp_path):
    """Fit a regressor, apply it to an image and check the output exists.

    An ``ExtraTreesRegressor`` is fitted on columns read from
    ``sample_pts_train.geojson`` and handed to ``apply_regress_sklearn_mdl``
    together with ``sen2_20210527_aber_subset.kea`` and its ``_vldmsk``
    companion image. The result is written to ``out_img.kea`` in ``tmp_path``.

    :param tmp_path: pytest's per-test temporary directory fixture.
    """
    import rsgislib.regression.regresssklearn as regress_skl
    import rsgislib.vectorattrs
    from sklearn.ensemble import ExtraTreesRegressor

    regressor = ExtraTreesRegressor()

    pts_file = os.path.join(REGRESS_DATA_DIR, "sample_pts_train.geojson")
    pts_lyr = "sample_pts_train"
    predictor_cols = ["re_b4", "re_b5", "re_b6", "nir_b7", "nir_b8"]

    # Response column first, then the predictor columns, from the same layer.
    y = rsgislib.vectorattrs.get_vec_cols_as_array(pts_file, pts_lyr, cols=["value"])
    x = rsgislib.vectorattrs.get_vec_cols_as_array(
        pts_file, pts_lyr, cols=predictor_cols
    )
    regressor.fit(x, y)

    predictor_img = os.path.join(DATA_DIR, "sen2_20210527_aber_subset.kea")
    valid_mask_img = os.path.join(DATA_DIR, "sen2_20210527_aber_subset_vldmsk.kea")
    result_img = os.path.join(tmp_path, "out_img.kea")

    # NOTE(review): presumably these image bands line up one-to-one with the
    # predictor columns the model was fitted on (b4..b8) -- confirm.
    img_bands = [4, 5, 6, 7, 8]

    # NOTE(review): the two positional ``1`` values are passed exactly as in
    # the original call; their meaning is not visible from this file.
    regress_skl.apply_regress_sklearn_mdl(
        regressor,
        1,
        predictor_img,
        img_bands,
        valid_mask_img,
        1,
        result_img,
        gdalformat="KEA",
        out_band_names=None,
        calc_stats=True,
        out_no_date_val=0.0,
    )

    assert os.path.exists(result_img)