From dd04d3fe94dcbdad77a60670bd1f9045ed67f3fe Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 11:52:52 +0700 Subject: [PATCH 01/30] refactor --- xrspatial/focal.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) mode change 100644 => 100755 xrspatial/focal.py diff --git a/xrspatial/focal.py b/xrspatial/focal.py old mode 100644 new mode 100755 index ce21dc4e..38fdcb7c --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -6,7 +6,7 @@ import numpy as np from xarray import DataArray -from xrspatial.utils import ngjit +from xrspatial.utils import ngjit, get_dataarray_resolution from xrspatial.convolution import convolve_2d warnings.simplefilter('default') @@ -54,21 +54,16 @@ def _get_distance(distance_str): number = splits[0] if not _is_numeric(number): - raise ValueError( - "Invalid value.\n" - "Distance should be a possitive numeric value.\n") + raise ValueError("Distance should be a positive numeric value.\n") distance = float(number) if distance <= 0: - raise ValueError( - "Invalid value.\n" - "Distance should be a possitive.\n") + raise ValueError("Distance should be a positive.\n") unit = unit.lower() unit = unit.replace(' ', '') if unit not in UNITS: raise ValueError( - "Invalid value.\n" "Distance unit should be one of the following: \n" "meter (meter, meters, m),\n" "kilometer (kilometer, kilometers, km),\n" @@ -80,7 +75,7 @@ def _get_distance(distance_str): return meters -def calc_cellsize(raster, x='x', y='y'): +def calc_cellsize(raster): if 'unit' in raster.attrs: unit = raster.attrs['unit'] else: @@ -88,10 +83,7 @@ def calc_cellsize(raster, x='x', y='y'): warnings.warn('Raster distance unit not provided. ' 'Use meter as default.', Warning) - # TODO: check coordinate system - # if in lat-lon, need to convert to meter, lnglat_to_meters - cellsize_x = raster.coords[x].data[1] - raster.coords[x].data[0] - cellsize_y = raster.coords[y].data[1] - raster.coords[y].data[0] + cellsize_x, cellsize_y = get_dataarray_resolution(raster) cellsize_x = _to_meters(cellsize_x, unit) cellsize_y = _to_meters(cellsize_y, unit) From 396bad360a4d4641f4521179689d11a0bef593a0 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 11:57:50 +0700 Subject: [PATCH 02/30] move kernel creation to convolution.py --- xrspatial/convolution.py | 172 +++++++++++++++++++++++++++++++++- xrspatial/focal.py | 165 -------------------------------- xrspatial/tests/test_focal.py | 13 +-- 3 files changed, 174 insertions(+), 176 deletions(-) mode change 100644 => 100755 xrspatial/convolution.py mode change 100644 => 100755 xrspatial/tests/test_focal.py diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py old mode 100644 new mode 100755 index dce9bbbd..41d74090 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -1,10 +1,178 @@ +import re +import numpy as np + +import warnings +warnings.simplefilter('default') + from numba import cuda, float32, int32, prange, jit -from xrspatial.utils import ngjit from xrspatial.utils import has_cuda from xrspatial.utils import cuda_args +from xrspatial.utils import get_dataarray_resolution -import numpy as np + +DEFAULT_UNIT = 'meter' +METER = 1 +FOOT = 0.3048 +KILOMETER = 1000 +MILE = 1609.344 +UNITS = {'meter': METER, 'meters': METER, 'm': METER, + 'feet': FOOT, 'foot': FOOT, 'ft': FOOT, + 'miles': MILE, 'mls': MILE, 'ml': MILE, + 'kilometer': KILOMETER, 'kilometers': KILOMETER, 'km': KILOMETER} + + +def _is_numeric(s): + try: + float(s) + return True + except ValueError: + return False + + +def _to_meters(d, unit): + return d * UNITS[unit] + + +def _get_distance(distance_str): + # return distance in meters + + # spit string into numbers and text + splits = [x for x in re.split(r'(-?\d*\.?\d+)', distance_str) if x != ''] + if len(splits) not in [1, 2]: + raise ValueError("Invalid distance.") + + unit = DEFAULT_UNIT + if len(splits) == 1: + warnings.warn('Raster distance unit not provided. ' + 'Use meter as default.', Warning) + elif len(splits) == 2: + unit = splits[1] + + number = splits[0] + if not _is_numeric(number): + raise ValueError("Distance should be a positive numeric value.\n") + + distance = float(number) + if distance <= 0: + raise ValueError("Distance should be a positive.\n") + + unit = unit.lower() + unit = unit.replace(' ', '') + if unit not in UNITS: + raise ValueError( + "Distance unit should be one of the following: \n" + "meter (meter, meters, m),\n" + "kilometer (kilometer, kilometers, km),\n" + "foot (foot, feet, ft),\n" + "mile (mile, miles, ml, mls)") + + # convert distance to meters + meters = _to_meters(distance, unit) + return meters + + +def calc_cellsize(raster): + if 'unit' in raster.attrs: + unit = raster.attrs['unit'] + else: + unit = DEFAULT_UNIT + warnings.warn('Raster distance unit not provided. ' + 'Use meter as default.', Warning) + + cellsize_x, cellsize_y = get_dataarray_resolution(raster) + cellsize_x = _to_meters(cellsize_x, unit) + cellsize_y = _to_meters(cellsize_y, unit) + + # When converting from lnglat_to_meters, could have negative cellsize in y + return cellsize_x, np.abs(cellsize_y) + + +def _gen_ellipse_kernel(half_w, half_h): + # x values of interest + x = np.linspace(-half_w, half_w, 2 * half_w + 1) + # y values of interest, as a "column" array + y = np.linspace(-half_h, half_h, 2 * half_h + 1)[:, None] + + # True for points inside the ellipse + # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue + ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 + return ellipse.astype(float) + + +def _validate_kernel(kernel): + """Validatetes that the kernel is a numpy array and has odd dimensions.""" + + if not isinstance(kernel, np.ndarray): + raise ValueError( + "Received a custom kernel that is not a Numpy array.", + """The kernel received was of type {} and needs to be of type `ndarray` + """.format(type(kernel)) + ) + else: + rows, cols = kernel.shape + + if (rows % 2 == 0 or cols % 2 == 0): + raise ValueError( + "Received custom kernel with improper dimensions.", + """A custom kernel needs to have an odd shape, the + supplied kernel has {} rows and {} columns. + """.format(rows, cols) + ) + + +def circle_kernel(cellsize_x, cellsize_y, radius): + # validate radius, convert radius to meters + r = _get_distance(str(radius)) + + kernel_half_w = int(r / cellsize_x) + kernel_half_h = int(r / cellsize_y) + + kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) + return kernel + + +def annulus_kernel(cellsize_x, cellsize_y, outer_radius, inner_radius): + + # validate radii, convert to meters + r2 = _get_distance(str(outer_radius)) + r1 = _get_distance(str(inner_radius)) + + # Validate that outer radius is indeed outer radius + if r2 > r1: + r_outer = r2 + r_inner = r1 + else: + r_outer = r1 + r_inner = r2 + + if r_outer - r_inner < np.sqrt((cellsize_x / 2)**2 + (cellsize_y / 2)**2): + warnings.warn('Annulus radii are closer than cellsize distance.', + Warning) + + # Get the two circular kernels for the annulus + kernel_outer = circle_kernel(cellsize_x, cellsize_y, outer_radius) + kernel_inner = circle_kernel(cellsize_x, cellsize_y, inner_radius) + + # Need to pad kernel_inner to get it the same shape and centered + # in kernel_outer + pad_vals = np.array(kernel_outer.shape) - np.array(kernel_inner.shape) + pad_kernel = np.pad(kernel_inner, + # Pad ((before_rows, after_rows), + # (before_cols, after_cols)) + pad_width=((pad_vals[0] // 2, pad_vals[0] // 2), + (pad_vals[1] // 2, pad_vals[1] // 2)), + mode='constant', + constant_values=0) + # Get annulus by subtracting inner from outer + kernel = kernel_outer - pad_kernel + return kernel + + +def custom_kernel(kernel): + """Validates a custom kernel. If the kernel is valid, returns itself.""" + _validate_kernel(kernel) + return kernel def convolve_2d(image, kernel, pad=True, use_cuda=True): diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 38fdcb7c..34c17b70 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -14,171 +14,6 @@ # TODO: Make convolution more generic with numba first-class functions. -DEFAULT_UNIT = 'meter' -METER = 1 -FOOT = 0.3048 -KILOMETER = 1000 -MILE = 1609.344 -UNITS = {'meter': METER, 'meters': METER, 'm': METER, - 'feet': FOOT, 'foot': FOOT, 'ft': FOOT, - 'miles': MILE, 'mls': MILE, 'ml': MILE, - 'kilometer': KILOMETER, 'kilometers': KILOMETER, 'km': KILOMETER} - - -def _is_numeric(s): - try: - float(s) - return True - except ValueError: - return False - - -def _to_meters(d, unit): - return d * UNITS[unit] - - -def _get_distance(distance_str): - # return distance in meters - - # spit string into numbers and text - splits = [x for x in re.split(r'(-?\d*\.?\d+)', distance_str) if x != ''] - if len(splits) not in [1, 2]: - raise ValueError("Invalid distance.") - - unit = DEFAULT_UNIT - if len(splits) == 1: - warnings.warn('Raster distance unit not provided. ' - 'Use meter as default.', Warning) - elif len(splits) == 2: - unit = splits[1] - - number = splits[0] - if not _is_numeric(number): - raise ValueError("Distance should be a positive numeric value.\n") - - distance = float(number) - if distance <= 0: - raise ValueError("Distance should be a positive.\n") - - unit = unit.lower() - unit = unit.replace(' ', '') - if unit not in UNITS: - raise ValueError( - "Distance unit should be one of the following: \n" - "meter (meter, meters, m),\n" - "kilometer (kilometer, kilometers, km),\n" - "foot (foot, feet, ft),\n" - "mile (mile, miles, ml, mls)") - - # convert distance to meters - meters = _to_meters(distance, unit) - return meters - - -def calc_cellsize(raster): - if 'unit' in raster.attrs: - unit = raster.attrs['unit'] - else: - unit = DEFAULT_UNIT - warnings.warn('Raster distance unit not provided. ' - 'Use meter as default.', Warning) - - cellsize_x, cellsize_y = get_dataarray_resolution(raster) - cellsize_x = _to_meters(cellsize_x, unit) - cellsize_y = _to_meters(cellsize_y, unit) - - # When converting from lnglat_to_meters, could have negative cellsize in y - return cellsize_x, np.abs(cellsize_y) - - -def _gen_ellipse_kernel(half_w, half_h): - # x values of interest - x = np.linspace(-half_w, half_w, 2 * half_w + 1) - # y values of interest, as a "column" array - y = np.linspace(-half_h, half_h, 2 * half_h + 1)[:, None] - - # True for points inside the ellipse - # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue - ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 - return ellipse.astype(float) - - -def _validate_kernel(kernel): - """Validatetes that the kernel is a numpy array and has odd dimensions.""" - - if not isinstance(kernel, np.ndarray): - raise ValueError( - "Received a custom kernel that is not a Numpy array.", - """The kernel received was of type {} and needs to be of type `ndarray` - """.format(type(kernel)) - ) - else: - rows, cols = kernel.shape - - if (rows % 2 == 0 or cols % 2 == 0): - raise ValueError( - "Received custom kernel with improper dimensions.", - """A custom kernel needs to have an odd shape, the - supplied kernel has {} rows and {} columns. - """.format(rows, cols) - ) - - -def circle_kernel(cellsize_x, cellsize_y, radius): - # validate radius, convert radius to meters - r = _get_distance(str(radius)) - - kernel_half_w = int(r / cellsize_x) - kernel_half_h = int(r / cellsize_y) - - kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) - return kernel - - -def annulus_kernel(cellsize_x, cellsize_y, outer_radius, inner_radius): - - # validate radii, convert to meters - r2 = _get_distance(str(outer_radius)) - r1 = _get_distance(str(inner_radius)) - - # Validate that outer radius is indeed outer radius - if r2 > r1: - r_outer = r2 - r_inner = r1 - else: - r_outer = r1 - r_inner = r2 - - if r_outer - r_inner < np.sqrt((cellsize_x / 2)**2 + \ - (cellsize_y / 2)**2): - warnings.warn('Annulus radii are closer than cellsize distance.', - Warning) - - # Get the two circular kernels for the annulus - kernel_outer = circle_kernel(cellsize_x, cellsize_y, outer_radius) - kernel_inner = circle_kernel(cellsize_x, cellsize_y, inner_radius) - - # Need to pad kernel_inner to get it the same shape and centered - # in kernel_outer - pad_vals = np.array(kernel_outer.shape) - np.array(kernel_inner.shape) - pad_kernel = np.pad(kernel_inner, - # Pad ((before_rows, after_rows), - # (before_cols, after_cols)) - pad_width=((pad_vals[0] // 2, pad_vals[0] // 2), - (pad_vals[1] // 2, pad_vals[1] // 2)), - mode='constant', - constant_values=0) - # Get annulus by subtracting inner from outer - kernel = kernel_outer - pad_kernel - return kernel - - -def custom_kernel(kernel): - """Validates a custom kernel. If the kernel is valid, returns itself.""" - _validate_kernel(kernel) - return kernel - - @ngjit def _mean(data, excludes): out = np.zeros_like(data) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py old mode 100644 new mode 100755 index 0943cf43..f5a911c8 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -2,15 +2,10 @@ import numpy as np from xrspatial import mean -from xrspatial.convolution import convolve_2d -from xrspatial.focal import ( - calc_cellsize, - calc_mean, - calc_sum, - hotspots, - circle_kernel, - annulus_kernel, - _validate_kernel, +from xrspatial.focal import hotspots +from xrspatial.convolution import ( + convolve_2d, calc_cellsize, _validate_kernel, + circle_kernel, annulus_kernel, ) import pytest From 93e18eec3803c25a6546b3d7823f5acefd80e91d Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 14:08:34 +0700 Subject: [PATCH 03/30] import fix --- xrspatial/focal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 34c17b70..4c7837de 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -6,8 +6,8 @@ import numpy as np from xarray import DataArray -from xrspatial.utils import ngjit, get_dataarray_resolution -from xrspatial.convolution import convolve_2d +from xrspatial.utils import ngjit +from xrspatial.convolution import convolve_2d, _validate_kernel warnings.simplefilter('default') From 58cd2f9007775ad94a15e94e39d2c49ad2eabf97 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 16:51:09 +0700 Subject: [PATCH 04/30] convolve_2d: remove padding, remove use_cuda param --- xrspatial/convolution.py | 112 ++++++++-------- xrspatial/focal.py | 2 +- xrspatial/tests/test_focal.py | 246 ++++++++++++++-------------------- 3 files changed, 157 insertions(+), 203 deletions(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 41d74090..cd8a7af4 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -10,6 +10,13 @@ from xrspatial.utils import cuda_args from xrspatial.utils import get_dataarray_resolution +# 3rd-party +try: + import cupy +except ImportError: + class cupy(object): + ndarray = False + DEFAULT_UNIT = 'meter' METER = 1 @@ -175,61 +182,25 @@ def custom_kernel(kernel): return kernel -def convolve_2d(image, kernel, pad=True, use_cuda=True): - """Function to call the 2D convolution via Numba. - The Numba convolution function does not account for an edge so - if we wish to take this into account, will pad the image array. - """ - # Don't allow padding on (1, 1) kernel - if (kernel.shape[0] == 1 and kernel.shape[1] == 1): - pad = False - - if pad: - pad_rows = kernel.shape[0] // 2 - pad_cols = kernel.shape[1] // 2 - pad_width = ((pad_rows, pad_rows), - (pad_cols, pad_cols)) - else: - # If padding is not desired, set pads to 0 - pad_rows = 0 - pad_cols = 0 - pad_width = 0 - - padded_image = np.pad(image, pad_width=pad_width, mode="reflect") - result = np.empty_like(padded_image) - - if has_cuda() and use_cuda: - griddim, blockdim = cuda_args(padded_image.shape) - _convolve_2d_cuda[griddim, blockdim](result, kernel, padded_image) - else: - result = _convolve_2d(kernel, padded_image) - - if pad: - result = result[pad_rows:-pad_rows, pad_cols:-pad_cols] - - if result.shape != image.shape: - raise ValueError("Output and input rasters are not the same shape.") - - return result - - @jit(nopython=True, nogil=True, parallel=True) -def _convolve_2d(kernel, image): - """Apply kernel to image.""" +def _convolve_2d_cpu(data, kernel): + """Apply kernel to data image.""" - nx = image.shape[0] - ny = image.shape[1] + # TODO: handle nan + + nx = data.shape[0] + ny = data.shape[1] nkx = kernel.shape[0] nky = kernel.shape[1] wkx = nkx // 2 wky = nky // 2 - result = np.zeros(image.shape, dtype=float32) + result = np.zeros(data.shape, dtype=float32) - for i in prange(0, nx, 1): + for i in prange(wkx, nx-wkx): iimin = max(i - wkx, 0) iimax = min(i + wkx + 1, nx) - for j in prange(0, ny, 1): + for j in prange(wky, ny-wky): jjmin = max(j - wky, 0) jjmax = min(j + wky + 1, ny) num = 0.0 @@ -237,7 +208,7 @@ def _convolve_2d(kernel, image): iii = wkx + ii - i for jj in range(jjmin, jjmax, 1): jjj = wky + jj - j - num += kernel[iii, jjj] * image[ii, jj] + num += kernel[iii, jjj] * data[ii, jj] result[i, j] = num return result @@ -245,34 +216,61 @@ def _convolve_2d(kernel, image): # https://www.vincent-lunot.com/post/an-introduction-to-cuda-in-python-part-3/ @cuda.jit -def _convolve_2d_cuda(result, kernel, image): - # expects a 2D grid and 2D blocks, +def _convolve_2d_cuda(data, kernel, result): + # expect a 2D grid and 2D blocks, # a kernel with odd numbers of rows and columns, (-1-) # a grayscale image # (-2-) 2D coordinates of the current thread: i, j = cuda.grid(2) - # (-3-) if the thread coordinates are outside of the image, we ignore the thread: - image_rows, image_cols = image.shape - if (i >= image_rows) or (j >= image_cols): + # (-3-) if the thread coordinates are outside of the data image, we ignore the thread: + data_rows, data_cols = data.shape + if (i >= data_rows) or (j >= data_cols): return - # To compute the result at coordinates (i, j), we need to use delta_rows rows of the image + # To compute the result at coordinates (i, j), we need to use delta_rows rows of the array # before and after the i_th row, - # as well as delta_cols columns of the image before and after the j_th column: + # as well as delta_cols columns of the array before and after the j_th column: delta_rows = kernel.shape[0] // 2 delta_cols = kernel.shape[1] // 2 # The result at coordinates (i, j) is equal to - # sum_{k, l} kernel[k, l] * image[i - k + delta_rows, j - l + delta_cols] + # sum_{k, l} kernel[k, l] * data[i - k + delta_rows, j - l + delta_cols] # with k and l going through the whole kernel array: s = 0 for k in range(kernel.shape[0]): for l in range(kernel.shape[1]): i_k = i - k + delta_rows j_l = j - l + delta_cols - # (-4-) Check if (i_k, j_k) coordinates are inside the image: - if (i_k >= 0) and (i_k < image_rows) and (j_l >= 0) and (j_l < image_cols): - s += kernel[k, l] * image[i_k, j_l] + # (-4-) Check if (i_k, j_k) coordinates are inside the array: + if (i_k >= 0) and (i_k < data_rows) and (j_l >= 0) and (j_l < data_cols): + s += kernel[k, l] * data[i_k, j_l] result[i, j] = s + + +def _convolve_2d_gpu(data, kernel): + result = cupy.empty_like(data) + griddim, blockdim = cuda_args(data.shape) + _convolve_2d_cuda[griddim, blockdim](data, kernel, result) + return result + + +def convolve_2d(data, kernel): + """Function to call the 2D convolution via Numba. + The Numba convolution function does not account for an edge so + if we wish to take this into account, will pad the data array. + """ + + # numpy case + if isinstance(data, np.ndarray): + out = _convolve_2d_cpu(data, kernel) + + # cupy case + elif has_cuda() and isinstance(data, cupy.ndarray): + out = _convolve_2d_gpu(data, kernel) + + else: + raise TypeError('Unsupported Array Type: {}'.format(type(data))) + + return out diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 4c7837de..2743c2dd 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -224,7 +224,7 @@ def hotspots(raster, kernel, x='x', y='y'): "(%s, %s)".format(y, x)) # apply kernel to raster values - mean_array = convolve_2d(raster.values, kernel / kernel.sum(), pad=True) + mean_array = convolve_2d(raster.values, kernel / kernel.sum()) # calculate z-scores global_mean = np.nanmean(raster.values) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index f5a911c8..b8e6c62e 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -58,163 +58,119 @@ def test_mean_transfer_function(): da_mean[:, -1] = data_random[:, -1] assert abs(da_mean.mean() - data_random.mean()) < 10**-3 -def test_kernel(): - n, m = 6, 6 - raster = xr.DataArray(np.ones((n, m)), dims=['y', 'x']) - raster['x'] = np.linspace(0, n, n) - raster['y'] = np.linspace(0, m, m) - - cellsize_x, cellsize_y = calc_cellsize(raster) - # Passing invalid radius units for `circle` - with pytest.raises(Exception) as e_info: - circle_kernel(cellsize_x, cellsize_y, "10 furlongs") - assert e_info - - # Passing invalid radius for `annulus` - with pytest.raises(Exception) as e_info: - annulus_kernel(cellsize_x, cellsize_y, 4, "2 leagues") - assert e_info - - # Passing custom kernel with even dimensions - with pytest.raises(Exception) as e_info: - _validate_kernel(np.ones((2, 2))) - assert e_info - - # Passing custom kernel of wrong type - with pytest.raises(Exception) as e_info: - _validate_kernel([[1, 1, 1]]) - assert e_info - def test_convolution(): - n, m = 6, 6 - raster = xr.DataArray(np.ones((n, m)), dims=['y', 'x']) + data = np.array([[0., 1., 1., 1., 1., 1.], + [1., 0., 1., 1., 1., 1.], + [1., 1., 0., 1., 1., 1.], + [1., 1., 1., np.nan, 1., 1.], + [1., 1., 1., 1., 0., 1.], + [1., 1., 1., 1., 1., 0.]]) + m, n = data.shape + raster = xr.DataArray(data, dims=['y', 'x']) raster['x'] = np.linspace(0, n, n) raster['y'] = np.linspace(0, m, m) cellsize_x, cellsize_y = calc_cellsize(raster) - # add some nan pixels - nan_cells = [(i, i) for i in range(n)] - for cell in nan_cells: - raster[cell[0], cell[1]] = np.nan - # kernel array = [[1]] - kernel = np.ones((1, 1)) - sum_output_1 = convolve_2d(raster.values, kernel) - # np.nansum(np.array([np.nan])) = 0.0 - expected_out_sum_1 = np.array([[0., 1., 1., 1., 1., 1.], - [1., 0., 1., 1., 1., 1.], - [1., 1., 0., 1., 1., 1.], - [1., 1., 1., 0., 1., 1.], - [1., 1., 1., 1., 0., 1.], - [1., 1., 1., 1., 1., 0.]]) - # Convolution will return np.nan, so convert nan to 0 - assert np.all(np.nan_to_num(expected_out_sum_1) == expected_out_sum_1) - - # np.nanmean(np.array([np.nan])) = nan - mean_output_1 = convolve_2d(raster.values, kernel / kernel.sum()) - for cell in nan_cells: - assert np.isnan(mean_output_1[cell[0], cell[1]]) - # remaining cells are 1s - for i in range(n): - for j in range(m): - if i != j: - assert mean_output_1[i, j] == 1 + kernel1 = np.ones((1, 1)) + output_1 = convolve_2d(raster.data, kernel1) + expected_output_1 = np.array([[0., 1., 1., 1., 1., 1.], + [1., 0., 1., 1., 1., 1.], + [1., 1., 0., 1., 1., 1.], + [1., 1., 1., np.nan, 1., 1.], + [1., 1., 1., 1., 0., 1.], + [1., 1., 1., 1., 1., 0.]]) + assert np.isclose(output_1, expected_output_1, equal_nan=True).all() # kernel array: [[0, 1, 0], # [1, 1, 1], # [0, 1, 0]] - kernel = circle_kernel(cellsize_x, cellsize_y, 2) - sum_output_2 = convolve_2d(np.nan_to_num(raster.values), kernel, pad=False) - expected_out_sum_2 = np.array([[2., 2., 4., 4., 4., 3.], - [2., 4., 3., 5., 5., 4.], - [4., 3., 4., 3., 5., 4.], - [4., 5., 3., 4., 3., 4.], - [4., 5., 5., 3., 4., 2.], - [3., 4., 4., 4., 2., 2.]]) - - assert np.all(sum_output_2 == expected_out_sum_2) - - mean_output_2 = convolve_2d(np.ones((n, m)), kernel / kernel.sum(), pad=True) - expected_mean_output_2 = np.ones((n, m)) - assert np.all(mean_output_2 == expected_mean_output_2) + kernel2 = circle_kernel(cellsize_x, cellsize_y, 2) + output_2 = convolve_2d(raster.data, kernel2) + expected_output_2 = np.array([[0., 0., 0., 0., 0., 0.], + [0., 4., 3., 5., 5., 0.], + [0., 3., np.nan, np.nan, np.nan, 0.], + [0., 5., np.nan, np.nan, np.nan, 0.], + [0., 5., np.nan, np.nan, np.nan, 0.], + [0., 0., 0., 0., 0., 0.]]) + # kernel2 is of 3x3, thus the border edge is 1 cell long. + # currently, ignoring border edge (i.e values in edges are all 0s) + assert np.isclose(output_2, expected_output_2, equal_nan=True).all() # kernel array: [[0, 1, 0], # [1, 0, 1], # [0, 1, 0]] - kernel = annulus_kernel(cellsize_x, cellsize_y, 2.0, 0.5) - sum_output_3 = convolve_2d(np.nan_to_num(raster.values), kernel, pad=False) - expected_out_sum_3 = np.array([[2., 1., 3., 3., 3., 2.], - [1., 4., 2., 4., 4., 3.], - [3., 2., 4., 2., 4., 3.], - [3., 4., 2., 4., 2., 3.], - [3., 4., 4., 2., 4., 1.], - [2., 3., 3., 3., 1., 2.]]) - - assert np.all(sum_output_3 == expected_out_sum_3) - - mean_output_3 = convolve_2d(np.ones((n, m)), kernel / kernel.sum(), pad=True) - expected_mean_output_3 = np.ones((n, m)) - assert np.all(mean_output_3 == expected_mean_output_3) - - -def test_hotspot(): - n, m = 10, 10 - raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) - raster['x'] = np.linspace(0, n, n) - raster['y'] = np.linspace(0, m, m) - cellsize_x, cellsize_y = calc_cellsize(raster) - - kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) - - all_idx = zip(*np.where(raster.values == 0)) - - nan_cells = [(i, i) for i in range(m)] - for cell in nan_cells: - raster[cell[0], cell[1]] = np.nan - - # add some extreme values - hot_region = [(1, 1), (1, 2), (1, 3), - (2, 1), (2, 2), (2, 3), - (3, 1), (3, 2), (3, 3)] - cold_region = [(7, 7), (7, 8), (7, 9), - (8, 7), (8, 8), (8, 9), - (9, 7), (9, 8), (9, 9)] - for p in hot_region: - raster[p[0], p[1]] = 10000 - for p in cold_region: - raster[p[0], p[1]] = -10000 - - no_significant_region = [id for id in all_idx if id not in hot_region and - id not in cold_region] - - hotspots_output = hotspots(raster, kernel) - - # check output's properties - # output must be an xarray DataArray - assert isinstance(hotspots_output, xr.DataArray) - assert isinstance(hotspots_output.values, np.ndarray) - assert issubclass(hotspots_output.values.dtype.type, np.int8) - - # shape, dims, coords, attr preserved - assert raster.shape == hotspots_output.shape - assert raster.dims == hotspots_output.dims - assert raster.attrs == hotspots_output.attrs - for coord in raster.coords: - assert np.all(raster[coord] == hotspots_output[coord]) - - # no nan in output - assert not np.isnan(np.min(hotspots_output)) - - # output of extreme regions are non-zeros - # hot spots - hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) - assert np.all(hot_spot >= 0) - assert np.sum(hot_spot) > 0 - # cold spots - cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) - assert np.all(cold_spot <= 0) - assert np.sum(cold_spot) < 0 - # output of no significant regions are 0s - no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) - assert np.all(no_sign == 0) + kernel3 = annulus_kernel(cellsize_x, cellsize_y, 2.0, 0.5) + output_3 = convolve_2d(np.nan_to_num(raster.values), kernel3) + expected_output_3 = np.array([[0., 0., 0., 0., 0., 0.], + [0., 4., 2., 4., 4., 0.], + [0., 2., 4., 2., 4., 0.], + [0., 4., 2., 4., 2., 0.], + [0., 4., 4., 2., 4., 0.], + [0., 0., 0., 0., 0., 0.]]) + # kernel3 is of 3x3, thus the border edge is 1 cell long. + # currently, ignoring border edge (i.e values in edges are all 0s) + assert np.isclose(output_3, expected_output_3, equal_nan=True).all() + + +# def test_hotspot(): +# n, m = 10, 10 +# raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) +# raster['x'] = np.linspace(0, n, n) +# raster['y'] = np.linspace(0, m, m) +# cellsize_x, cellsize_y = calc_cellsize(raster) +# +# kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) +# +# all_idx = zip(*np.where(raster.values == 0)) +# +# nan_cells = [(i, i) for i in range(m)] +# for cell in nan_cells: +# raster[cell[0], cell[1]] = np.nan +# +# # add some extreme values +# hot_region = [(1, 1), (1, 2), (1, 3), +# (2, 1), (2, 2), (2, 3), +# (3, 1), (3, 2), (3, 3)] +# cold_region = [(7, 7), (7, 8), (7, 9), +# (8, 7), (8, 8), (8, 9), +# (9, 7), (9, 8), (9, 9)] +# for p in hot_region: +# raster[p[0], p[1]] = 10000 +# for p in cold_region: +# raster[p[0], p[1]] = -10000 +# +# no_significant_region = [id for id in all_idx if id not in hot_region and +# id not in cold_region] +# +# hotspots_output = hotspots(raster, kernel) +# +# # check output's properties +# # output must be an xarray DataArray +# assert isinstance(hotspots_output, xr.DataArray) +# assert isinstance(hotspots_output.values, np.ndarray) +# assert issubclass(hotspots_output.values.dtype.type, np.int8) +# +# # shape, dims, coords, attr preserved +# assert raster.shape == hotspots_output.shape +# assert raster.dims == hotspots_output.dims +# assert raster.attrs == hotspots_output.attrs +# for coord in raster.coords: +# assert np.all(raster[coord] == hotspots_output[coord]) +# +# # no nan in output +# assert not np.isnan(np.min(hotspots_output)) +# +# # output of extreme regions are non-zeros +# # hot spots +# hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) +# assert np.all(hot_spot >= 0) +# assert np.sum(hot_spot) > 0 +# # cold spots +# cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) +# assert np.all(cold_spot <= 0) +# assert np.sum(cold_spot) < 0 +# # output of no significant regions are 0s +# no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) +# assert np.all(no_sign == 0) From 6f393f0ad1c8f958d20e4fef52d9bad1f5b71c84 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 19:57:40 +0700 Subject: [PATCH 05/30] convolve_2d: add cupy tests --- xrspatial/tests/test_focal.py | 89 ++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index b8e6c62e..a6e2f317 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -1,6 +1,11 @@ +import pytest import xarray as xr import numpy as np +import dask.array as da + +from xrspatial.utils import doesnt_have_cuda + from xrspatial import mean from xrspatial.focal import hotspots from xrspatial.convolution import ( @@ -59,35 +64,32 @@ def test_mean_transfer_function(): assert abs(da_mean.mean() - data_random.mean()) < 10**-3 +convolve_2d_data = np.array([[0., 1., 1., 1., 1., 1.], + [1., 0., 1., 1., 1., 1.], + [1., 1., 0., 1., 1., 1.], + [1., 1., 1., np.nan, 1., 1.], + [1., 1., 1., 1., 0., 1.], + [1., 1., 1., 1., 1., 0.]]) + + def test_convolution(): - data = np.array([[0., 1., 1., 1., 1., 1.], - [1., 0., 1., 1., 1., 1.], - [1., 1., 0., 1., 1., 1.], - [1., 1., 1., np.nan, 1., 1.], - [1., 1., 1., 1., 0., 1.], - [1., 1., 1., 1., 1., 0.]]) - m, n = data.shape - raster = xr.DataArray(data, dims=['y', 'x']) - raster['x'] = np.linspace(0, n, n) - raster['y'] = np.linspace(0, m, m) - cellsize_x, cellsize_y = calc_cellsize(raster) - - # kernel array = [[1]] + data = convolve_2d_data + kernel1 = np.ones((1, 1)) - output_1 = convolve_2d(raster.data, kernel1) + output_1 = convolve_2d(data, kernel1) expected_output_1 = np.array([[0., 1., 1., 1., 1., 1.], [1., 0., 1., 1., 1., 1.], [1., 1., 0., 1., 1., 1.], [1., 1., 1., np.nan, 1., 1.], [1., 1., 1., 1., 0., 1.], [1., 1., 1., 1., 1., 0.]]) + assert isinstance(output_1, np.ndarray) assert np.isclose(output_1, expected_output_1, equal_nan=True).all() - # kernel array: [[0, 1, 0], - # [1, 1, 1], - # [0, 1, 0]] - kernel2 = circle_kernel(cellsize_x, cellsize_y, 2) - output_2 = convolve_2d(raster.data, kernel2) + kernel2 = np.array([[0, 1, 0], + [1, 1, 1], + [0, 1, 0]]) + output_2 = convolve_2d(data, kernel2) expected_output_2 = np.array([[0., 0., 0., 0., 0., 0.], [0., 4., 3., 5., 5., 0.], [0., 3., np.nan, np.nan, np.nan, 0.], @@ -96,24 +98,57 @@ def test_convolution(): [0., 0., 0., 0., 0., 0.]]) # kernel2 is of 3x3, thus the border edge is 1 cell long. # currently, ignoring border edge (i.e values in edges are all 0s) + assert isinstance(output_2, np.ndarray) assert np.isclose(output_2, expected_output_2, equal_nan=True).all() - # kernel array: [[0, 1, 0], - # [1, 0, 1], - # [0, 1, 0]] - kernel3 = annulus_kernel(cellsize_x, cellsize_y, 2.0, 0.5) - output_3 = convolve_2d(np.nan_to_num(raster.values), kernel3) + kernel3 = np.array([[0, 1, 0], + [1, 0, 1], + [0, 1, 0]]) + output_3 = convolve_2d(data, kernel3) expected_output_3 = np.array([[0., 0., 0., 0., 0., 0.], [0., 4., 2., 4., 4., 0.], - [0., 2., 4., 2., 4., 0.], - [0., 4., 2., 4., 2., 0.], - [0., 4., 4., 2., 4., 0.], + [0., 2., np.nan, np.nan, np.nan, 0.], + [0., 4., np.nan, np.nan, np.nan, 0.], + [0., 4., np.nan, np.nan, np.nan, 0.], [0., 0., 0., 0., 0., 0.]]) # kernel3 is of 3x3, thus the border edge is 1 cell long. # currently, ignoring border edge (i.e values in edges are all 0s) + assert isinstance(output_3, np.ndarray) assert np.isclose(output_3, expected_output_3, equal_nan=True).all() +@pytest.mark.skipif(doesnt_have_cuda(), reason="CUDA Device not Available") +def test_2d_convolution_gpu_equals_cpu(): + + import cupy + + data = convolve_2d_data + numpy_agg = xr.DataArray(data) + cupy_agg = xr.DataArray(cupy.asarray(data)) + + kernel1 = np.ones((1, 1)) + output_numpy1 = convolve_2d(numpy_agg.data, kernel1) + output_cupy1 = convolve_2d(cupy_agg.data, kernel1) + assert isinstance(output_cupy1, cupy.ndarray) + assert np.isclose(output_numpy1, output_cupy1.get(), equal_nan=True).all() + + kernel2 = np.array([[0, 1, 0], + [1, 1, 1], + [0, 1, 0]]) + output_numpy2 = convolve_2d(numpy_agg.data, kernel2) + output_cupy2 = convolve_2d(cupy_agg.data, kernel2) + assert isinstance(output_cupy2, cupy.ndarray) + assert np.isclose(output_numpy2, output_cupy2.get(), equal_nan=True).all() + + kernel3 = np.array([[0, 1, 0], + [1, 0, 1], + [0, 1, 0]]) + output_numpy3 = convolve_2d(numpy_agg.data, kernel3) + output_cupy3 = convolve_2d(cupy_agg.data, kernel3) + assert isinstance(output_cupy3, cupy.ndarray) + assert np.isclose(output_numpy3, output_cupy3.get(), equal_nan=True).all() + + # def test_hotspot(): # n, m = 10, 10 # raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) From 71440d91f4ab709474f4035ffec4a1570642f0e1 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 20:08:57 +0700 Subject: [PATCH 06/30] nan edge effect --- xrspatial/convolution.py | 34 ++++++++++++++++++---------------- xrspatial/tests/test_focal.py | 28 ++++++++++++++-------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index cd8a7af4..63ee137b 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -195,8 +195,8 @@ def _convolve_2d_cpu(data, kernel): wkx = nkx // 2 wky = nky // 2 - result = np.zeros(data.shape, dtype=float32) - + out = np.zeros(data.shape, dtype=float32) + out[:, :] = np.nan for i in prange(wkx, nx-wkx): iimin = max(i - wkx, 0) iimax = min(i + wkx + 1, nx) @@ -209,14 +209,14 @@ def _convolve_2d_cpu(data, kernel): for jj in range(jjmin, jjmax, 1): jjj = wky + jj - j num += kernel[iii, jjj] * data[ii, jj] - result[i, j] = num + out[i, j] = num - return result + return out # https://www.vincent-lunot.com/post/an-introduction-to-cuda-in-python-part-3/ @cuda.jit -def _convolve_2d_cuda(data, kernel, result): +def _convolve_2d_cuda(data, kernel, out): # expect a 2D grid and 2D blocks, # a kernel with odd numbers of rows and columns, (-1-) # a grayscale image @@ -224,18 +224,19 @@ def _convolve_2d_cuda(data, kernel, result): # (-2-) 2D coordinates of the current thread: i, j = cuda.grid(2) - # (-3-) if the thread coordinates are outside of the data image, we ignore the thread: - data_rows, data_cols = data.shape - if (i >= data_rows) or (j >= data_cols): - return - - # To compute the result at coordinates (i, j), we need to use delta_rows rows of the array + # To compute the out at coordinates (i, j), we need to use delta_rows rows of the array # before and after the i_th row, # as well as delta_cols columns of the array before and after the j_th column: delta_rows = kernel.shape[0] // 2 delta_cols = kernel.shape[1] // 2 - # The result at coordinates (i, j) is equal to + data_rows, data_cols = data.shape + # (-3-) if the thread coordinates are outside of the data image, we ignore the thread + # currently, if the thread coordinates are in the edges, we ignore the thread + if i < delta_rows or i >= data_rows - delta_rows or j < delta_cols or j >= data_cols - delta_cols: + return + + # The out at coordinates (i, j) is equal to # sum_{k, l} kernel[k, l] * data[i - k + delta_rows, j - l + delta_cols] # with k and l going through the whole kernel array: s = 0 @@ -246,14 +247,15 @@ def _convolve_2d_cuda(data, kernel, result): # (-4-) Check if (i_k, j_k) coordinates are inside the array: if (i_k >= 0) and (i_k < data_rows) and (j_l >= 0) and (j_l < data_cols): s += kernel[k, l] * data[i_k, j_l] - result[i, j] = s + out[i, j] = s def _convolve_2d_gpu(data, kernel): - result = cupy.empty_like(data) + out = cupy.empty(data.shape, dtype='f4') + out[:, :] = cupy.nan griddim, blockdim = cuda_args(data.shape) - _convolve_2d_cuda[griddim, blockdim](data, kernel, result) - return result + _convolve_2d_cuda[griddim, blockdim](data, kernel, cupy.asarray(out)) + return out def convolve_2d(data, kernel): diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index a6e2f317..a58c2b3b 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -90,14 +90,14 @@ def test_convolution(): [1, 1, 1], [0, 1, 0]]) output_2 = convolve_2d(data, kernel2) - expected_output_2 = np.array([[0., 0., 0., 0., 0., 0.], - [0., 4., 3., 5., 5., 0.], - [0., 3., np.nan, np.nan, np.nan, 0.], - [0., 5., np.nan, np.nan, np.nan, 0.], - [0., 5., np.nan, np.nan, np.nan, 0.], - [0., 0., 0., 0., 0., 0.]]) + expected_output_2 = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, 4., 3., 5., 5., np.nan], + [np.nan, 3., np.nan, np.nan, np.nan, np.nan], + [np.nan, 5., np.nan, np.nan, np.nan, np.nan], + [np.nan, 5., np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) # kernel2 is of 3x3, thus the border edge is 1 cell long. - # currently, ignoring border edge (i.e values in edges are all 0s) + # currently, ignoring border edge (i.e values in edges are all nans) assert isinstance(output_2, np.ndarray) assert np.isclose(output_2, expected_output_2, equal_nan=True).all() @@ -105,14 +105,14 @@ def test_convolution(): [1, 0, 1], [0, 1, 0]]) output_3 = convolve_2d(data, kernel3) - expected_output_3 = np.array([[0., 0., 0., 0., 0., 0.], - [0., 4., 2., 4., 4., 0.], - [0., 2., np.nan, np.nan, np.nan, 0.], - [0., 4., np.nan, np.nan, np.nan, 0.], - [0., 4., np.nan, np.nan, np.nan, 0.], - [0., 0., 0., 0., 0., 0.]]) + expected_output_3 = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, 4., 2., 4., 4., np.nan], + [np.nan, 2., np.nan, np.nan, np.nan, np.nan], + [np.nan, 4., np.nan, np.nan, np.nan, np.nan], + [np.nan, 4., np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) # kernel3 is of 3x3, thus the border edge is 1 cell long. - # currently, ignoring border edge (i.e values in edges are all 0s) + # currently, ignoring border edge (i.e values in edges are all nans) assert isinstance(output_3, np.ndarray) assert np.isclose(output_3, expected_output_3, equal_nan=True).all() From d4e27f2b4498d1de24994a7c748fe0860e991480 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 20:48:48 +0700 Subject: [PATCH 07/30] refactor --- examples/user_guide/4_Focal.ipynb | 2 +- xrspatial/convolution.py | 47 +++++++++++++------------------ xrspatial/focal.py | 5 ++-- xrspatial/tests/test_focal.py | 4 +-- 4 files changed, 24 insertions(+), 34 deletions(-) mode change 100644 => 100755 examples/user_guide/4_Focal.ipynb diff --git a/examples/user_guide/4_Focal.ipynb b/examples/user_guide/4_Focal.ipynb old mode 100644 new mode 100755 index 992d0193..abc51e9d --- a/examples/user_guide/4_Focal.ipynb +++ b/examples/user_guide/4_Focal.ipynb @@ -195,7 +195,7 @@ "source": [ "from xrspatial import focal\n", "\n", - "cellsize_x, cellsize_y = focal.calc_cellsize(terrain)\n", + "cellsize_x, cellsize_y = focal.cellsize(terrain)\n", "# Use an annulus kernel with a ring at a distance from 25-30 cells away from focal point\n", "outer_radius = cellsize_x * 30\n", "inner_radius = cellsize_x * 25\n", diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 63ee137b..0cda8257 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -79,7 +79,7 @@ def _get_distance(distance_str): return meters -def calc_cellsize(raster): +def cellsize(raster): if 'unit' in raster.attrs: unit = raster.attrs['unit'] else: @@ -95,7 +95,7 @@ def calc_cellsize(raster): return cellsize_x, np.abs(cellsize_y) -def _gen_ellipse_kernel(half_w, half_h): +def _ellipse_kernel(half_w, half_h): # x values of interest x = np.linspace(-half_w, half_w, 2 * half_w + 1) # y values of interest, as a "column" array @@ -107,27 +107,6 @@ def _gen_ellipse_kernel(half_w, half_h): return ellipse.astype(float) -def _validate_kernel(kernel): - """Validatetes that the kernel is a numpy array and has odd dimensions.""" - - if not isinstance(kernel, np.ndarray): - raise ValueError( - "Received a custom kernel that is not a Numpy array.", - """The kernel received was of type {} and needs to be of type `ndarray` - """.format(type(kernel)) - ) - else: - rows, cols = kernel.shape - - if (rows % 2 == 0 or cols % 2 == 0): - raise ValueError( - "Received custom kernel with improper dimensions.", - """A custom kernel needs to have an odd shape, the - supplied kernel has {} rows and {} columns. - """.format(rows, cols) - ) - - def circle_kernel(cellsize_x, cellsize_y, radius): # validate radius, convert radius to meters r = _get_distance(str(radius)) @@ -135,7 +114,7 @@ def circle_kernel(cellsize_x, cellsize_y, radius): kernel_half_w = int(r / cellsize_x) kernel_half_h = int(r / cellsize_y) - kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) + kernel = _ellipse_kernel(kernel_half_w, kernel_half_h) return kernel @@ -154,8 +133,7 @@ def annulus_kernel(cellsize_x, cellsize_y, outer_radius, inner_radius): r_inner = r2 if r_outer - r_inner < np.sqrt((cellsize_x / 2)**2 + (cellsize_y / 2)**2): - warnings.warn('Annulus radii are closer than cellsize distance.', - Warning) + warnings.warn('Annulus radii are closer than cellsize distance.', Warning) # Get the two circular kernels for the annulus kernel_outer = circle_kernel(cellsize_x, cellsize_y, outer_radius) @@ -178,7 +156,20 @@ def annulus_kernel(cellsize_x, cellsize_y, outer_radius, inner_radius): def custom_kernel(kernel): """Validates a custom kernel. If the kernel is valid, returns itself.""" - _validate_kernel(kernel) + + if not isinstance(kernel, np.ndarray): + raise ValueError( + "Received a custom kernel that is not a Numpy array.", + "The kernel received was of type {} and needs to be of type `ndarray`".format(type(kernel)) + ) + else: + rows, cols = kernel.shape + + if (rows % 2 == 0 or cols % 2 == 0): + raise ValueError( + "Received custom kernel with improper dimensions.", + "A custom kernel needs to have an odd shape, the supplied kernel has {} rows and {} columns.".format(rows, cols) + ) return kernel @@ -244,7 +235,7 @@ def _convolve_2d_cuda(data, kernel, out): for l in range(kernel.shape[1]): i_k = i - k + delta_rows j_l = j - l + delta_cols - # (-4-) Check if (i_k, j_k) coordinates are inside the array: + # (-4-) Check if (i_k, j_l) coordinates are inside the array: if (i_k >= 0) and (i_k < data_rows) and (j_l >= 0) and (j_l < data_cols): s += kernel[k, l] * data[i_k, j_l] out[i, j] = s diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 2743c2dd..6292aafd 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1,5 +1,4 @@ '''Focal Related Utilities''' -import re import warnings from numba import prange @@ -7,7 +6,7 @@ from xarray import DataArray from xrspatial.utils import ngjit -from xrspatial.convolution import convolve_2d, _validate_kernel +from xrspatial.convolution import convolve_2d, custom_kernel warnings.simplefilter('default') @@ -156,7 +155,7 @@ def apply(raster, kernel, x='x', y='y', func=calc_mean): "(%s, %s)".format(y, x)) # Validate the kernel - _validate_kernel(kernel) + kernel = custom_kernel(kernel) # apply kernel to raster values out = _apply(raster.values.astype(float), kernel, func) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index a58c2b3b..0b045ef2 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -9,7 +9,7 @@ from xrspatial import mean from xrspatial.focal import hotspots from xrspatial.convolution import ( - convolve_2d, calc_cellsize, _validate_kernel, + convolve_2d, cellsize, circle_kernel, annulus_kernel, ) import pytest @@ -154,7 +154,7 @@ def test_2d_convolution_gpu_equals_cpu(): # raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) # raster['x'] = np.linspace(0, n, n) # raster['y'] = np.linspace(0, m, m) -# cellsize_x, cellsize_y = calc_cellsize(raster) +# cellsize_x, cellsize_y = cellsize(raster) # # kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) # From 11c3045b0c66437b5b69fdd75e5bb7d0fb1e3382 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 21:01:05 +0700 Subject: [PATCH 08/30] kernel: add tests --- xrspatial/convolution.py | 2 +- xrspatial/tests/test_focal.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 0cda8257..1c50f707 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -4,7 +4,7 @@ import warnings warnings.simplefilter('default') -from numba import cuda, float32, int32, prange, jit +from numba import cuda, float32, prange, jit from xrspatial.utils import has_cuda from xrspatial.utils import cuda_args diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 0b045ef2..2d1db227 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -72,6 +72,30 @@ def test_mean_transfer_function(): [1., 1., 1., 1., 1., 0.]]) +def test_kernel(): + data = convolve_2d_data + m, n = data.shape + agg = xr.DataArray(data, dims=['y', 'x']) + agg['x'] = np.linspace(0, n, n) + agg['y'] = np.linspace(0, m, m) + + cellsize_x, cellsize_y = cellsize(agg) + + kernel1 = circle_kernel(cellsize_x, cellsize_y, 2) + expected_kernel1 = np.array([[0, 1, 0], + [1, 1, 1], + [0, 1, 0]]) + assert isinstance(kernel1, np.ndarray) + assert np.isclose(kernel1, expected_kernel1, equal_nan=True).all() + + kernel2 = annulus_kernel(cellsize_x, cellsize_y, 2, 0.5) + expected_kernel2 = np.array([[0, 1, 0], + [1, 0, 1], + [0, 1, 0]]) + assert isinstance(kernel2, np.ndarray) + assert np.isclose(kernel2, expected_kernel2, equal_nan=True).all() + + def test_convolution(): data = convolve_2d_data From b5b2a37a3b6be54c38f2d86e3b43d37b4f61d1c3 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 21:20:22 +0700 Subject: [PATCH 09/30] convolve_2d: dask numpy case --- xrspatial/convolution.py | 22 ++++++++++++++++++++-- xrspatial/tests/test_focal.py | 31 ++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 1c50f707..463c8308 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -1,5 +1,8 @@ +from functools import partial + import re import numpy as np +import dask.array as da import warnings warnings.simplefilter('default') @@ -174,7 +177,7 @@ def custom_kernel(kernel): @jit(nopython=True, nogil=True, parallel=True) -def _convolve_2d_cpu(data, kernel): +def _convolve_2d_numpy(data, kernel): """Apply kernel to data image.""" # TODO: handle nan @@ -205,6 +208,17 @@ def _convolve_2d_cpu(data, kernel): return out +def _convolve_2d_dask_numpy(data, kernel): + pad_h = kernel.shape[0] // 2 + pad_w = kernel.shape[1] // 2 + _func = partial(_convolve_2d_numpy, kernel=kernel) + out = data.map_overlap(_func, + depth=(pad_h, pad_w), + boundary=np.nan, + meta=np.array(())) + return out + + # https://www.vincent-lunot.com/post/an-introduction-to-cuda-in-python-part-3/ @cuda.jit def _convolve_2d_cuda(data, kernel, out): @@ -257,12 +271,16 @@ def convolve_2d(data, kernel): # numpy case if isinstance(data, np.ndarray): - out = _convolve_2d_cpu(data, kernel) + out = _convolve_2d_numpy(data, kernel) # cupy case elif has_cuda() and isinstance(data, cupy.ndarray): out = _convolve_2d_gpu(data, kernel) + # dask + numpy case + elif isinstance(data, da.Array): + out = _convolve_2d_dask_numpy(data, kernel) + else: raise TypeError('Unsupported Array Type: {}'.format(type(data))) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 2d1db227..822a3d7b 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -98,22 +98,27 @@ def test_kernel(): def test_convolution(): data = convolve_2d_data + dask_data = da.from_array(data, chunks=(3, 3)) kernel1 = np.ones((1, 1)) - output_1 = convolve_2d(data, kernel1) + numpy_output_1 = convolve_2d(data, kernel1) expected_output_1 = np.array([[0., 1., 1., 1., 1., 1.], [1., 0., 1., 1., 1., 1.], [1., 1., 0., 1., 1., 1.], [1., 1., 1., np.nan, 1., 1.], [1., 1., 1., 1., 0., 1.], [1., 1., 1., 1., 1., 0.]]) - assert isinstance(output_1, np.ndarray) - assert np.isclose(output_1, expected_output_1, equal_nan=True).all() + assert isinstance(numpy_output_1, np.ndarray) + assert np.isclose(numpy_output_1, expected_output_1, equal_nan=True).all() + + dask_output_1 = convolve_2d(dask_data, kernel1) + assert isinstance(dask_output_1, da.Array) + assert np.isclose(dask_output_1.compute(), expected_output_1, equal_nan=True).all() kernel2 = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]) - output_2 = convolve_2d(data, kernel2) + numpy_output_2 = convolve_2d(data, kernel2) expected_output_2 = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], [np.nan, 4., 3., 5., 5., np.nan], [np.nan, 3., np.nan, np.nan, np.nan, np.nan], @@ -122,13 +127,17 @@ def test_convolution(): [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) # kernel2 is of 3x3, thus the border edge is 1 cell long. # currently, ignoring border edge (i.e values in edges are all nans) - assert isinstance(output_2, np.ndarray) - assert np.isclose(output_2, expected_output_2, equal_nan=True).all() + assert isinstance(numpy_output_2, np.ndarray) + assert np.isclose(numpy_output_2, expected_output_2, equal_nan=True).all() + + dask_output_2 = convolve_2d(dask_data, kernel2) + assert isinstance(dask_output_2, da.Array) + assert np.isclose(dask_output_2.compute(), expected_output_2, equal_nan=True).all() kernel3 = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) - output_3 = convolve_2d(data, kernel3) + numpy_output_3 = convolve_2d(data, kernel3) expected_output_3 = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], [np.nan, 4., 2., 4., 4., np.nan], [np.nan, 2., np.nan, np.nan, np.nan, np.nan], @@ -137,8 +146,12 @@ def test_convolution(): [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) # kernel3 is of 3x3, thus the border edge is 1 cell long. # currently, ignoring border edge (i.e values in edges are all nans) - assert isinstance(output_3, np.ndarray) - assert np.isclose(output_3, expected_output_3, equal_nan=True).all() + assert isinstance(numpy_output_3, np.ndarray) + assert np.isclose(numpy_output_3, expected_output_3, equal_nan=True).all() + + dask_output_3 = convolve_2d(dask_data, kernel3) + assert isinstance(dask_output_3, da.Array) + assert np.isclose(dask_output_3.compute(), expected_output_3, equal_nan=True).all() @pytest.mark.skipif(doesnt_have_cuda(), reason="CUDA Device not Available") From ff73006fe13a7fcc1a6ccc1d41afb29fdc8be097 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 21:27:12 +0700 Subject: [PATCH 10/30] convolve_2d: dask cupy case, raise not implemented --- xrspatial/convolution.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 463c8308..8616287a 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -255,7 +255,7 @@ def _convolve_2d_cuda(data, kernel, out): out[i, j] = s -def _convolve_2d_gpu(data, kernel): +def _convolve_2d_cupy(data, kernel): out = cupy.empty(data.shape, dtype='f4') out[:, :] = cupy.nan griddim, blockdim = cuda_args(data.shape) @@ -263,6 +263,11 @@ def _convolve_2d_gpu(data, kernel): return out +def _convolve_2d_dask_cupy(data, kernel): + msg = 'Upstream bug in dask prevents cupy backed arrays' + raise NotImplementedError(msg) + + def convolve_2d(data, kernel): """Function to call the 2D convolution via Numba. The Numba convolution function does not account for an edge so @@ -275,7 +280,12 @@ def convolve_2d(data, kernel): # cupy case elif has_cuda() and isinstance(data, cupy.ndarray): - out = _convolve_2d_gpu(data, kernel) + out = _convolve_2d_cupy(data, kernel) + + # dask + cupy case + elif has_cuda() and isinstance(data, da.Array) and \ + type(data._meta).__module__.split('.')[0] == 'cupy': + out = _convolve_2d_dask_cupy(data, kernel) # dask + numpy case elif isinstance(data, da.Array): From 554a2d4cdd6b0e4dc74d608d6e8013abd969fbca Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 24 Feb 2021 21:39:28 +0700 Subject: [PATCH 11/30] convolution: test dask cupy not implemented --- xrspatial/tests/test_focal.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 822a3d7b..c8036abf 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -185,6 +185,11 @@ def test_2d_convolution_gpu_equals_cpu(): assert isinstance(output_cupy3, cupy.ndarray) assert np.isclose(output_numpy3, output_cupy3.get(), equal_nan=True).all() + # dask + cupy case not implemented + dask_cupy_agg = xr.DataArray(da.from_array(cupy.asarray(data), chunks=(3, 3))) + with pytest.raises(NotImplementedError) as e_info: + convolve_2d(dask_cupy_agg.data, kernel3) + assert e_info # def test_hotspot(): # n, m = 10, 10 From 4eca7b6cbca4e7c86716b2b8dcabaa0e3f43d010 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 10:42:45 +0700 Subject: [PATCH 12/30] focal mean: refactor --- xrspatial/focal.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 6292aafd..ee799be6 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -14,7 +14,7 @@ @ngjit -def _mean(data, excludes): +def _mean_numpy(data, excludes): out = np.zeros_like(data) rows, cols = data.shape for y in range(1, rows - 1): @@ -41,6 +41,17 @@ def _mean(data, excludes): return out +def _mean(data, excludes): + # numpy case + if isinstance(data, np.ndarray): + out = _mean_numpy(data, excludes) + + else: + raise TypeError('Unsupported Array Type: {}'.format(type(data))) + + return out + + def mean(agg, passes=1, excludes=[np.nan], name='mean'): """ Returns Mean filtered array using a 3x3 window @@ -57,15 +68,15 @@ def mean(agg, passes=1, excludes=[np.nan], name='mean'): ------- data: DataArray """ - out = None + out = agg.data for i in range(passes): - if out is None: - out = _mean(agg.data, tuple(excludes)) - else: - out = _mean(out, tuple(excludes)) + out = _mean(out, tuple(excludes)) - return DataArray(out, name=name, dims=agg.dims, - coords=agg.coords, attrs=agg.attrs) + return DataArray(out, + name=name, + dims=agg.dims, + coords=agg.coords, + attrs=agg.attrs) @ngjit @@ -219,8 +230,7 @@ def hotspots(raster, kernel, x='x', y='y'): raster_dims = raster.dims if raster_dims != (y, x): - raise ValueError("raster.coords should be named as coordinates:" - "(%s, %s)".format(y, x)) + raise ValueError("raster.coords should be named as coordinates: (%s, %s)".format(y, x)) # apply kernel to raster values mean_array = convolve_2d(raster.values, kernel / kernel.sum()) @@ -229,8 +239,7 @@ def hotspots(raster, kernel, x='x', y='y'): global_mean = np.nanmean(raster.values) global_std = np.nanstd(raster.values) if global_std == 0: - raise ZeroDivisionError("Standard deviation " - "of the input raster values is 0.") + raise ZeroDivisionError("Standard deviation of the input raster values is 0.") z_array = (mean_array - global_mean) / global_std out = _hotspots(z_array) From 4b9d38187481a3e6fe11a9bb89dbce5b2630405d Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 11:27:19 +0700 Subject: [PATCH 13/30] focal mean: dask numpy case, nan edge effect --- xrspatial/focal.py | 21 +++++++++++++++++++-- xrspatial/tests/test_focal.py | 32 ++++++++++++++------------------ 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index ee799be6..798e656d 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1,9 +1,11 @@ '''Focal Related Utilities''' +from functools import partial import warnings from numba import prange import numpy as np from xarray import DataArray +import dask.array as da from xrspatial.utils import ngjit from xrspatial.convolution import convolve_2d, custom_kernel @@ -16,6 +18,7 @@ @ngjit def _mean_numpy(data, excludes): out = np.zeros_like(data) + out[:, :] = np.nan rows, cols = data.shape for y in range(1, rows - 1): for x in range(1, cols - 1): @@ -41,10 +44,23 @@ def _mean_numpy(data, excludes): return out +def _mean_dask_numpy(data, excludes): + _func = partial(_mean_numpy, excludes=excludes) + out = data.map_overlap(_func, + depth=(1, 1), + boundary=np.nan, + meta=np.array(())) + return out + + def _mean(data, excludes): # numpy case if isinstance(data, np.ndarray): - out = _mean_numpy(data, excludes) + out = _mean_numpy(data.astype(np.float), excludes) + + # dask + numpy case + elif isinstance(data, da.Array): + out = _mean_dask_numpy(data.astype(np.float), excludes) else: raise TypeError('Unsupported Array Type: {}'.format(type(data))) @@ -54,7 +70,8 @@ def _mean(data, excludes): def mean(agg, passes=1, excludes=[np.nan], name='mean'): """ - Returns Mean filtered array using a 3x3 window + Returns Mean filtered array using a 3x3 window. + Default behaviour to 'mean' is to pad the borders with nans Parameters ---------- diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index c8036abf..c3c0b159 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -39,29 +39,25 @@ def _do_gaussian_array(): return gaussian -data_random = np.random.random_sample((100, 100)) +data_random = np.random.random_sample((5, 5)) data_random_sparse = _do_sparse_array(data_random) data_gaussian = _do_gaussian_array() def test_mean_transfer_function(): - da = xr.DataArray(data_random) - da_mean = mean(da) - assert da.shape == da_mean.shape - - # Overall mean value should be the same as the original array. - # Considering the default behaviour to 'mean' is to pad the borders - # with zeros, the mean value of the filtered array will be slightly - # smaller (considering 'data_random' is positive). - assert da_mean.mean() <= data_random.mean() - - # And if we pad the borders with the original values, we should have a - # 'mean' filtered array with _mean_ value very similar to the original one. - da_mean[0, :] = data_random[0, :] - da_mean[-1, :] = data_random[-1, :] - da_mean[:, 0] = data_random[:, 0] - da_mean[:, -1] = data_random[:, -1] - assert abs(da_mean.mean() - data_random.mean()) < 10**-3 + # numpy case + numpy_agg = xr.DataArray(data_random) + numpy_mean = mean(numpy_agg) + assert isinstance(numpy_mean.data, np.ndarray) + + # dask + numpy case + dask_numpy_agg = xr.DataArray(da.from_array(data_random, chunks=(3, 3))) + dask_numpy_mean = mean(dask_numpy_agg) + assert isinstance(dask_numpy_mean.data, da.Array) + + # both output same results + assert np.isclose(numpy_mean, dask_numpy_mean.compute(), equal_nan=True).all() + assert numpy_agg.shape == numpy_mean.shape convolve_2d_data = np.array([[0., 1., 1., 1., 1., 1.], From 09e0dc35cf0a5bfca7c9729b6a4674c1d93a3b61 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 14:00:36 +0700 Subject: [PATCH 14/30] focal mean: check if exclude nan --- xrspatial/focal.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 798e656d..cb82262a 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -7,6 +7,14 @@ from xarray import DataArray import dask.array as da +from numba import cuda + +try: + import cupy +except ImportError: + class cupy(object): + ndarray = False + from xrspatial.utils import ngjit from xrspatial.convolution import convolve_2d, custom_kernel @@ -15,17 +23,26 @@ # TODO: Make convolution more generic with numba first-class functions. +@ngjit +def _equal(x, y): + if x == y or (np.isnan(x) and np.isnan(y)): + return True + return False + + @ngjit def _mean_numpy(data, excludes): + # TODO: exclude nans out = np.zeros_like(data) out[:, :] = np.nan rows, cols = data.shape + for y in range(1, rows - 1): for x in range(1, cols - 1): exclude = False for ex in excludes: - if data[y, x] == ex: + if _equal(data[y, x], ex): exclude = True break From 0bc27dc4e3137f63780bb87e81643397980417a8 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 15:11:10 +0700 Subject: [PATCH 15/30] focal mean: cupy case --- xrspatial/focal.py | 49 ++++++++++++++++++++++++++++++++--- xrspatial/tests/test_focal.py | 7 ++++- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index cb82262a..09f5c5b6 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -2,6 +2,8 @@ from functools import partial import warnings +from math import isnan + from numba import prange import numpy as np from xarray import DataArray @@ -15,6 +17,8 @@ class cupy(object): ndarray = False +from xrspatial.utils import cuda_args +from xrspatial.utils import has_cuda from xrspatial.utils import ngjit from xrspatial.convolution import convolve_2d, custom_kernel @@ -24,7 +28,7 @@ class cupy(object): @ngjit -def _equal(x, y): +def _equal_numpy(x, y): if x == y or (np.isnan(x) and np.isnan(y)): return True return False @@ -32,7 +36,7 @@ def _equal(x, y): @ngjit def _mean_numpy(data, excludes): - # TODO: exclude nans + # TODO: exclude nans, what if nans in the kernel? out = np.zeros_like(data) out[:, :] = np.nan rows, cols = data.shape @@ -42,7 +46,7 @@ def _mean_numpy(data, excludes): exclude = False for ex in excludes: - if _equal(data[y, x], ex): + if _equal_numpy(data[y, x], ex): exclude = True break @@ -61,6 +65,41 @@ def _mean_numpy(data, excludes): return out +@cuda.jit(device=True) +def _kernel_mean_gpu(data): + return (data[-1, -1] + data[-1, 0] + data[-1, 1] + + data[0, -1] + data[0, 0] + data[0, 1] + + data[1, -1] + data[1, 0] + data[1, 1]) / 9 + + +@cuda.jit +def _mean_gpu(data, excludes, out): + i, j = cuda.grid(2) + di = 1 + dj = 1 + + for ex in excludes: + if (data[i, j] == ex) or (isnan(data[i, j]) and isnan(ex)): + out[i, j] = data[i, j] + return + + if (i - di >= 0 and i + di <= out.shape[0] - 1 and + j - dj >= 0 and j + dj <= out.shape[1] - 1): + out[i, j] = _kernel_mean_gpu(data[i-1:i+2, j-1:j+2]) + + +def _mean_cupy(data, excludes): + out = cupy.zeros_like(data) + out[:, :] = cupy.nan + griddim, blockdim = cuda_args(data.shape) + out = cupy.empty(data.shape, dtype='f4') + out[:] = cupy.nan + + _mean_gpu[griddim, blockdim](data, cupy.asarray(excludes), out) + + return out + + def _mean_dask_numpy(data, excludes): _func = partial(_mean_numpy, excludes=excludes) out = data.map_overlap(_func, @@ -75,6 +114,10 @@ def _mean(data, excludes): if isinstance(data, np.ndarray): out = _mean_numpy(data.astype(np.float), excludes) + # cupy case + elif has_cuda() and isinstance(data, cupy.ndarray): + out = _mean_cupy(data, excludes) + # dask + numpy case elif isinstance(data, da.Array): out = _mean_dask_numpy(data.astype(np.float), excludes) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index c3c0b159..3375732d 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -39,7 +39,7 @@ def _do_gaussian_array(): return gaussian -data_random = np.random.random_sample((5, 5)) +data_random = np.random.random_sample((100, 100)) data_random_sparse = _do_sparse_array(data_random) data_gaussian = _do_gaussian_array() @@ -59,6 +59,11 @@ def test_mean_transfer_function(): assert np.isclose(numpy_mean, dask_numpy_mean.compute(), equal_nan=True).all() assert numpy_agg.shape == numpy_mean.shape + import cupy + cupy_agg = xr.DataArray(cupy.asarray(data_random)) + cupy_mean = mean(cupy_agg) + assert np.isclose(numpy_mean, cupy_mean.data.get(), equal_nan=True).all() + convolve_2d_data = np.array([[0., 1., 1., 1., 1., 1.], [1., 0., 1., 1., 1., 1.], From 179f09feee8852ed40e7cf3303b17f7443231625 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 15:54:39 +0700 Subject: [PATCH 16/30] focal mean: dask cupy case not implemented --- xrspatial/focal.py | 26 ++++++++++++++++++-------- xrspatial/tests/test_focal.py | 20 +++++++++++++++++++- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 09f5c5b6..7a7bce80 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -65,6 +65,15 @@ def _mean_numpy(data, excludes): return out +def _mean_dask_numpy(data, excludes): + _func = partial(_mean_numpy, excludes=excludes) + out = data.map_overlap(_func, + depth=(1, 1), + boundary=np.nan, + meta=np.array(())) + return out + + @cuda.jit(device=True) def _kernel_mean_gpu(data): return (data[-1, -1] + data[-1, 0] + data[-1, 1] @@ -100,13 +109,9 @@ def _mean_cupy(data, excludes): return out -def _mean_dask_numpy(data, excludes): - _func = partial(_mean_numpy, excludes=excludes) - out = data.map_overlap(_func, - depth=(1, 1), - boundary=np.nan, - meta=np.array(())) - return out +def _mean_dask_cupy(data, excludes): + msg = 'Upstream bug in dask prevents cupy backed arrays' + raise NotImplementedError(msg) def _mean(data, excludes): @@ -116,7 +121,12 @@ def _mean(data, excludes): # cupy case elif has_cuda() and isinstance(data, cupy.ndarray): - out = _mean_cupy(data, excludes) + out = _mean_cupy(data.astype(cupy.float), excludes) + + # dask + cupy case + elif has_cuda() and isinstance(data, da.Array) and \ + type(data._meta).__module__.split('.')[0] == 'cupy': + out = _mean_dask_cupy(data.astype(cupy.float), excludes) # dask + numpy case elif isinstance(data, da.Array): diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 3375732d..5d8e09c1 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -44,7 +44,7 @@ def _do_gaussian_array(): data_gaussian = _do_gaussian_array() -def test_mean_transfer_function(): +def test_mean_transfer_function_cpu(): # numpy case numpy_agg = xr.DataArray(data_random) numpy_mean = mean(numpy_agg) @@ -59,11 +59,29 @@ def test_mean_transfer_function(): assert np.isclose(numpy_mean, dask_numpy_mean.compute(), equal_nan=True).all() assert numpy_agg.shape == numpy_mean.shape + +@pytest.mark.skipif(doesnt_have_cuda(), reason="CUDA Device not Available") +def test_mean_transfer_function_gpu_equals_cpu(): + import cupy + + # cupy case cupy_agg = xr.DataArray(cupy.asarray(data_random)) cupy_mean = mean(cupy_agg) + assert isinstance(cupy_mean.data, cupy.ndarray) + + # numpy case + numpy_agg = xr.DataArray(data_random) + numpy_mean = mean(numpy_agg) + assert np.isclose(numpy_mean, cupy_mean.data.get(), equal_nan=True).all() + # dask + cupy case not implemented + dask_cupy_agg = xr.DataArray(da.from_array(cupy.asarray(data_random), chunks=(3, 3))) + with pytest.raises(NotImplementedError) as e_info: + mean(dask_cupy_agg) + assert e_info + convolve_2d_data = np.array([[0., 1., 1., 1., 1., 1.], [1., 0., 1., 1., 1., 1.], From 36329ea2dfc19efa8eeb7f9ecf2a756c71a6d9ca Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 16:57:36 +0700 Subject: [PATCH 17/30] focal apply: numpy case with test --- xrspatial/focal.py | 31 ++++++++++++++++++------------- xrspatial/tests/test_focal.py | 33 +++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 7a7bce80..7e897a57 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -209,7 +209,7 @@ def _confidence(zscore): @ngjit -def _apply(data, kernel_array, func): +def _apply_numpy(data, kernel_array, func): out = np.zeros_like(data) rows, cols = data.shape krows, kcols = kernel_array.shape @@ -222,16 +222,26 @@ def _apply(data, kernel_array, func): kernel_values.fill(np.nan) for ky in range(y - hrows, y + hrows + 1): for kx in range(x - hcols, x + hcols + 1): - if ky >= 0 and kx >= 0: - if ky >= 0 and ky < rows and kx >= 0 and kx < cols: - kyidx, kxidx = ky - (y - hrows), kx - (x - hcols) - if kernel_array[kyidx, kxidx] == 1: - kernel_values[kyidx, kxidx] = data[ky, kx] + if ky >= 0 and ky < rows and kx >= 0 and kx < cols: + kyidx, kxidx = ky - (y - hrows), kx - (x - hcols) + if kernel_array[kyidx, kxidx] == 1: + kernel_values[kyidx, kxidx] = data[ky, kx] out[y, x] = func(kernel_values) return out -def apply(raster, kernel, x='x', y='y', func=calc_mean): +def _apply(data, kernel_array, func): + # numpy case + if isinstance(data, np.ndarray): + out = _apply_numpy(data, kernel_array, func) + + else: + raise TypeError('Unsupported Array Type: {}'.format(type(data))) + + return out + + +def apply(raster, kernel, func=calc_mean): """ """ @@ -247,16 +257,11 @@ def apply(raster, kernel, x='x', y='y', func=calc_mean): raise ValueError( "`raster` must be an array of integers or float") - raster_dims = raster.dims - if raster_dims != (y, x): - raise ValueError("raster.coords should be named as coordinates:" - "(%s, %s)".format(y, x)) - # Validate the kernel kernel = custom_kernel(kernel) # apply kernel to raster values - out = _apply(raster.values.astype(float), kernel, func) + out = _apply(raster.data.astype(float), kernel, func) result = DataArray(out, coords=raster.coords, diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 5d8e09c1..27155cf3 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -7,12 +7,8 @@ from xrspatial.utils import doesnt_have_cuda from xrspatial import mean -from xrspatial.focal import hotspots -from xrspatial.convolution import ( - convolve_2d, cellsize, - circle_kernel, annulus_kernel, -) -import pytest +from xrspatial.focal import hotspots, apply +from xrspatial.convolution import convolve_2d, cellsize, circle_kernel, annulus_kernel def _do_sparse_array(data_array): @@ -210,6 +206,31 @@ def test_2d_convolution_gpu_equals_cpu(): convolve_2d(dask_cupy_agg.data, kernel3) assert e_info + +def test_apply(): + + from xrspatial.utils import ngjit + + @ngjit + def func(x): + return 0 + + data = np.array([[0, 1, 2, 3, 4, 5], + [6, 7, 8, 9, 10, 11], + [12, 13, 14, 15, 16, 17], + [18, 19, 20, 21, 22, 23]]) + + kernel = np.array([[0, 1, 0], + [1, 0, 1], + [0, 1, 0]]) + + # numpy case + numpy_agg = xr.DataArray(data) + numpy_apply = apply(numpy_agg, kernel, func=func) + assert isinstance(numpy_apply.data, np.ndarray) + assert np.count_nonzero(numpy_apply.data) == 0 + + # def test_hotspot(): # n, m = 10, 10 # raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) From 2d6d878f7beca883abfaf590bd28bfcac536ac6f Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 17:14:19 +0700 Subject: [PATCH 18/30] focal apply: dask numpy case --- xrspatial/focal.py | 29 +++++++++++++++++++++++------ xrspatial/tests/test_focal.py | 13 +++++++++++-- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 7e897a57..9a7856df 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -209,12 +209,12 @@ def _confidence(zscore): @ngjit -def _apply_numpy(data, kernel_array, func): +def _apply_numpy(data, kernel, func): out = np.zeros_like(data) rows, cols = data.shape - krows, kcols = kernel_array.shape + krows, kcols = kernel.shape hrows, hcols = int(krows / 2), int(kcols / 2) - kernel_values = np.zeros_like(kernel_array, dtype=data.dtype) + kernel_values = np.zeros_like(kernel, dtype=data.dtype) for y in prange(rows): for x in prange(cols): @@ -224,16 +224,33 @@ def _apply_numpy(data, kernel_array, func): for kx in range(x - hcols, x + hcols + 1): if ky >= 0 and ky < rows and kx >= 0 and kx < cols: kyidx, kxidx = ky - (y - hrows), kx - (x - hcols) - if kernel_array[kyidx, kxidx] == 1: + if kernel[kyidx, kxidx] == 1: kernel_values[kyidx, kxidx] = data[ky, kx] out[y, x] = func(kernel_values) return out -def _apply(data, kernel_array, func): +def _apply_dask_numpy(data, kernel, func): + _func = partial(_apply_numpy, kernel=kernel, func=func) + + pad_h = kernel.shape[0] // 2 + pad_w = kernel.shape[1] // 2 + + out = data.map_overlap(_func, + depth=(pad_h, pad_w), + boundary=np.nan, + meta=np.array(())) + return out + + +def _apply(data, kernel, func): # numpy case if isinstance(data, np.ndarray): - out = _apply_numpy(data, kernel_array, func) + out = _apply_numpy(data, kernel, func) + + # dask + numpy case + elif isinstance(data, da.Array): + out = _apply_dask_numpy(data, kernel, func) else: raise TypeError('Unsupported Array Type: {}'.format(type(data))) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 27155cf3..93a2fb34 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -212,7 +212,7 @@ def test_apply(): from xrspatial.utils import ngjit @ngjit - def func(x): + def func_zero(x): return 0 data = np.array([[0, 1, 2, 3, 4, 5], @@ -226,10 +226,19 @@ def func(x): # numpy case numpy_agg = xr.DataArray(data) - numpy_apply = apply(numpy_agg, kernel, func=func) + numpy_apply = apply(numpy_agg, kernel, func_zero) assert isinstance(numpy_apply.data, np.ndarray) + assert numpy_agg.shape == numpy_apply.shape assert np.count_nonzero(numpy_apply.data) == 0 + # dask + numpy case + dask_numpy_agg = xr.DataArray(da.from_array(data, chunks=(3, 3))) + dask_numpy_apply = apply(dask_numpy_agg, kernel, func_zero) + assert isinstance(dask_numpy_apply.data, da.Array) + + # both output same results + assert np.isclose(numpy_apply, dask_numpy_apply.compute(), equal_nan=True).all() + # def test_hotspot(): # n, m = 10, 10 From 5a4b4b550c83896fc92e727ca503168e0a82e402 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 21:07:28 +0700 Subject: [PATCH 19/30] focal apply: gpu cupy support --- xrspatial/focal.py | 47 +++++++++++++++++++++++++----- xrspatial/tests/test_focal.py | 55 +++++++++++++++++++++++++---------- 2 files changed, 79 insertions(+), 23 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 9a7856df..46a1daa7 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -9,14 +9,13 @@ from xarray import DataArray import dask.array as da -from numba import cuda - try: import cupy except ImportError: class cupy(object): ndarray = False +from numba import cuda from xrspatial.utils import cuda_args from xrspatial.utils import has_cuda from xrspatial.utils import ngjit @@ -243,11 +242,50 @@ def _apply_dask_numpy(data, kernel, func): return out +def _apply_cupy(data, kernel, func): + + out = cupy.zeros(data.shape, dtype=data.dtype) + out[:] = cupy.nan + + rows, cols = data.shape + krows, kcols = kernel.shape + hrows, hcols = int(krows / 2), int(kcols / 2) + kernel_values = cupy.zeros_like(kernel, dtype=data.dtype) + + for y in prange(rows): + for x in prange(cols): + # kernel values are all nans at the beginning of each step + kernel_values.fill(np.nan) + for ky in range(y - hrows, y + hrows + 1): + for kx in range(x - hcols, x + hcols + 1): + if ky >= 0 and ky < rows and kx >= 0 and kx < cols: + kyidx, kxidx = ky - (y - hrows), kx - (x - hcols) + if kernel[kyidx, kxidx] == 1: + kernel_values[kyidx, kxidx] = data[ky, kx] + out[y, x] = func(kernel_values) + + return out + + +def _apply_dask_cupy(data, kernel, func): + msg = 'Upstream bug in dask prevents cupy backed arrays' + raise NotImplementedError(msg) + + def _apply(data, kernel, func): # numpy case if isinstance(data, np.ndarray): out = _apply_numpy(data, kernel, func) + # cupy case + elif has_cuda() and isinstance(data, cupy.ndarray): + out = _apply_cupy(data, cupy.asarray(kernel), func) + + # dask + cupy case + elif has_cuda() and isinstance(data, da.Array) and \ + type(data._meta).__module__.split('.')[0] == 'cupy': + out = _apply_dask_cupy(data, cupy.asarray(kernel), func) + # dask + numpy case elif isinstance(data, da.Array): out = _apply_dask_numpy(data, kernel, func) @@ -269,11 +307,6 @@ def apply(raster, kernel, func=calc_mean): if raster.ndim != 2: raise ValueError("`raster` must be 2D") - if not (issubclass(raster.values.dtype.type, np.integer) or - issubclass(raster.values.dtype.type, np.floating)): - raise ValueError( - "`raster` must be an array of integers or float") - # Validate the kernel kernel = custom_kernel(kernel) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 93a2fb34..00010c21 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -5,6 +5,7 @@ import dask.array as da from xrspatial.utils import doesnt_have_cuda +from xrspatial.utils import ngjit from xrspatial import mean from xrspatial.focal import hotspots, apply @@ -207,39 +208,61 @@ def test_2d_convolution_gpu_equals_cpu(): assert e_info -def test_apply(): +data_apply = np.array([[0, 1, 2, 3, 4, 5], + [6, 7, 8, 9, 10, 11], + [12, 13, 14, 15, 16, 17], + [18, 19, 20, 21, 22, 23]]) - from xrspatial.utils import ngjit +kernel_apply = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) + +def test_apply_cpu(): @ngjit - def func_zero(x): + def func_zero_cpu(x): return 0 - data = np.array([[0, 1, 2, 3, 4, 5], - [6, 7, 8, 9, 10, 11], - [12, 13, 14, 15, 16, 17], - [18, 19, 20, 21, 22, 23]]) - - kernel = np.array([[0, 1, 0], - [1, 0, 1], - [0, 1, 0]]) - # numpy case - numpy_agg = xr.DataArray(data) - numpy_apply = apply(numpy_agg, kernel, func_zero) + numpy_agg = xr.DataArray(data_apply) + numpy_apply = apply(numpy_agg, kernel_apply, func_zero_cpu) assert isinstance(numpy_apply.data, np.ndarray) assert numpy_agg.shape == numpy_apply.shape assert np.count_nonzero(numpy_apply.data) == 0 # dask + numpy case - dask_numpy_agg = xr.DataArray(da.from_array(data, chunks=(3, 3))) - dask_numpy_apply = apply(dask_numpy_agg, kernel, func_zero) + dask_numpy_agg = xr.DataArray(da.from_array(data_apply, chunks=(3, 3))) + dask_numpy_apply = apply(dask_numpy_agg, kernel_apply, func_zero_cpu) assert isinstance(dask_numpy_apply.data, da.Array) # both output same results assert np.isclose(numpy_apply, dask_numpy_apply.compute(), equal_nan=True).all() +@pytest.mark.skipif(doesnt_have_cuda(), reason="CUDA Device not Available") +def test_apply_gpu_equals_gpu(): + def func_zero(x): + return 0 + + @ngjit + def func_zero_cpu(x): + return 0 + + # cupy case + import cupy + cupy_agg = xr.DataArray(cupy.asarray(data_apply)) + cupy_apply = apply(cupy_agg, kernel_apply, func_zero) + assert isinstance(cupy_apply.data, cupy.ndarray) + # numpy case + numpy_agg = xr.DataArray(data_apply) + numpy_apply = apply(numpy_agg, kernel_apply, func_zero_cpu) + assert np.isclose(numpy_apply, cupy_apply.data.get(), equal_nan=True).all() + + # dask + cupy case not implemented + dask_cupy_agg = xr.DataArray(da.from_array(cupy.asarray(data_apply), chunks=(3, 3))) + with pytest.raises(NotImplementedError) as e_info: + apply(dask_cupy_agg, kernel_apply, func_zero) + assert e_info + + # def test_hotspot(): # n, m = 10, 10 # raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) From dc7748cf899d6239c008200b898af982fcb1d08a Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 25 Feb 2021 21:40:09 +0700 Subject: [PATCH 20/30] focal hotspots: refactor --- xrspatial/focal.py | 130 +++++++++++++++++++--------------- xrspatial/tests/test_focal.py | 120 +++++++++++++++---------------- 2 files changed, 132 insertions(+), 118 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 46a1daa7..936fa936 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -175,38 +175,6 @@ def calc_sum(array): return np.nansum(array) -@ngjit -def upper_bound_p_value(zscore): - if abs(zscore) >= 2.33: - return 0.0099 - if abs(zscore) >= 1.65: - return 0.0495 - if abs(zscore) >= 1.29: - return 0.0985 - return 1 - - -@ngjit -def _hot_cold(zscore): - if zscore > 0: - return 1 - if zscore < 0: - return -1 - return 0 - - -@ngjit -def _confidence(zscore): - p_value = upper_bound_p_value(zscore) - if abs(zscore) > 2.58 and p_value < 0.01: - return 99 - if abs(zscore) > 1.96 and p_value < 0.05: - return 95 - if abs(zscore) > 1.65 and p_value < 0.1: - return 90 - return 0 - - @ngjit def _apply_numpy(data, kernel, func): out = np.zeros_like(data) @@ -275,10 +243,17 @@ def _apply_dask_cupy(data, kernel, func): def _apply(data, kernel, func): # numpy case if isinstance(data, np.ndarray): + if not (issubclass(data.dtype.type, np.integer) or + issubclass(data.dtype.type, np.floating)): + raise ValueError("data type must be integer or float") + out = _apply_numpy(data, kernel, func) # cupy case elif has_cuda() and isinstance(data, cupy.ndarray): + if not (issubclass(data.dtype.type, cupy.integer) or + issubclass(data.dtype.type, cupy.floating)): + raise ValueError("data type must be integer or float") out = _apply_cupy(data, cupy.asarray(kernel), func) # dask + cupy case @@ -311,6 +286,8 @@ def apply(raster, kernel, func=calc_mean): kernel = custom_kernel(kernel) # apply kernel to raster values + # if raster is a numpy or dask with numpy backed data array, + # the function func must be a @ngjit out = _apply(raster.data.astype(float), kernel, func) result = DataArray(out, @@ -322,7 +299,39 @@ def apply(raster, kernel, func=calc_mean): @ngjit -def _hotspots(z_array): +def upper_bound_p_value(zscore): + if abs(zscore) >= 2.33: + return 0.0099 + if abs(zscore) >= 1.65: + return 0.0495 + if abs(zscore) >= 1.29: + return 0.0985 + return 1 + + +@ngjit +def _hot_cold(zscore): + if zscore > 0: + return 1 + if zscore < 0: + return -1 + return 0 + + +@ngjit +def _confidence(zscore): + p_value = upper_bound_p_value(zscore) + if abs(zscore) > 2.58 and p_value < 0.01: + return 99 + if abs(zscore) > 1.96 and p_value < 0.05: + return 95 + if abs(zscore) > 1.65 and p_value < 0.1: + return 90 + return 0 + + +@ngjit +def _calc_hotspots_numpy(z_array): out = np.zeros_like(z_array, dtype=np.int8) rows, cols = z_array.shape for y in prange(rows): @@ -331,7 +340,27 @@ def _hotspots(z_array): return out -def hotspots(raster, kernel, x='x', y='y'): +def _hotspots_numpy(raster, kernel): + if not (issubclass(raster.values.dtype.type, np.integer) or + issubclass(raster.values.dtype.type, np.floating)): + raise ValueError( + "`raster` must be an array of integers or float") + + # apply kernel to raster values + mean_array = convolve_2d(raster.values, kernel / kernel.sum()) + + # calculate z-scores + global_mean = np.nanmean(raster.values) + global_std = np.nanstd(raster.values) + if global_std == 0: + raise ZeroDivisionError("Standard deviation of the input raster values is 0.") + z_array = (mean_array - global_mean) / global_std + + out = _calc_hotspots_numpy(z_array) + return out + + +def hotspots(raster, kernel): """Identify statistically significant hot spots and cold spots in an input raster. To be a statistically significant hot spot, a feature will have a high value and be surrounded by other features with high values as well. @@ -365,30 +394,15 @@ def hotspots(raster, kernel, x='x', y='y'): if raster.ndim != 2: raise ValueError("`raster` must be 2D") - if not (issubclass(raster.values.dtype.type, np.integer) or - issubclass(raster.values.dtype.type, np.floating)): - raise ValueError( - "`raster` must be an array of integers or float") - - raster_dims = raster.dims - if raster_dims != (y, x): - raise ValueError("raster.coords should be named as coordinates: (%s, %s)".format(y, x)) - - # apply kernel to raster values - mean_array = convolve_2d(raster.values, kernel / kernel.sum()) - - # calculate z-scores - global_mean = np.nanmean(raster.values) - global_std = np.nanstd(raster.values) - if global_std == 0: - raise ZeroDivisionError("Standard deviation of the input raster values is 0.") - z_array = (mean_array - global_mean) / global_std + # numpy case + if isinstance(raster.data, np.ndarray): + out = _hotspots_numpy(raster, kernel) - out = _hotspots(z_array) + else: + raise TypeError('Unsupported Array Type: {}'.format(type(raster.data))) - result = DataArray(out, - coords=raster.coords, - dims=raster.dims, - attrs=raster.attrs) + return DataArray(out, + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) - return result diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 00010c21..8952a4d2 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -263,63 +263,63 @@ def func_zero_cpu(x): assert e_info -# def test_hotspot(): -# n, m = 10, 10 -# raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) -# raster['x'] = np.linspace(0, n, n) -# raster['y'] = np.linspace(0, m, m) -# cellsize_x, cellsize_y = cellsize(raster) -# -# kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) -# -# all_idx = zip(*np.where(raster.values == 0)) -# -# nan_cells = [(i, i) for i in range(m)] -# for cell in nan_cells: -# raster[cell[0], cell[1]] = np.nan -# -# # add some extreme values -# hot_region = [(1, 1), (1, 2), (1, 3), -# (2, 1), (2, 2), (2, 3), -# (3, 1), (3, 2), (3, 3)] -# cold_region = [(7, 7), (7, 8), (7, 9), -# (8, 7), (8, 8), (8, 9), -# (9, 7), (9, 8), (9, 9)] -# for p in hot_region: -# raster[p[0], p[1]] = 10000 -# for p in cold_region: -# raster[p[0], p[1]] = -10000 -# -# no_significant_region = [id for id in all_idx if id not in hot_region and -# id not in cold_region] -# -# hotspots_output = hotspots(raster, kernel) -# -# # check output's properties -# # output must be an xarray DataArray -# assert isinstance(hotspots_output, xr.DataArray) -# assert isinstance(hotspots_output.values, np.ndarray) -# assert issubclass(hotspots_output.values.dtype.type, np.int8) -# -# # shape, dims, coords, attr preserved -# assert raster.shape == hotspots_output.shape -# assert raster.dims == hotspots_output.dims -# assert raster.attrs == hotspots_output.attrs -# for coord in raster.coords: -# assert np.all(raster[coord] == hotspots_output[coord]) -# -# # no nan in output -# assert not np.isnan(np.min(hotspots_output)) -# -# # output of extreme regions are non-zeros -# # hot spots -# hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) -# assert np.all(hot_spot >= 0) -# assert np.sum(hot_spot) > 0 -# # cold spots -# cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) -# assert np.all(cold_spot <= 0) -# assert np.sum(cold_spot) < 0 -# # output of no significant regions are 0s -# no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) -# assert np.all(no_sign == 0) +def test_hotspot(): + n, m = 10, 10 + raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + cellsize_x, cellsize_y = cellsize(raster) + + kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) + + all_idx = zip(*np.where(raster.values == 0)) + + nan_cells = [(i, i) for i in range(m)] + for cell in nan_cells: + raster[cell[0], cell[1]] = np.nan + + # add some extreme values + hot_region = [(1, 1), (1, 2), (1, 3), + (2, 1), (2, 2), (2, 3), + (3, 1), (3, 2), (3, 3)] + cold_region = [(7, 7), (7, 8), (7, 9), + (8, 7), (8, 8), (8, 9), + (9, 7), (9, 8), (9, 9)] + for p in hot_region: + raster[p[0], p[1]] = 10000 + for p in cold_region: + raster[p[0], p[1]] = -10000 + + no_significant_region = [id for id in all_idx if id not in hot_region and + id not in cold_region] + + hotspots_output = hotspots(raster, kernel) + + # check output's properties + # output must be an xarray DataArray + assert isinstance(hotspots_output, xr.DataArray) + assert isinstance(hotspots_output.values, np.ndarray) + assert issubclass(hotspots_output.values.dtype.type, np.int8) + + # shape, dims, coords, attr preserved + assert raster.shape == hotspots_output.shape + assert raster.dims == hotspots_output.dims + assert raster.attrs == hotspots_output.attrs + for coord in raster.coords: + assert np.all(raster[coord] == hotspots_output[coord]) + + # no nan in output + assert not np.isnan(np.min(hotspots_output)) + + # output of extreme regions are non-zeros + # hot spots + hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) + assert np.all(hot_spot >= 0) + assert np.sum(hot_spot) > 0 + # cold spots + cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) + assert np.all(cold_spot <= 0) + assert np.sum(cold_spot) < 0 + # output of no significant regions are 0s + no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) + assert np.all(no_sign == 0) From 285c8886657f1f3f678b02418e90c326d954a2e2 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Fri, 26 Feb 2021 00:42:46 +0700 Subject: [PATCH 21/30] focal hotspots: use raster.data for calculation --- xrspatial/focal.py | 17 +++++++++-------- xrspatial/tests/test_focal.py | 22 ++++++++++++---------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 936fa936..c7c77aeb 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -341,19 +341,20 @@ def _calc_hotspots_numpy(z_array): def _hotspots_numpy(raster, kernel): - if not (issubclass(raster.values.dtype.type, np.integer) or - issubclass(raster.values.dtype.type, np.floating)): - raise ValueError( - "`raster` must be an array of integers or float") + if not (issubclass(raster.data.dtype.type, np.integer) or + issubclass(raster.data.dtype.type, np.floating)): + raise ValueError("data type must be integer or float") # apply kernel to raster values - mean_array = convolve_2d(raster.values, kernel / kernel.sum()) + mean_array = convolve_2d(raster.data, kernel / kernel.sum()) # calculate z-scores - global_mean = np.nanmean(raster.values) - global_std = np.nanstd(raster.values) + global_mean = np.nanmean(raster.data) + global_std = np.nanstd(raster.data) if global_std == 0: - raise ZeroDivisionError("Standard deviation of the input raster values is 0.") + raise ZeroDivisionError( + "Standard deviation of the input raster values is 0." + ) z_array = (mean_array - global_mean) / global_std out = _calc_hotspots_numpy(z_array) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 8952a4d2..5b7bddc5 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -265,18 +265,13 @@ def func_zero_cpu(x): def test_hotspot(): n, m = 10, 10 - raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['y', 'x']) - raster['x'] = np.linspace(0, n, n) - raster['y'] = np.linspace(0, m, m) - cellsize_x, cellsize_y = cellsize(raster) + data = np.zeros((n, m), dtype=float) - kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) - - all_idx = zip(*np.where(raster.values == 0)) + all_idx = zip(*np.where(data == 0)) nan_cells = [(i, i) for i in range(m)] for cell in nan_cells: - raster[cell[0], cell[1]] = np.nan + data[cell[0], cell[1]] = np.nan # add some extreme values hot_region = [(1, 1), (1, 2), (1, 3), @@ -286,9 +281,16 @@ def test_hotspot(): (8, 7), (8, 8), (8, 9), (9, 7), (9, 8), (9, 9)] for p in hot_region: - raster[p[0], p[1]] = 10000 + data[p[0], p[1]] = 10000 for p in cold_region: - raster[p[0], p[1]] = -10000 + data[p[0], p[1]] = -10000 + + raster = xr.DataArray(data, dims=['y', 'x']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + cellsize_x, cellsize_y = cellsize(raster) + + kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) no_significant_region = [id for id in all_idx if id not in hot_region and id not in cold_region] From 0c229651b2172fb2ec6451db99a82aaeb59a7b55 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Fri, 26 Feb 2021 14:12:09 +0700 Subject: [PATCH 22/30] focal hotspots: refactor --- xrspatial/focal.py | 95 ++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index c7c77aeb..0796d1d3 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -298,45 +298,40 @@ def apply(raster, kernel, func=calc_mean): return result -@ngjit -def upper_bound_p_value(zscore): - if abs(zscore) >= 2.33: - return 0.0099 - if abs(zscore) >= 1.65: - return 0.0495 - if abs(zscore) >= 1.29: - return 0.0985 - return 1 - - -@ngjit -def _hot_cold(zscore): - if zscore > 0: - return 1 - if zscore < 0: - return -1 - return 0 - - -@ngjit -def _confidence(zscore): - p_value = upper_bound_p_value(zscore) - if abs(zscore) > 2.58 and p_value < 0.01: - return 99 - if abs(zscore) > 1.96 and p_value < 0.05: - return 95 - if abs(zscore) > 1.65 and p_value < 0.1: - return 90 - return 0 - - @ngjit def _calc_hotspots_numpy(z_array): out = np.zeros_like(z_array, dtype=np.int8) rows, cols = z_array.shape + for y in prange(rows): for x in prange(cols): - out[y, x] = _hot_cold(z_array[y, x]) * _confidence(z_array[y, x]) + zscore = z_array[y, x] + + # find p value + p_value = 1.0 + if abs(zscore) >= 2.33: + p_value = 0.0099 + elif abs(zscore) >= 1.65: + p_value = 0.0495 + elif abs(zscore) >= 1.29: + p_value = 0.0985 + + # confidence + confidence = 0 + if abs(zscore) > 2.58 and p_value < 0.01: + confidence = 99 + elif abs(zscore) > 1.96 and p_value < 0.05: + confidence = 95 + elif abs(zscore) > 1.65 and p_value < 0.1: + confidence = 90 + + hot_cold = 0 + if zscore > 0: + hot_cold = 1 + elif zscore < 0: + hot_cold = -1 + + out[y, x] = hot_cold * confidence return out @@ -361,6 +356,36 @@ def _hotspots_numpy(raster, kernel): return out +def _calc_hotspots_cupy(z_array): + out = cupy.zeros_like(z_array, dtype=cupy.int8) + rows, cols = z_array.shape + for y in prange(rows): + for x in prange(cols): + out[y, x] = _hot_cold(z_array[y, x]) * _confidence(z_array[y, x]) + return out + + +def _hotspots_cupy(raster, kernel): + if not (issubclass(raster.data.dtype.type, cupy.integer) or + issubclass(raster.data.dtype.type, cupy.floating)): + raise ValueError("data type must be integer or float") + + # apply kernel to raster values + mean_array = convolve_2d(raster.data, kernel / kernel.sum()) + + # calculate z-scores + global_mean = cupy.nanmean(raster.data) + global_std = cupy.nanstd(raster.data) + if global_std == 0: + raise ZeroDivisionError( + "Standard deviation of the input raster values is 0." + ) + z_array = (mean_array - global_mean) / global_std + + out = _calc_hotspots_cupy(z_array) + return out + + def hotspots(raster, kernel): """Identify statistically significant hot spots and cold spots in an input raster. To be a statistically significant hot spot, a feature will have a @@ -399,6 +424,10 @@ def hotspots(raster, kernel): if isinstance(raster.data, np.ndarray): out = _hotspots_numpy(raster, kernel) + # cupy case + elif has_cuda() and isinstance(raster.data, cupy.ndarray): + out = _hotspots_cupy(raster, kernel) + else: raise TypeError('Unsupported Array Type: {}'.format(type(raster.data))) From 88979deae3fbfd7e77e84d0312da093f8a305406 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Fri, 26 Feb 2021 15:07:17 +0700 Subject: [PATCH 23/30] focal hotspots: cupy case --- xrspatial/focal.py | 29 +++++++++++++- xrspatial/tests/test_focal.py | 74 +++++++++++++++++++++++++++-------- 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 0796d1d3..9263b4db 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -359,9 +359,36 @@ def _hotspots_numpy(raster, kernel): def _calc_hotspots_cupy(z_array): out = cupy.zeros_like(z_array, dtype=cupy.int8) rows, cols = z_array.shape + for y in prange(rows): for x in prange(cols): - out[y, x] = _hot_cold(z_array[y, x]) * _confidence(z_array[y, x]) + zscore = z_array[y, x] + + # find p value + p_value = 1.0 + if abs(zscore) >= 2.33: + p_value = 0.0099 + elif abs(zscore) >= 1.65: + p_value = 0.0495 + elif abs(zscore) >= 1.29: + p_value = 0.0985 + + # confidence + confidence = 0 + if abs(zscore) > 2.58 and p_value < 0.01: + confidence = 99 + elif abs(zscore) > 1.96 and p_value < 0.05: + confidence = 95 + elif abs(zscore) > 1.65 and p_value < 0.1: + confidence = 90 + + hot_cold = 0 + if zscore > 0: + hot_cold = 1 + elif zscore < 0: + hot_cold = -1 + + out[y, x] = hot_cold * confidence return out diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 5b7bddc5..f8a84d0a 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -263,6 +263,7 @@ def func_zero_cpu(x): assert e_info + def test_hotspot(): n, m = 10, 10 data = np.zeros((n, m), dtype=float) @@ -285,43 +286,82 @@ def test_hotspot(): for p in cold_region: data[p[0], p[1]] = -10000 - raster = xr.DataArray(data, dims=['y', 'x']) - raster['x'] = np.linspace(0, n, n) - raster['y'] = np.linspace(0, m, m) - cellsize_x, cellsize_y = cellsize(raster) + numpy_agg = xr.DataArray(data, dims=['y', 'x']) + numpy_agg['x'] = np.linspace(0, n, n) + numpy_agg['y'] = np.linspace(0, m, m) + cellsize_x, cellsize_y = cellsize(numpy_agg) kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) no_significant_region = [id for id in all_idx if id not in hot_region and id not in cold_region] - hotspots_output = hotspots(raster, kernel) + numpy_hotspots = hotspots(numpy_agg, kernel) # check output's properties # output must be an xarray DataArray - assert isinstance(hotspots_output, xr.DataArray) - assert isinstance(hotspots_output.values, np.ndarray) - assert issubclass(hotspots_output.values.dtype.type, np.int8) + assert isinstance(numpy_hotspots, xr.DataArray) + assert isinstance(numpy_hotspots.values, np.ndarray) + assert issubclass(numpy_hotspots.values.dtype.type, np.int8) # shape, dims, coords, attr preserved - assert raster.shape == hotspots_output.shape - assert raster.dims == hotspots_output.dims - assert raster.attrs == hotspots_output.attrs - for coord in raster.coords: - assert np.all(raster[coord] == hotspots_output[coord]) + assert numpy_agg.shape == numpy_hotspots.shape + assert numpy_agg.dims == numpy_hotspots.dims + assert numpy_agg.attrs == numpy_hotspots.attrs + for coord in numpy_agg.coords: + assert np.all(numpy_agg[coord] == numpy_hotspots[coord]) # no nan in output - assert not np.isnan(np.min(hotspots_output)) + assert not np.isnan(np.min(numpy_hotspots)) # output of extreme regions are non-zeros # hot spots - hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) + hot_spot = np.asarray([numpy_hotspots[p] for p in hot_region]) assert np.all(hot_spot >= 0) assert np.sum(hot_spot) > 0 # cold spots - cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) + cold_spot = np.asarray([numpy_hotspots[p] for p in cold_region]) assert np.all(cold_spot <= 0) assert np.sum(cold_spot) < 0 # output of no significant regions are 0s - no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) + no_sign = np.asarray([numpy_hotspots[p] for p in no_significant_region]) assert np.all(no_sign == 0) + + +@pytest.mark.skipif(doesnt_have_cuda(), reason="CUDA Device not Available") +def test_hotspot_gpu_equals_cpu(): + n, m = 10, 10 + data = np.zeros((n, m), dtype=float) + + nan_cells = [(i, i) for i in range(m)] + for cell in nan_cells: + data[cell[0], cell[1]] = np.nan + + # add some extreme values + hot_region = [(1, 1), (1, 2), (1, 3), + (2, 1), (2, 2), (2, 3), + (3, 1), (3, 2), (3, 3)] + cold_region = [(7, 7), (7, 8), (7, 9), + (8, 7), (8, 8), (8, 9), + (9, 7), (9, 8), (9, 9)] + for p in hot_region: + data[p[0], p[1]] = 10000 + for p in cold_region: + data[p[0], p[1]] = -10000 + + numpy_agg = xr.DataArray(data, dims=['y', 'x']) + numpy_agg['x'] = np.linspace(0, n, n) + numpy_agg['y'] = np.linspace(0, m, m) + + cellsize_x, cellsize_y = cellsize(numpy_agg) + kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) + + numpy_hotspots = hotspots(numpy_agg, kernel) + + import cupy + + cupy_agg = xr.DataArray(cupy.asarray(data)) + cupy_hotspots = hotspots(cupy_agg, kernel) + + assert isinstance(cupy_hotspots.data, cupy.ndarray) + assert np.isclose(numpy_hotspots, cupy_hotspots.data.get(), equal_nan=True).all() From b519d388af2ed50ab91549766b5e8a3ec9cf8fd9 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Fri, 26 Feb 2021 15:49:14 +0700 Subject: [PATCH 24/30] focal hotspots: dask numpy case --- xrspatial/focal.py | 29 +++++++++++++++++++++++++++++ xrspatial/tests/test_focal.py | 15 +++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 9263b4db..54ac83dc 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -356,6 +356,31 @@ def _hotspots_numpy(raster, kernel): return out +def _hotspots_dask_numpy(raster, kernel): + + # apply kernel to raster values + mean_array = convolve_2d(raster.data, kernel / kernel.sum()) + + # calculate z-scores + global_mean = da.nanmean(raster.data) + global_std = da.nanstd(raster.data) + if global_std == 0: + raise ZeroDivisionError( + "Standard deviation of the input raster values is 0." + ) + z_array = (mean_array - global_mean) / global_std + + _func = partial(_calc_hotspots_numpy) + pad_h = kernel.shape[0] // 2 + pad_w = kernel.shape[1] // 2 + + out = z_array.map_overlap(_func, + depth=(pad_h, pad_w), + boundary=np.nan, + meta=np.array(())) + return out + + def _calc_hotspots_cupy(z_array): out = cupy.zeros_like(z_array, dtype=cupy.int8) rows, cols = z_array.shape @@ -455,6 +480,10 @@ def hotspots(raster, kernel): elif has_cuda() and isinstance(raster.data, cupy.ndarray): out = _hotspots_cupy(raster, kernel) + # dask + numpy case + elif isinstance(raster.data, da.Array): + out = _hotspots_dask_numpy(raster, kernel) + else: raise TypeError('Unsupported Array Type: {}'.format(type(raster.data))) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index f8a84d0a..5d2fb697 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -263,7 +263,6 @@ def func_zero_cpu(x): assert e_info - def test_hotspot(): n, m = 10, 10 data = np.zeros((n, m), dtype=float) @@ -296,8 +295,19 @@ def test_hotspot(): no_significant_region = [id for id in all_idx if id not in hot_region and id not in cold_region] + # numpy case numpy_hotspots = hotspots(numpy_agg, kernel) + # dask + numpy + dask_numpy_agg = xr.DataArray(da.from_array(data, chunks=(3, 3))) + dask_numpy_hotspots = hotspots(dask_numpy_agg, kernel) + + assert isinstance(dask_numpy_hotspots.data, da.Array) + + # both output same results + assert np.isclose(numpy_hotspots.data, dask_numpy_hotspots.data.compute(), + equal_nan=True).all() + # check output's properties # output must be an xarray DataArray assert isinstance(numpy_hotspots, xr.DataArray) @@ -355,9 +365,10 @@ def test_hotspot_gpu_equals_cpu(): cellsize_x, cellsize_y = cellsize(numpy_agg) kernel = circle_kernel(cellsize_x, cellsize_y, 2.0) - + # numpy case numpy_hotspots = hotspots(numpy_agg, kernel) + # cupy case import cupy cupy_agg = xr.DataArray(cupy.asarray(data)) From 6de3006433f1037ce26c58a9c6b0040b5d2a73c5 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Fri, 26 Feb 2021 15:52:55 +0700 Subject: [PATCH 25/30] focal hotspots: dask cupy case not implemented --- xrspatial/focal.py | 5 +++++ xrspatial/tests/test_focal.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 54ac83dc..e3d55937 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -18,6 +18,7 @@ class cupy(object): from numba import cuda from xrspatial.utils import cuda_args from xrspatial.utils import has_cuda +from xrspatial.utils import is_cupy_backed from xrspatial.utils import ngjit from xrspatial.convolution import convolve_2d, custom_kernel @@ -480,6 +481,10 @@ def hotspots(raster, kernel): elif has_cuda() and isinstance(raster.data, cupy.ndarray): out = _hotspots_cupy(raster, kernel) + # dask + cupy case + elif has_cuda() and isinstance(raster.data, da.Array) and is_cupy_backed(raster): + raise NotImplementedError() + # dask + numpy case elif isinstance(raster.data, da.Array): out = _hotspots_dask_numpy(raster, kernel) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 5d2fb697..a76884dd 100755 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -376,3 +376,9 @@ def test_hotspot_gpu_equals_cpu(): assert isinstance(cupy_hotspots.data, cupy.ndarray) assert np.isclose(numpy_hotspots, cupy_hotspots.data.get(), equal_nan=True).all() + + # dask + cupy case not implemented + dask_cupy_agg = xr.DataArray(da.from_array(cupy.asarray(data), chunks=(3, 3))) + with pytest.raises(NotImplementedError) as e_info: + hotspots(dask_cupy_agg, kernel) + assert e_info From aaea3137aa8d07652ad5acc216565c282f8e4bd8 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 28 Feb 2021 16:37:31 +0700 Subject: [PATCH 26/30] update readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 842301c0..ef250deb 100755 --- a/README.md +++ b/README.md @@ -81,10 +81,10 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e | Name | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray | |:----------:|:----------------------:|:--------------------:|:-------------------:|:------:| -| [Apply](xrspatial/focal.py) | ✅️ | | | | -| [Hotspots](xrspatial/focal.py) | ✅️ | | | | -| [Mean](xrspatial/focal.py) | ✅️ | | | | -| [Focal Statistics](xrspatial/focal.py) | ✅️ | | ✅️ | | +| [Apply](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | +| [Hotspots](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | +| [Mean](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | +| [Focal Statistics](xrspatial/focal.py) | ✅️ | ✅️ | ✅️ | | ------- From 4ec1be900ca91440641fedeb1b36992f1d8facc2 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 21 Apr 2021 10:48:31 +0700 Subject: [PATCH 27/30] flake8 fix --- xrspatial/convolution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 7e601dda..83381362 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -485,4 +485,4 @@ def convolve_2d(data, kernel): else: raise TypeError('Unsupported Array Type: {}'.format(type(data))) - return out \ No newline at end of file + return out From 6e0f3e6cdca52cf6c45d9a87303bbe8f5a52d70d Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 21 Apr 2021 10:53:50 +0700 Subject: [PATCH 28/30] flake8 fix --- xrspatial/convolution.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xrspatial/convolution.py b/xrspatial/convolution.py index 83381362..d417c14e 100755 --- a/xrspatial/convolution.py +++ b/xrspatial/convolution.py @@ -375,17 +375,17 @@ def _convolve_2d_cuda(data, kernel, out): return # The out at coordinates (i, j) is equal to - # sum_{k, l} kernel[k, l] * data[i - k + delta_rows, j - l + delta_cols] - # with k and l going through the whole kernel array: + # sum_{k, h} kernel[k, h] * data[i - k + delta_rows, j - h + delta_cols] + # with k and h going through the whole kernel array: s = 0 for k in range(kernel.shape[0]): - for l in range(kernel.shape[1]): + for h in range(kernel.shape[1]): i_k = i - k + delta_rows - j_l = j - l + delta_cols - # (-4-) Check if (i_k, j_l) coordinates are inside the array: + j_h = j - h + delta_cols + # (-4-) Check if (i_k, j_h) coordinates are inside the array: if (i_k >= 0) and (i_k < data_rows) and \ - (j_l >= 0) and (j_l < data_cols): - s += kernel[k, l] * data[i_k, j_l] + (j_h >= 0) and (j_h < data_cols): + s += kernel[k, h] * data[i_k, j_h] out[i, j] = s From e9753aa97629ac1e7ad08edf3cff201f65417d05 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Mon, 3 May 2021 11:46:10 +0700 Subject: [PATCH 29/30] hotspots: avoid early compute global_std --- xrspatial/focal.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index e02d01cd..887f4127 100755 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -388,10 +388,13 @@ def _hotspots_dask_numpy(raster, kernel): # calculate z-scores global_mean = da.nanmean(raster.data) global_std = da.nanstd(raster.data) - if global_std == 0: - raise ZeroDivisionError( - "Standard deviation of the input raster values is 0." - ) + + # commented out to avoid early compute to check if global_std is zero + # if global_std == 0: + # raise ZeroDivisionError( + # "Standard deviation of the input raster values is 0." + # ) + z_array = (mean_array - global_mean) / global_std _func = partial(_calc_hotspots_numpy) From 9cc84125f34f53801b39d52f58f76686e2405c28 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Mon, 3 May 2021 11:53:29 +0700 Subject: [PATCH 30/30] correct typo --- examples/user_guide/4_Focal.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/user_guide/4_Focal.ipynb b/examples/user_guide/4_Focal.ipynb index 36dd88ae..4d2e518c 100755 --- a/examples/user_guide/4_Focal.ipynb +++ b/examples/user_guide/4_Focal.ipynb @@ -124,7 +124,7 @@ "source": [ "from xrspatial import focal\n", "\n", - "cellsize_x, cellsize_y = focal.cellsize(terrain)\n", + "cellsize_x, cellsize_y = focal.calc_cellsize(terrain)\n", "# Use an annulus kernel with a ring at a distance from 25-30 cells away from focal point\n", "outer_radius = cellsize_x * 30\n", "inner_radius = cellsize_x * 25\n",