From 47ad73ab57ded2bea7685b9ef7ba23c275a6d788 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?=
Date: Sun, 23 Apr 2023 21:14:59 +0200
Subject: [PATCH] Floordiv (#593)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* implement floor division

* fix 3D, 4D loops

* add missing array declaration in 3D, and 4D

* Add test cases for floor division and fix it for ints (#599)

* Add test cases for floor division

* Fix define name in comment

* Fix floor division of ints

---------

Co-authored-by: Maciej Sokołowski
---
 code/ndarray.c                  |   6 +
 code/ndarray_operators.c        | 104 +++++++++++-
 code/ndarray_operators.h        | 273 +++++++++++++++++++++++++++++++-
 code/ulab.h                     |   4 +
 tests/2d/numpy/operators.py     |  15 ++
 tests/2d/numpy/operators.py.exp |  14 ++
 6 files changed, 414 insertions(+), 2 deletions(-)

diff --git a/code/ndarray.c b/code/ndarray.c
index bade22cf..ae219d28 100644
--- a/code/ndarray.c
+++ b/code/ndarray.c
@@ -1936,6 +1936,12 @@ mp_obj_t ndarray_binary_op(mp_binary_op_t _op, mp_obj_t lobj, mp_obj_t robj) {
             return ndarray_binary_power(lhs, rhs, ndim, shape, lstrides, rstrides);
             break;
         #endif
+        #if NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+        case MP_BINARY_OP_FLOOR_DIVIDE:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_floor_divide(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
         default:
             return MP_OBJ_NULL; // op not supported
             break;
diff --git a/code/ndarray_operators.c b/code/ndarray_operators.c
index b941daf5..dd2b24d6 100644
--- a/code/ndarray_operators.c
+++ b/code/ndarray_operators.c
@@ -673,6 +673,108 @@ mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 }
 #endif /* NDARRAY_HAS_BINARY_OP_TRUE_DIVIDE */
 
+#if NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+/*
+ * Element-wise floor division (the `//` operator) of lhs by rhs.
+ * shape and the two stride arrays describe how the operands are traversed;
+ * the dtype of the result is picked per operand pair in the branches below.
+ * NOTE: a zero divisor is not checked for in the integer loops.
+ */
+mp_obj_t ndarray_binary_floor_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            FLOOR_DIVIDE_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP(results, uint16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            FLOOR_DIVIDE_LOOP_UINT(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            FLOOR_DIVIDE_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+        if(rhs->dtype == NDARRAY_UINT8) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            FLOOR_DIVIDE_LOOP_FLOAT(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+
+}
+#endif /* NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE */
+
 #if NDARRAY_HAS_BINARY_OP_POWER
 mp_obj_t ndarray_binary_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
                                             uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
@@ -812,7 +914,7 @@ mp_obj_t ndarray_inplace_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t
     }
     return MP_OBJ_FROM_PTR(lhs);
 }
-#endif /* NDARRAY_HAS_INPLACE_DIVIDE */
+#endif /* NDARRAY_HAS_INPLACE_TRUE_DIVIDE */
 
 #if NDARRAY_HAS_INPLACE_POWER
 mp_obj_t ndarray_inplace_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
diff --git a/code/ndarray_operators.h b/code/ndarray_operators.h
index 7849e030..b8f080fa 100644
--- a/code/ndarray_operators.h
+++ b/code/ndarray_operators.h
@@ -5,7 +5,7 @@
  *
  * The MIT License (MIT)
  *
- * Copyright (c) 2020-2021 Zoltán Vörös
+ * Copyright (c) 2020-2023 Zoltán Vörös
  */
 
 #include "ndarray.h"
@@ -17,6 +17,7 @@ mp_obj_t ndarray_binary_more(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t
 mp_obj_t ndarray_binary_power(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
 mp_obj_t ndarray_binary_subtract(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
 mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_floor_divide(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
 
 mp_obj_t ndarray_inplace_ams(ndarray_obj_t *, ndarray_obj_t *, int32_t *, uint8_t );
 mp_obj_t ndarray_inplace_power(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
@@ -275,3 +276,273 @@ mp_obj_t ndarray_inplace_divide(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
     } while(i < (results)->shape[ULAB_MAX_DIMS - 4]);\
 })
 #endif /* ULAB_MAX_DIMS == 4 */
+
+/*
+ * Innermost (last axis) kernels of the floor division. Each of them fills one
+ * row of the results, advancing the operand pointers by their last stride:
+ *   FLOOR_DIVIDE_UINT1  - unsigned operands, for which the truncating division
+ *                         of C already is the floor division;
+ *   FLOOR_DIVIDE1       - signed integers; C truncates toward zero, hence the
+ *                         numerator is biased first, if the signs differ;
+ *   FLOOR_DIVIDE_FLOAT1 - floating point division, followed by floor().
+ * NOTE: the integer kernels do not check for a zero divisor.
+ */
+#define FLOOR_DIVIDE_UINT1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    do {\
+        *(array)++ = *((type_left *)(larray)) / *((type_right *)(rarray));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+#define FLOOR_DIVIDE1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    int32_t num, denom; /* int32_t: uint16_t operands and the bias below must fit */\
+    do {\
+        num = (int32_t)*((type_left *)(larray));\
+        denom = (int32_t)*((type_right *)(rarray)); /* re-read for every element */\
+        if(num >= 0) {\
+            if(denom < 0) {\
+                num += -denom - 1;\
+            }\
+        } else {\
+            if(denom >= 0) {\
+                num += -denom + 1;\
+            }\
+        }\
+        *(array)++ = num / denom;\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+#define FLOOR_DIVIDE_FLOAT1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    do {\
+        *(array)++ = MICROPY_FLOAT_C_FUN(floor)((mp_float_t)*((type_left *)(larray)) / (mp_float_t)*((type_right *)(rarray)));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+#if ULAB_MAX_DIMS == 1
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+#define FLOOR_DIVIDE_LOOP_UINT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE_UINT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+#define FLOOR_DIVIDE_LOOP_FLOAT(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                FLOOR_DIVIDE_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+
+#endif /* ULAB_MAX_DIMS == 4 */
diff --git a/code/ulab.h b/code/ulab.h
index f3fdeb45..eedddc5e 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -101,6 +101,10 @@
 #define NDARRAY_HAS_BINARY_OP_EQUAL         (1)
 #endif
 
+#ifndef NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE
+#define NDARRAY_HAS_BINARY_OP_FLOOR_DIVIDE  (1)
+#endif
+
 #ifndef NDARRAY_HAS_BINARY_OP_LESS
 #define NDARRAY_HAS_BINARY_OP_LESS          (1)
 #endif
diff --git a/tests/2d/numpy/operators.py b/tests/2d/numpy/operators.py
index acc316b1..42d00432 100644
--- a/tests/2d/numpy/operators.py
+++ b/tests/2d/numpy/operators.py
@@ -81,6 +81,13 @@ print(np.array([1,2,3], dtype=np.float) / np.array([4,5,6], dtype=np.uint16))
 print(np.array([1,2,3], dtype=np.float) / np.array([4,5,6], dtype=np.int16))
 
 
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.float))
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.uint16))
+print(np.array([10,20,30], dtype=np.float) // np.array([4,5,6], dtype=np.int16))
+print(np.array([10,20,30], dtype=np.int16) // np.array([4,5,6], dtype=np.int16))
+print(np.array([-7,7,-8], dtype=np.int16) // np.array([2,-2,3], dtype=np.int16))
+print(np.array([7,8,9], dtype=np.uint16) // np.array([-2,3,-4], dtype=np.int16))
+
 print(np.array([1,2,3], dtype=np.float) - 4)
 print(np.array([1,2,3], dtype=np.float) - 4.0)
 print(np.array([1,2,3], dtype=np.float) + 4)
@@ -93,6 +100,14 @@ print(np.array([1,2,3], dtype=np.float) / 4)
 print(np.array([1,2,3], dtype=np.float) / 4.0)
 
 
+print(np.array([10,20,30], dtype=np.float) // 4)
+print(np.array([10,20,30], dtype=np.float) // 4.0)
+print(np.array([10,20,30], dtype=np.int8) // 4)
+print(np.array([10,20,30], dtype=np.int8) // 4.0)
+print(np.array([10,20,30], dtype=np.uint16) // 4)
+print(np.array([10,20,30], dtype=np.uint16) // 4.0)
+print(np.array([10,20,30], dtype=np.int16) // 4)
+print(np.array([10,20,30], dtype=np.int16) // 4.0)
 
 a = np.array([1,2,3], dtype=np.float)
 a -= np.array([4,5,6], dtype=np.float)
diff --git a/tests/2d/numpy/operators.py.exp b/tests/2d/numpy/operators.py.exp
index 9749509d..319c2073 100644
--- a/tests/2d/numpy/operators.py.exp
+++ b/tests/2d/numpy/operators.py.exp
@@ -60,6 +60,12 @@ array([1.0, 32.0, 729.0], dtype=float64)
 array([0.25, 0.4, 0.5], dtype=float64)
 array([0.25, 0.4, 0.5], dtype=float64)
 array([0.25, 0.4, 0.5], dtype=float64)
+array([2.0, 4.0, 5.0], dtype=float64)
+array([2.0, 4.0, 5.0], dtype=float64)
+array([2.0, 4.0, 5.0], dtype=float64)
+array([2, 4, 5], dtype=int16)
+array([-4, -4, -3], dtype=int16)
+array([-4.0, 2.0, -3.0], dtype=float64)
 array([-3.0, -2.0, -1.0], dtype=float64)
 array([-3.0, -2.0, -1.0], dtype=float64)
 array([5.0, 6.0, 7.0], dtype=float64)
@@ -70,6 +76,14 @@ array([1.0, 16.0, 81.0], dtype=float64)
 array([1.0, 16.0, 81.0], dtype=float64)
 array([0.25, 0.5, 0.75], dtype=float64)
 array([0.25, 0.5, 0.75], dtype=float64)
+array([2.0, 5.0, 7.0], dtype=float64)
+array([2.0, 5.0, 7.0], dtype=float64)
+array([2, 5, 7], dtype=int8)
+array([2.0, 5.0, 7.0], dtype=float64)
+array([2, 5, 7], dtype=uint16)
+array([2.0, 5.0, 7.0], dtype=float64)
+array([2, 5, 7], dtype=int16)
+array([2.0, 5.0, 7.0], dtype=float64)
 array([-3.0, -3.0, -3.0], dtype=float64)
 array([-3.0, -3.0, -3.0], dtype=float64)
 array([-3.0, -3.0, -3.0], dtype=float64)