From 135ae5c279a9238c2da1f294fb8b2a444ed5201a Mon Sep 17 00:00:00 2001 From: Yung-Yu Chen Date: Mon, 23 Jan 2023 17:45:37 +0800 Subject: [PATCH] [nsd/11arraydesign] Update data prep to use SimpleArray --- .github/workflows/nsd.yml | 2 - nsd/11arraydesign/code/04_fit_poly.py | 9 +- nsd/11arraydesign/code/Makefile | 35 +++--- nsd/11arraydesign/code/data_prep.cpp | 170 +++++++++++++++++++------- 4 files changed, 150 insertions(+), 66 deletions(-) diff --git a/.github/workflows/nsd.yml b/.github/workflows/nsd.yml index 5d84e75..622f561 100644 --- a/.github/workflows/nsd.yml +++ b/.github/workflows/nsd.yml @@ -205,7 +205,5 @@ jobs: make -C ${{ env.ARRAYDESIGNROOT }} solve_cpp_xarray.so - name: 11arraydesign_data_prep - # macos does not have MKL. - if: startsWith(matrix.os, 'ubuntu') run: | make -C ${{ env.ARRAYDESIGNROOT }} data_prep.so diff --git a/nsd/11arraydesign/code/04_fit_poly.py b/nsd/11arraydesign/code/04_fit_poly.py index 90b897c..e1d90a0 100755 --- a/nsd/11arraydesign/code/04_fit_poly.py +++ b/nsd/11arraydesign/code/04_fit_poly.py @@ -12,6 +12,7 @@ def __exit__(self, *args, **kw): self._t1 = time.time() print("Wall time: {:g} s".format(self._t1 - self._t0)) +print("prep") # [begin prep pycon] with Timer(): # np.unique returns a sorted array. @@ -44,6 +45,7 @@ def plot_poly_fitted(i): print('write to {}'.format(imagepath)) plt.savefig(imagepath, dpi=150) +print("lumped") # [begin lumped pycon] with Timer(): # Do the calculation for the 1000 groups of points. @@ -56,6 +58,7 @@ def plot_poly_fitted(i): polygroup[i,:] = data_prep.fit_poly(sub_x, sub_y, 2) # [end lumped pycon] +print("point build") # [begin point build pycon] with Timer(): # Using numpy to build the point groups takes a lot of time. @@ -65,6 +68,7 @@ def plot_poly_fitted(i): data_groups.append((xdata[slct], ydata[slct])) # [end point build pycon] +print("fitting") # [begin fitting pycon] with Timer(): # Fitting helper runtime is much less than building the point groups. @@ -73,6 +77,7 @@ def plot_poly_fitted(i): polygroup[it,:] = data_prep.fit_poly(sub_x, sub_y, 2) # [end fitting pycon] +print("batch") # [begin batch pycon] with Timer(): rbatch = data_prep.fit_polys(xdata, ydata, 2) @@ -80,6 +85,8 @@ def plot_poly_fitted(i): print(rbatch.shape) # Verify batch. -assert (rbatch[i] == polygroup[i]).all() +assert len(rbatch) == len(polygroup) +for i in range(1000): + assert (rbatch[i] == polygroup[i]).all() # vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79: diff --git a/nsd/11arraydesign/code/Makefile b/nsd/11arraydesign/code/Makefile index 1499ba1..d72bcb0 100644 --- a/nsd/11arraydesign/code/Makefile +++ b/nsd/11arraydesign/code/Makefile @@ -3,22 +3,26 @@ export PYTHONPATH=modmesh-master UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Darwin) -MKLROOT ?= /opt/intel/mkl -MKLINC ?= ${MKLROOT}/include -MKLLIB ?= ${MKLROOT}/lib -MKLEXT ?= a +# Do not define "HASMKL" macro. +INC += -I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers +LINKFLAGS := -framework Accelerate else ifeq ($(UNAME_S),Linux) -MKLINC ?= /usr/include/mkl +CXXFLAGS += -DHASMKL -Wl,--no-as-needed +ifeq (${MKLROOT},) +INC += -I/usr/include/mkl MKLLIB ?= /usr/lib/x86_64-linux-gnu -MKLEXT ?= so -CXXFLAGS += -Wl,--no-as-needed +else +INC += -I${MKLROOT}/include +MKLLIB ?= ${MKLROOT}/lib/intel64 endif +MKLEXT ?= so -MKLLINKLINE := \ +LINKFLAGS := \ ${MKLLIB}/libmkl_intel_lp64.${MKLEXT} \ ${MKLLIB}/libmkl_sequential.${MKLEXT} \ ${MKLLIB}/libmkl_core.${MKLEXT} \ -lpthread -lm -ldl +endif PYTHON ?= $(shell which python3) PYTHON_VERSION ?= $(shell $(PYTHON) -c 'import sys; print("%d.%d" % (sys.version_info.major, sys.version_info.minor))') @@ -28,16 +32,12 @@ NUMPY_INC := $(shell python3 -c 'import numpy ; print(numpy.get_include())') MODMESH_ROOT=modmesh_copy MODMESH_PYMOD=$(MODMESH_ROOT)/modmesh/buffer/pymod -INC := -I$(shell python3-config --prefix)/include -I$(MODMESH_ROOT) +INC += -I$(shell python3-config --prefix)/include -I$(MODMESH_ROOT) PYTHON_LIB := $(shell python3-config --prefix)/lib CPATH := $(PYTHON_INC):$(NUMPY_INC) -ifdef YHROOT -CPATH := $(CPATH):$(YHROOT)/usr/$(YHFLAVOR)/include -endif - export CPATH BASES := 01_grid 01_solve_python_loop 01_solve_analytical @@ -64,8 +64,11 @@ testimport: libst #cpp -v /dev/null -o /dev/null -data_prep.so: data_prep.cpp Makefile - g++ $< -o $@ -O3 -fPIC -shared -std=c++17 -lpython$(PYTHON_VERSION) -L$(PYTHON_LIB) ${INC} -I${MKLINC} ${MKLLINKLINE} +data_prep.o: data_prep.cpp Makefile + g++ -c $< -o $@ -O3 -fPIC -std=c++17 ${INC} + +data_prep.so: data_prep.o wrap_SimpleArray.o wrap_ConcreteBuffer.o buffer_pymod.o Makefile + g++ $< wrap_ConcreteBuffer.o wrap_SimpleArray.o buffer_pymod.o -o $@ -shared -std=c++17 -lpython$(PYTHON_VERSION) -L$(PYTHON_LIB) ${LINKFLAGS} solve_cpp_xarray.so: solve_cpp_xarray.cpp Makefile g++ $< -o $@ -O3 -fPIC -shared -std=c++17 -lpython$(PYTHON_VERSION) -L$(PYTHON_LIB) ${INC} @@ -80,7 +83,7 @@ solve_cpp.o: solve_cpp.cpp Makefile g++ -c $< -o $@ -O3 -fPIC -std=c++17 ${INC} solve_cpp.so: solve_cpp.o wrap_SimpleArray.o wrap_ConcreteBuffer.o buffer_pymod.o Makefile - g++ solve_cpp.o wrap_ConcreteBuffer.o wrap_SimpleArray.o buffer_pymod.o -o $@ -shared -std=c++17 -lpython$(PYTHON_VERSION) -L$(PYTHON_LIB) + g++ $< wrap_ConcreteBuffer.o wrap_SimpleArray.o buffer_pymod.o -o $@ -shared -std=c++17 -lpython$(PYTHON_VERSION) -L$(PYTHON_LIB) ../image/03_solve_cpp.png: 03_solve_cpp.py solve_cpp.so ./$< diff --git a/nsd/11arraydesign/code/data_prep.cpp b/nsd/11arraydesign/code/data_prep.cpp index 24921b9..ecb57f0 100644 --- a/nsd/11arraydesign/code/data_prep.cpp +++ b/nsd/11arraydesign/code/data_prep.cpp @@ -1,22 +1,103 @@ #include -#define FORCE_IMPORT_ARRAY -#include +#include +#include + +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include + +#ifdef HASMKL +#include +#include +#else // HASMKL +#ifdef __MACH__ +#include +#include +#endif // __MACH__ +#endif // HASMKL #include #include -#include -#include -#include -#include +namespace modmesh +{ + +namespace python +{ + +void import_numpy() +{ + auto local_import_numpy = []() + { + import_array2("cannot import numpy", false); // or numpy c api segfault. + return true; + }; + if (!local_import_numpy()) + { + throw pybind11::error_already_set(); + } +} + +} /* end namespace python */ + +} /* end namespace modmesh */ + +modmesh::SimpleArray solve +( + modmesh::SimpleArray const & sarr + , modmesh::SimpleArray const & rhs +) +{ + // Populate the column major input by transposing + modmesh::SimpleArray mat( + std::vector{sarr.shape(1), sarr.shape(0)}); + for (size_t i = 0; i < mat.shape(0); ++i) + { + for (size_t j = 0; j < mat.shape(1); ++j) + { + mat(i, j) = sarr(j, i); + } + } + + modmesh::SimpleArray b = rhs; + int n = rhs.size(); + modmesh::SimpleArray ipiv(n); + + int status; + int nn = mat.shape(0); + int bncol = 1; + int bnrow = b.shape(0); + int matnrow = mat.shape(1); + + // FIXME: This call is not yet validated. I am not sure about the + // correctness of the solution. + dgesv_( // column major. + &nn // int * n: number of linear equation + , &bncol // int * nrhs: number of RHS + , mat.data() // double * a: array (lda, n) + , &matnrow // int * lda: leading dimension of array a + , ipiv.data() // int * ipiv: pivot indices + , b.data() // double * b: array (ldb, nrhs) + , &bnrow // int * ldb: leading dimension of array b + , &status // for column major matrix, ldb remains the leading dimension. + ); + + return b; +} // [begin example: single fit] /** * This function calculates the least-square regression of a point cloud to a * polynomial function of a given order. */ -template -xt::xarray fit_poly(AT & xarr, AT & yarr, size_t order) +modmesh::SimpleArray fit_poly +( + modmesh::SimpleArray const & xarr + , modmesh::SimpleArray const & yarr + , size_t start + , size_t stop + , size_t order +) { if (xarr.size() != yarr.size()) { @@ -24,7 +105,7 @@ xt::xarray fit_poly(AT & xarr, AT & yarr, size_t order) } // The rank of the linear map is (order+1). - xt::xarray matrix(std::vector{order+1, order+1}); + modmesh::SimpleArray matrix(std::vector{order+1, order+1}); // Use the x coordinates to build the linear map for least-square // regression. @@ -34,7 +115,7 @@ xt::xarray fit_poly(AT & xarr, AT & yarr, size_t order) { double & val = matrix(it, jt); val = 0; - for (size_t kt=0; kt fit_poly(AT & xarr, AT & yarr, size_t order) // Use the x and y coordinates to build the vector in the right-hand side // of the linear system. - xt::xarray rhs(std::vector{order+1}); + modmesh::SimpleArray rhs(std::vector{order+1}); for (size_t jt=0; jt lhs = xt::linalg::solve(matrix, rhs); + modmesh::SimpleArray lhs = solve(matrix, rhs); std::reverse(lhs.begin(), lhs.end()); // to make numpy.poly1d happy. return lhs; @@ -66,18 +147,19 @@ xt::xarray fit_poly(AT & xarr, AT & yarr, size_t order) * This function calculates the least-square regression of multiple sets of * point clouds to the corresponding polynomial functions of a given order. */ -template -xt::xarray fit_polys +modmesh::SimpleArray fit_polys ( - xt::xarray & xarr, xt::xarray & yarr, size_t order + modmesh::SimpleArray const & xarr + , modmesh::SimpleArray const & yarr + , size_t order ) { size_t xmin = std::floor(*std::min_element(xarr.begin(), xarr.end())); size_t xmax = std::ceil(*std::max_element(xarr.begin(), xarr.end())); size_t ninterval = xmax - xmin; - xt::xarray lhs(std::vector{ninterval, order+1}); - lhs.fill(0); // sentinel. + modmesh::SimpleArray lhs(std::vector{ninterval, order+1}); + std::fill(lhs.begin(), lhs.end(), 0); // sentinel. size_t start=0; for (size_t it=0; it fit_polys if (xarr[stop]>=it+1) { break; } } - // Obtain the slice of the input that is to be processed in this - // iteration. - AT const sub_x = xt::view(xarr, xt::range(start, stop)); - AT const sub_y = xt::view(yarr, xt::range(start, stop)); - // Use the single polynomial helper function. - xt::xarray sub_lhs = fit_poly(sub_x, sub_y, order); - xt::view(lhs, it, xt::all()) = sub_lhs; + auto sub_lhs = fit_poly(xarr, yarr, start, stop, order); + for (size_t jt=0; jt fit_polys */ PYBIND11_MODULE(data_prep, m) { - using array_type = xt::xarray; + // Boilerplate for using the SimpleArray with Python + { + modmesh::python::import_numpy(); + modmesh::python::wrap_ConcreteBuffer(m); + modmesh::python::wrap_SimpleArray(m); + } - xt::import_numpy(); m.def ( "fit_poly" , [] ( - xt::pyarray & xarr_in - , xt::pyarray & yarr_in + pybind11::array_t & xarr_in + , pybind11::array_t & yarr_in , size_t order ) { - std::vector xarr_shape(xarr_in.shape().begin() - , xarr_in.shape().end()); - xt::xarray xarr = xt::adapt(xarr_in.data(), xarr_shape); - - std::vector yarr_shape(yarr_in.shape().begin() - , yarr_in.shape().end()); - xt::xarray yarr = xt::adapt(yarr_in.data(), yarr_shape); - - return fit_poly(xarr, yarr, order); + auto xarr = modmesh::python::makeSimpleArray(xarr_in); + auto yarr = modmesh::python::makeSimpleArray(yarr_in); + auto ret = fit_poly(xarr, yarr, 0, xarr.size(), order); + return modmesh::python::to_ndarray(ret); } ); m.def @@ -142,18 +221,15 @@ PYBIND11_MODULE(data_prep, m) "fit_polys" , [] ( - xt::pyarray & xarr_in - , xt::pyarray & yarr_in + pybind11::array_t & xarr_in + , pybind11::array_t & yarr_in , size_t order ) { - std::vector xarr_shape(xarr_in.shape().begin() - , xarr_in.shape().end()); - xt::xarray xarr = xt::adapt(xarr_in.data(), xarr_shape); - std::vector yarr_shape(yarr_in.shape().begin() - , yarr_in.shape().end()); - xt::xarray yarr = xt::adapt(yarr_in.data(), yarr_shape); - return fit_polys(xarr, yarr, order); + auto xarr = modmesh::python::makeSimpleArray(xarr_in); + auto yarr = modmesh::python::makeSimpleArray(yarr_in); + auto ret = fit_polys(xarr, yarr, order); + return modmesh::python::to_ndarray(ret); } ); }