From 256eaaa7bd40acc2d86c71a02ea7a44662bd3a34 Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 14:30:40 -0500 Subject: [PATCH 1/9] Update build_wheels.yml --- .github/workflows/build_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index bb628e5..68f154d 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -18,7 +18,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-20.04, macos-11] + os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v3 From 814c4d506f4fc2b97a08754b7a83fc4d9efbfe74 Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 14:43:55 -0500 Subject: [PATCH 2/9] Fix windows compatibility --- include/dbscan/pbbs/sequence.h | 4 ++-- include/dbscan/pbbs/utils.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/dbscan/pbbs/sequence.h b/include/dbscan/pbbs/sequence.h index 858cfa4..ab5b62c 100644 --- a/include/dbscan/pbbs/sequence.h +++ b/include/dbscan/pbbs/sequence.h @@ -28,8 +28,8 @@ #include "utils.h" // For fast popcount -#include -#include +// #include +// #include using namespace std; diff --git a/include/dbscan/pbbs/utils.h b/include/dbscan/pbbs/utils.h index ade99ce..afddc80 100644 --- a/include/dbscan/pbbs/utils.h +++ b/include/dbscan/pbbs/utils.h @@ -26,7 +26,7 @@ #include #include "parallel.h" - +/* #if defined(__APPLE__) #define PTCMPXCH " cmpxchgl %2,%1\n" #else @@ -39,6 +39,7 @@ static int __ii = mallopt(M_MMAP_MAX,0); static int __jj = mallopt(M_TRIM_THRESHOLD,-1); #endif +*/ #define newA(__E,__n) (__E*) malloc((__n)*sizeof(__E)) From 7a5ee1a0c9e420cd6567fb0d54c5df83355870c1 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Tue, 29 Nov 2022 11:47:28 -0800 Subject: [PATCH 3/9] Automatic versioning from Git tags --- .gitignore | 1 + MANIFEST.in | 2 ++ pyproject.toml | 11 ++++--- {src => pythonmodule}/__init__.py | 11 +++++-- setup.py | 55 ++++++++++--------------------- src/dbscanmodule.cpp | 6 ++-- 6 files changed, 41 insertions(+), 45 deletions(-) rename {src => pythonmodule}/__init__.py (91%) diff --git a/.gitignore b/.gitignore index 7c53c88..bf179db 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ dbscan/build/ .DS_Store dbscan.egg-info/ __pycache__ +pythonmodule/_version.py diff --git a/MANIFEST.in b/MANIFEST.in index 3a46ee3..323e43d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,3 @@ recursive-include include * +global-exclude *.py[co] .DS_Store +exclude src/dbscan diff --git a/pyproject.toml b/pyproject.toml index 902becf..954b06f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,18 +33,21 @@ example = [ homepage = "https://github.com/wangyiqiu/dbscan-python" [build-system] -requires = ["setuptools", "wheel", "oldest-supported-numpy"] +requires = ["setuptools", "setuptools_scm", "wheel", "oldest-supported-numpy"] + +[tool.setuptools_scm] +write_to = "pythonmodule/_version.py" [tool.cibuildwheel] # TODO: Update the pypy to the latest version. This is unnecesary for cpython # because it will be compatible with all newer releases. Changing the versions # could be convienient in the future, but for now, totally backwards compatible # wheels work nicely. -build = "cp36-* cp38-macosx_arm64 cp39-win_arm64 pp39-*" +build = "cp36-* cp38-macosx_arm64 cp39-win_arm64" # pp39-* skip = "*-win32 *-manylinux_i686 *-musllinux_*" [tool.cibuildwheel.macos] -archs = ["x86_64"] # "arm64" +archs = ["x86_64", "arm64"] [tool.cibuildwheel.linux] -archs = ["auto"] # "aarch64" +archs = ["auto", "aarch64"] diff --git a/src/__init__.py b/pythonmodule/__init__.py similarity index 91% rename from src/__init__.py rename to pythonmodule/__init__.py index 952d8b5..40fef1a 100644 --- a/src/__init__.py +++ b/pythonmodule/__init__.py @@ -1,7 +1,14 @@ from ._dbscan import * -from ._dbscan import __version__ -__all__ = ('DBSCAN',) +# Load version from _version.py if available +from . import _dbscan +__all__ = tuple(v for v in dir(_dbscan) if v.startswith('_')) +try: + from ._dbscan import __version__ + __all__ += ('__version__',) +except: + pass +del _dbscan try: # Create scikit-learn wrapper if possible diff --git a/setup.py b/setup.py index dd771d3..6b7fad6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,11 @@ import setuptools from setuptools.extension import Extension +import setuptools_scm import numpy + import ast +import glob +import json import sys import os @@ -20,18 +24,6 @@ def initialize_options(self): # Give up if it doesn't work. Not a big deal. pass -# Read README.md and set it as the description -with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() - -# Get the module version from src/dbscanmodule.cpp -with open("src/dbscanmodule.cpp", "r") as fh: - version = fh.readline().strip() - assert version.startswith('#define DBSCAN_VERSION ') - version = ast.literal_eval(version.replace('#define DBSCAN_VERSION ', '')) - assert isinstance(version, str) - - if os.name == 'nt': # Windows compile time arguments extra_compile_args = ["/std:c++17", "/Ot"] @@ -39,42 +31,31 @@ def initialize_options(self): # Mac/Linux GCC compile time arguments extra_compile_args = ["-std=c++17", "-pthread", "-g", "-O3", "-fPIC", "-Wno-unused"] +install_requires = [f'numpy>={numpy.__version__},<2'] +if sys.hexversion < 0x03080000: + install_requires.append('importlib-resources; python_version<3.8') + +depends = [f for f in glob.glob('include/**', recursive=True) if not os.path.isdir(f)] + setuptools.setup( - name="dbscan", - version=version, - author="Yiqiu Wang", - author_email="yiqiu_wang@icloud.com", - description="Theoretically efficient and practical parallel DBSCAN", - long_description=long_description, - long_description_content_type="text/markdown", - keywords='cluster clustering density dbscan', - url="https://github.com/wangyiqiu/dbscan-python", - license='MIT', packages=('dbscan',), - package_dir={'dbscan': 'src'}, + package_dir={'dbscan': 'pythonmodule'}, ext_modules=[Extension( "dbscan._dbscan", ["src/dbscanmodule.cpp", "src/capi.cpp"], language = 'c++', extra_compile_args=extra_compile_args, include_dirs=[numpy.get_include(), 'include'], + depends=depends, py_limited_api=True, - define_macros=[('Py_LIMITED_API', '0x03020000'), ('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')] + define_macros=[ + ('Py_LIMITED_API', '0x03020000'), + ('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION'), + ('DBSCAN_VERSION', json.dumps(setuptools_scm.get_version())), + ] )], - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Developers', - "License :: OSI Approved :: MIT License", - 'Programming Language :: C++', - 'Programming Language :: Python :: 3', - 'Topic :: Software Development', - 'Topic :: Scientific/Engineering', - ], python_requires=f'>={sys.version_info.major}.{sys.version_info.minor},<4', - install_requires=[ - f'numpy>={numpy.__version__},<2' - ], + install_requires=install_requires, extras_require={ 'scikit-learn': ['scikit-learn'], 'example': ['scikit-learn', 'matplotlib'], diff --git a/src/dbscanmodule.cpp b/src/dbscanmodule.cpp index 5cb911f..7668ceb 100644 --- a/src/dbscanmodule.cpp +++ b/src/dbscanmodule.cpp @@ -1,5 +1,3 @@ -#define DBSCAN_VERSION "0.0.10" - #include "Python.h" #include "numpy/arrayobject.h" #include "dbscan/capi.h" @@ -118,7 +116,11 @@ PyInit__dbscan(void) { import_array(); PyObject *module = PyModule_Create(&dbscanModule); +#ifdef DBSCAN_VERSION PyModule_AddStringConstant(module, "__version__", DBSCAN_VERSION); +#endif + PyModule_AddIntMacro(module, DBSCAN_MIN_DIMS); + PyModule_AddIntMacro(module, DBSCAN_MAX_DIMS); return module; } From 4800b1ec56c9e30eb769ebffe6489d045f1beccb Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 14:53:38 -0500 Subject: [PATCH 4/9] Fix windows compatibility --- executable/main.cpp | 1 - include/dbscan/algo.h | 3 +-- include/dbscan/pbbs/gettime.h | 3 +++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/executable/main.cpp b/executable/main.cpp index 5278dfb..65b9db6 100644 --- a/executable/main.cpp +++ b/executable/main.cpp @@ -4,7 +4,6 @@ #include "dbscan/point.h" #include "dbscan/geometryIO.h" #include "dbscan/pbbs/parallel.h" -#include "dbscan/pbbs/gettime.h" #include "dbscan/pbbs/parseCommandLine.h" diff --git a/include/dbscan/algo.h b/include/dbscan/algo.h index 6169c14..b636685 100644 --- a/include/dbscan/algo.h +++ b/include/dbscan/algo.h @@ -5,7 +5,7 @@ #include "dbscan/shared.h" #include "dbscan/grid.h" #include "dbscan/coreBccp.h" -#include "dbscan/pbbs/gettime.h" +// #include "dbscan/pbbs/gettime.h" #include "dbscan/pbbs/parallel.h" #include "dbscan/pbbs/sampleSort.h" #include "dbscan/pbbs/unionFind.h" @@ -96,7 +96,6 @@ int DBSCAN(intT n, floatT* PF, double epsilon, intT minPts, bool* coreFlagOut, i auto uf = unionFind(G->numCell()); - timing t1; parallel_for(0, G->numCell(), [&](intT i) { if (ccFlag[i]) { auto procTj = [&](cellT* cj) { diff --git a/include/dbscan/pbbs/gettime.h b/include/dbscan/pbbs/gettime.h index d9dc4de..63be5fa 100644 --- a/include/dbscan/pbbs/gettime.h +++ b/include/dbscan/pbbs/gettime.h @@ -1,6 +1,7 @@ #ifndef GETTIME_H #define GETTIME_H +/* #include #include #include @@ -92,4 +93,6 @@ struct timing { // #define nextTime(_string) _tm.reportNext(_string); // #define nextTimeN() _tm.reportT(_tm.next()); +*/ + #endif From 551a517a5d5e5f8f192f0e85f4754198e5f79b61 Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 14:59:53 -0500 Subject: [PATCH 5/9] Fix windows compatibility --- include/dbscan/pbbs/unionFind.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/dbscan/pbbs/unionFind.h b/include/dbscan/pbbs/unionFind.h index 13bea25..3767e55 100644 --- a/include/dbscan/pbbs/unionFind.h +++ b/include/dbscan/pbbs/unionFind.h @@ -38,7 +38,8 @@ struct unionFind { v = find(v); if(u == v) break; if(u > v) swap(u,v); - if(hooks[u] == intMax() && __sync_bool_compare_and_swap(&hooks[u], intMax(), u)){ + // if(hooks[u] == intMax() && __sync_bool_compare_and_swap(&hooks[u], intMax(), u)){ + if(hooks[u] == intMax() && utils::myCAS(&hooks[u], intMax(), u)){ parents[u]=v; break; }} @@ -79,7 +80,8 @@ edgeUnionFind(intT nn): n(nn) { v = find(v); if(u == v) break; if(u > v) swap(u,v); - if(hooks[u].first == intMax() && __sync_bool_compare_and_swap(&hooks[u].first, intMax(), c_from)){ + // if(hooks[u].first == intMax() && __sync_bool_compare_and_swap(&hooks[u].first, intMax(), c_from)){ + if(hooks[u].first == intMax() && utils::myCAS(&hooks[u].first, intMax(), c_from)){ parents[u]=v; hooks[u].second=c_to; break; From c2beb32ca9811befe2a20593e5ee7d9a3536f062 Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 15:07:44 -0500 Subject: [PATCH 6/9] Fix static allocation --- include/dbscan/kdNode.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dbscan/kdNode.h b/include/dbscan/kdNode.h index fb5a183..0dea9d5 100644 --- a/include/dbscan/kdNode.h +++ b/include/dbscan/kdNode.h @@ -47,7 +47,8 @@ class kdNode { }} inline void boundingBoxParallel() { - intT P = getWorkers()*8; + // intT P = getWorkers()*8; + static const intT P = 36 * 8; intT blockSize = (n+P-1)/P; pointT localMin[P]; pointT localMax[P]; From afbe1b48c72900ceb7e0401386212df4dc2cae4c Mon Sep 17 00:00:00 2001 From: Yiqiu Wang Date: Tue, 29 Nov 2022 15:14:07 -0500 Subject: [PATCH 7/9] Fix static allocation --- include/dbscan/shared.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dbscan/shared.h b/include/dbscan/shared.h index 82cd53a..737437c 100644 --- a/include/dbscan/shared.h +++ b/include/dbscan/shared.h @@ -126,7 +126,8 @@ point pMinSerial(point* items, intT n) { template point pMinParallel(point* items, intT n) { point pMin = point(items[0].x); - intT P = getWorkers()*8; + // intT P = getWorkers()*8; + static const intT P = 36 * 8; intT blockSize = (n+P-1)/P; point localMin[P]; for (intT i=0; i Date: Tue, 29 Nov 2022 15:40:25 -0500 Subject: [PATCH 8/9] Update build_wheels.yml --- .github/workflows/build_wheels.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 68f154d..b11e436 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -13,6 +13,24 @@ on: [push, pull_request] # - published jobs: + build-and-test: + name: Build executable and run test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Build + run: | + mkdir build + cd build + cmake .. + make -j + - name: Test + run: | + cd build + ctest --no-tests=error --output-on-failure + build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} From 8021f989277974ccb45ff1faab1adf3194d5dafa Mon Sep 17 00:00:00 2001 From: anivegesana Date: Wed, 30 Nov 2022 20:50:00 -0800 Subject: [PATCH 9/9] Remove x86 Mac builds from development builds --- .github/workflows/build_wheels.yml | 12 +++++++----- pyproject.toml | 9 +++++++-- pythonmodule/__init__.py | 2 +- setup.py | 7 +------ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index e476f70..096c096 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -19,8 +19,6 @@ jobs: steps: - uses: actions/checkout@v3 - with: - fetch-depth: 0 - name: Build run: | @@ -46,19 +44,22 @@ jobs: fetch-depth: 0 - name: Set up QEMU - if: runner.os == 'Linux' + if: runner.os == 'Linux' && (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/v')) uses: docker/setup-qemu-action@v2 with: platforms: all - name: Build wheels (development) - if: github.ref != 'refs/heads/master' + if: github.ref != 'refs/heads/master' && !startsWith(github.ref, 'refs/tags/v') uses: pypa/cibuildwheel@v2.11.2 + env: + CIBW_ARCHS_MACOS: "arm64" - name: Build wheels (production) - if: github.ref == 'refs/heads/master' + if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/v') uses: pypa/cibuildwheel@v2.11.2 env: + CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ARCHS_LINUX: "auto aarch64" - uses: actions/upload-artifact@v3 @@ -68,6 +69,7 @@ jobs: build_sdist: name: Build source distribution runs-on: ubuntu-latest + if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags/v') steps: - uses: actions/checkout@v3 with: diff --git a/pyproject.toml b/pyproject.toml index 0024880..0c0328b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,9 @@ name = "dbscan" authors = [ {name = "Yiqiu Wang", email = "yiqiu_wang@icloud.com"} ] +maintainers = [ + {name = "Anirudh Vegesana", email = "avegesan@cs.stanford.edu"} +] description = "Theoretically efficient and practical parallel DBSCAN" readme = "README.md" keywords = ['cluster', 'clustering', 'density', 'dbscan'] @@ -53,8 +56,10 @@ write_to = "pythonmodule/_version.py" build = "cp36-* cp38-macosx_arm64" # pp39-* cp39-win_arm64 skip = "*-win32 *_i686 *-musllinux_*" -[tool.cibuildwheel.macos] -archs = ["x86_64", "arm64"] +# We will only build x86_64 when merged into master to save on compilation +# time. +# [tool.cibuildwheel.macos] +# archs = ["x86_64", "arm64"] # We will only build aarch64 when merged into master to save on compilation # time. diff --git a/pythonmodule/__init__.py b/pythonmodule/__init__.py index ec09bcc..5ddd39f 100644 --- a/pythonmodule/__init__.py +++ b/pythonmodule/__init__.py @@ -4,7 +4,7 @@ from . import _dbscan __all__ = tuple(v for v in dir(_dbscan) if v.startswith('_')) try: - from ._version import __version__ + from ._version import version as __version__ __all__ += ('__version__',) except: pass diff --git a/setup.py b/setup.py index 89eb4fb..2116a12 100644 --- a/setup.py +++ b/setup.py @@ -30,11 +30,6 @@ def initialize_options(self): else: # Mac/Linux GCC compile time arguments extra_compile_args = ["-std=c++17", "-pthread", "-g", "-O3", "-fPIC", "-Wno-unused"] - -install_requires = [f'numpy>={numpy.__version__},<2'] -if sys.hexversion < 0x03080000: - install_requires.append('importlib-resources ; python_version<"3.8"') - depends = [f for f in glob.glob('include/**', recursive=True) if not os.path.isdir(f)] version = setuptools_scm.get_version() @@ -59,7 +54,7 @@ def initialize_options(self): ] )], python_requires=f'>={sys.version_info.major}.{sys.version_info.minor},<4', - install_requires=install_requires, + install_requires=[f'numpy>={numpy.__version__},<2'], extras_require={ 'scikit-learn': ['scikit-learn'], 'example': ['scikit-learn', 'matplotlib'],