Skip to content

Commit

Permalink
Merge pull request #197 from vaexio/superhash
Browse files Browse the repository at this point in the history
Superhash: binby, groupby, unique, value_counts and xarray support
  • Loading branch information
maartenbreddels committed Apr 24, 2019
2 parents 978462f + 526a75b commit 7864b50
Show file tree
Hide file tree
Showing 45 changed files with 3,675 additions and 276 deletions.
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@
[submodule "packages/vaex-core/vendor/string-view-lite"]
path = packages/vaex-core/vendor/string-view-lite
url = https://github.com/martinmoene/string-view-lite
[submodule "packages/vaex-core/vendor/hopscotch-map"]
path = packages/vaex-core/vendor/hopscotch-map
url = https://github.com/Tessil/hopscotch-map
[submodule "packages/vaex-core/vendor/flat_hash_map"]
path = packages/vaex-core/vendor/flat_hash_map
url = https://github.com/skarupke/flat_hash_map
5 changes: 1 addition & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@ before_install:
- source deactivate
- source activate test-environment
- which pip
- pip install pybind11
# - conda install -c conda-forge pandas kapteyn # these extra installs should disappear
# - conda create --name dev --clone test
# - pip install -r requirements.txt
- pip install -r requirements.txt
install:
- source activate test-environment
- (cd packages/vaex-core; pip install -v .)
Expand Down
4 changes: 2 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ install:
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- "conda create -q -n test-environment -c conda-forge python=%PYTHON_VERSION% numpy scipy pyqt matplotlib pyopengl h5py numexpr astropy tornado cython pandas runipy cython pytest numba pyarrow>=0.12 graphviz python-graphviz pcre"
- "conda create -q -n test-environment -c conda-forge python=%PYTHON_VERSION% numpy scipy pyqt matplotlib pyopengl h5py numexpr astropy tornado cython pandas runipy cython pytest numba pyarrow graphviz python-graphviz pcre"
- activate test-environment
- pip install pybind11
- pip install "numpy>=1.13" "pyarrow>=0.12"
- pip install -r requirements.txt
- pushd packages\vaex-core && pip install . && popd
- pushd packages\vaex-hdf5 && pip install . && popd
Expand Down
38 changes: 32 additions & 6 deletions packages/vaex-core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,17 @@ def __str__(self):
if platform.system().lower() == 'windows':
extra_compile_args = ["/EHsc"]
else:
# TODO: maybe enable these flags for non-wheel/conda builds? ["-mtune=native", "-march=native"]
extra_compile_args = ["-std=c++11", "-mfpmath=sse", "-O3", "-funroll-loops"]
extra_compile_args.append("-g")
if sys.platform == 'darwin':
extra_compile_args.append("-mmacosx-version-min=10.9")

# on windows (Conda-forge builds), the dirname is an absolute path
extension_vaexfast = Extension("vaex.vaexfast", [os.path.relpath(os.path.join(dirname, "src/vaexfast.cpp"))],
include_dirs=[get_numpy_include()],
extra_compile_args=extra_compile_args)
extension_strings = Extension("vaex.strings", [os.path.relpath(os.path.join(dirname, "src/strings.cpp"))],
extension_strings = Extension("vaex.superstrings", [os.path.relpath(os.path.join(dirname, "src/strings.cpp"))],
include_dirs=[
get_numpy_include(),
get_pybind_include(),
Expand All @@ -88,10 +90,34 @@ def __str__(self):
extra_compile_args=extra_compile_args,
libraries=['pcre', 'pcrecpp']
)
extension_superutils = Extension("vaex.superutils", [os.path.relpath(os.path.join(dirname, "src/superutils.cpp"))],
include_dirs=[get_numpy_include(), get_pybind_include(),
get_pybind_include(user=True)],
extra_compile_args=extra_compile_args)
# Extension module "vaex.superutils", compiled from four C++ sources under src/.
# Each source path is joined with dirname and then made relative via os.path.relpath.
extension_superutils = Extension("vaex.superutils", [
os.path.relpath(os.path.join(dirname, "src/hash_object.cpp")),
os.path.relpath(os.path.join(dirname, "src/hash_primitives.cpp")),
os.path.relpath(os.path.join(dirname, "src/superutils.cpp")),
os.path.relpath(os.path.join(dirname, "src/hash_string.cpp")),
],
include_dirs=[
get_numpy_include(), get_pybind_include(),
get_pybind_include(user=True),
# NOTE(review): unlike the sources above, the vendor paths below are not
# joined with dirname -- presumably the build is expected to run from the
# package directory; confirm.
'vendor/flat_hash_map',
'vendor/sparse-map/include',
'vendor/hopscotch-map/include',
'vendor/string-view-lite/include'
],
extra_compile_args=extra_compile_args)

# Extension module "vaex.superagg", compiled from the single source src/superagg.cpp.
# Uses the same include directories and compile flags as extension_superutils.
extension_superagg = Extension("vaex.superagg", [
os.path.relpath(os.path.join(dirname, "src/superagg.cpp")),
],
include_dirs=[
get_numpy_include(), get_pybind_include(),
get_pybind_include(user=True),
# NOTE(review): vendor paths are given as-is (not joined with dirname) --
# presumably resolved against the directory the build runs from; confirm.
'vendor/flat_hash_map',
'vendor/sparse-map/include',
'vendor/hopscotch-map/include',
'vendor/string-view-lite/include'
],
extra_compile_args=extra_compile_args)

setup(name=name + '-core',
version=version,
Expand All @@ -104,7 +130,7 @@ def __str__(self):
license=license,
package_data={'vaex': ['test/files/*.fits', 'test/files/*.vot', 'test/files/*.hdf5']},
packages=['vaex', 'vaex.core', 'vaex.file', 'vaex.test', 'vaex.ext', 'vaex.misc'],
ext_modules=[extension_vaexfast] if on_rtd else [extension_vaexfast, extension_strings, extension_superutils],
ext_modules=[extension_vaexfast] if on_rtd else [extension_vaexfast, extension_strings, extension_superutils, extension_superagg],
zip_safe=False,
entry_points={
'console_scripts': ['vaex = vaex.__main__:main'],
Expand Down
21 changes: 21 additions & 0 deletions packages/vaex-core/src/hash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// #include "flat_hash_map.hpp"
// #include "unordered_map.hpp"
#include <functional>
#include <utility>

#include "tsl/hopscotch_set.h"
#include "tsl/hopscotch_map.h"

namespace vaex {

// Alias selecting the hash map implementation; currently tsl::hopscotch_map.
// Alternative backend, kept for reference:
//   using hashmap = ska::flat_hash_map<Key, Value, Hash, Compare>;
template<
    class K,
    class T,
    class Hasher = std::hash<K>,
    class KeyEqual = std::equal_to<K>
>
using hashmap = tsl::hopscotch_map<K, T, Hasher, KeyEqual>;
// template<class Key, class Hash, class Compare>
// using hashset = tsl::hopscotch_set<Key, Hash, Compare>;

// With tsl::hopscotch_map we cannot modify .second through an iterator;
// the mutable mapped value is reached through .value() instead.
// see https://github.com/Tessil/hopscotch-map
//
// Assigns `value` to the mapped value that iterator `it` refers to.
//   it    - iterator-like object exposing a mutable reference via .value()
//   value - new mapped value; perfectly forwarded, so lvalues are copied
//           (as before) while rvalues are moved, which also makes
//           move-only mapped types usable.
template<class I, class V>
inline void set_second(I& it, V&& value) {
    it.value() = std::forward<V>(value);
}

}

0 comments on commit 7864b50

Please sign in to comment.