# Experiments with vectorized functions

In [1]:
from h3.unstable import vect

import h3.api.numpy_int as h3i
import h3.api.basic_str as h3s

import numpy as np



# `geo_to_h3` times

In [2]:
# Sample size shared by the three `geo_to_h3` timing cells below.
N = 1_000_000

# Both coordinates are drawn uniformly from the same 0-90 degree range.
lats = np.random.uniform(low=0, high=90, size=N)
lons = np.random.uniform(low=0, high=90, size=N)

In [3]:
%%timeit
# Baseline: one Python-level call per point through `h3.api.basic_str` (resolution 10).
[h3s.geo_to_h3(lat, lon, 10) for lat, lon in zip(lats, lons)]

2.1 s ± 57.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
%%timeit
# Same per-point loop as the previous cell, but through `h3.api.numpy_int` (resolution 10).
[h3i.geo_to_h3(lat, lon, 10) for lat, lon in zip(lats, lons)]

1.63 s ± 7.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
# Single vectorized call over the full `lats` / `lons` arrays.
# NOTE(review): resolution is 9 here but 10 in the two per-point loops above, so the
# timings are not strictly like-for-like -- confirm whether the difference is intentional.
vect.geo_to_h3(lats, lons, 9)

802 ms ± 13.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Haversine distances between cells

Compute haversine distances between H3 cells using the integer
representation (avoiding conversion to/from the `str` representation).

In [6]:
def rand_hexes(N):
    """Return `N` randomly located H3 cells as a `uint64` numpy array.

    Latitudes and longitudes are each drawn uniformly from the 0-90 range,
    and every point is indexed individually with `h3i.geo_to_h3` using 9 as
    the resolution argument.
    """
    lat_samples = np.random.uniform(0, 90, N)
    lng_samples = np.random.uniform(0, 90, N)

    # Index point by point, then pack the integer results into one array.
    cells = []
    for lat, lng in zip(lat_samples, lng_samples):
        cells.append(h3i.geo_to_h3(lat, lng, 9))

    return np.array(cells, dtype='uint64')

In [7]:
N = 1_000_000

# Two independently generated cell arrays of length N; distances are taken pairwise below.
a, b = rand_hexes(N), rand_hexes(N)

In [8]:
%%timeit
# One vectorized call over the two uint64 cell arrays produced by `rand_hexes`.
vect.cell_haversine(a, b)

1 s ± 22.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Multithreading

Since these functions release the Python GIL, we can achieve some speedups via parallelism on multicore machines.

The timings below were measured on a quad-core MacBook.

## `geo_to_h3`

In [9]:
from concurrent.futures import ThreadPoolExecutor

N = 100_000  # points per batch
K = 10       # number of batches

# K independent batches, each a (lats, lngs) tuple of length-N coordinate vectors.
geos = [
    tuple(np.random.uniform(0, 90, size=N) for _axis in range(2))
    for _batch in range(K)
]

def foo(geo):
    """Index one `(lats, lngs)` batch with `vect.geo_to_h3`, passing 9 as the resolution.

    Note: the `cell_haversine` section further down defines another function
    with this same name.
    """
    lat_vec, lng_vec = geo
    return vect.geo_to_h3(lat_vec, lng_vec, 9)

In [10]:
%%timeit
# Serial baseline: the K batches are processed one after another via the builtin `map`.
out = list(map(foo, geos))

821 ms ± 30.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
# Same K batches, but dispatched to a pool of 4 worker threads. The pool is
# created and torn down inside the timed body, so that overhead is included.
with ThreadPoolExecutor(max_workers=4) as ex:
    out = list(ex.map(foo, geos))

248 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## `cell_haversine`



In [12]:
N = 100_000  # cells per array
K = 10       # number of (a, b) batches

# K batches, each an (a, b) tuple of independently generated length-N cell arrays.
pairs = [
    tuple(rand_hexes(N) for _side in range(2))
    for _batch in range(K)
]

def foo(ab):
    """Apply `vect.cell_haversine` to one `(a, b)` pair of cell arrays.

    Note: this rebinds the name `foo` that the `geo_to_h3` section defined
    earlier, so after this cell runs, `foo` refers to this version.
    """
    cells_a, cells_b = ab
    return vect.cell_haversine(cells_a, cells_b)

In [13]:
%%timeit
# Serial baseline: the K (a, b) batches are processed one after another via the builtin `map`.
out = list(map(foo, pairs))

1.03 s ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
# Same K batches, but dispatched to a pool of 4 worker threads. The pool is
# created and torn down inside the timed body, so that overhead is included.
with ThreadPoolExecutor(max_workers=4) as ex:
    out = list(ex.map(foo, pairs))

339 ms ± 21.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
