### Using an external Rust library to speed up lon, lat to [BNG](https://en.wikipedia.org/wiki/Ordnance_Survey_National_Grid) conversion

In [1]:
import numpy as np
import pandas as pd
import math
from ctypes import cdll, c_float, Structure, ARRAY, POINTER, c_int32, c_uint32, c_size_t, c_void_p, cast
from sys import platform
from bng import bng
import pyproj
import ipdb
from array import array
# from util import convertbng, convertlonlat

### Setting up the Rust library. See [here](https://github.com/alexcrichton/rust-ffi-examples/tree/master/python-to-rust) for more

Ensure you've built your Rust library using `cargo build --release`, or the next step will fail.

The boilerplate below can easily be hidden in a wrapper function â€“ it's just here to demonstrate how to call into a shared Rust lib using FFI.

In [2]:
if platform == "darwin":
    ext = "dylib"
else:
    ext = "so"
    
lib = cdll.LoadLibrary('target/release/liblonlat_bng.' + ext)

Define the `ctypes` structures for lon, lat --> BNG conversion

In [3]:
class BNG_FFITuple(Structure):
    _fields_ = [("a", c_uint32),
                ("b", c_uint32)]

class BNG_FFIArray(Structure):
    _fields_ = [("data", c_void_p),
                ("len", c_size_t)]

    # Allow implicit conversions from a sequence of 32-bit unsigned
    # integers.
    @classmethod
    def from_param(cls, seq):
        return seq if isinstance(seq, cls) else cls(seq)

    # Wrap sequence of values. You can specify another type besides a
    # 32-bit unsigned integer.
    def __init__(self, seq, data_type = c_float):
        buf = array('f', seq)
        array_type = data_type * len(buf)
        raw_seq = array_type.from_buffer(buf)
        self.data = cast(raw_seq, c_void_p)
        self.len = len(seq)

class BNG_RESTuple(Structure):
    _fields_ = [("e", BNG_FFIArray),
                ("n", BNG_FFIArray)]        

# A conversion function that cleans up the result value to make it
# nicer to consume.
def bng_void_array_to_tuple_list(restuple, _func, _args):
    eastings = cast(restuple.e.data, POINTER(c_int32 * restuple.e.len))[0]
    northings = cast(restuple.n.data, POINTER(c_int32 * restuple.n.len))[0]
    res_list = [list(eastings), list(northings)]
    drop_bng_array(restuple.e, restuple.n)
    return res_list

Define the `ctypes` structures for BNG --> lon, lat conversion

In [4]:
class LONLAT_FFITuple(Structure):
    _fields_ = [("a", c_float),
                ("b", c_float)]

class LONLAT_FFIArray(Structure):
    _fields_ = [("data", c_void_p),
                ("len", c_size_t)]

    # Allow implicit conversions from a sequence of 32-bit unsigned
    # integers.
    @classmethod
    def from_param(cls, seq):
        return seq if isinstance(seq, cls) else cls(seq)

    # Wrap sequence of values. You can specify another type besides a
    # 32-bit unsigned integer.
    def __init__(self, seq, data_type = c_uint32):
        array_type = data_type * len(seq)
        raw_seq = array_type(*seq)
        self.data = cast(raw_seq, c_void_p)
        self.len = len(seq)

class LONLAT_RESTuple(Structure):
    _fields_ = [("lon", LONLAT_FFIArray),
                ("lat", LONLAT_FFIArray)]           

# A conversion function that cleans up the result value to make it
# nicer to consume.
def lonlat_void_array_to_tuple_list(array, _func, _args):
    lons = cast(restuple.lon.data, POINTER(c_float * restuple.lon.len))[0]
    lats = cast(restuple.lat.data, POINTER(c_float * restuple.lat.len))[0]
    res_list = [list(lons), list(lats)]
    drop_bng_array(restuple.lon, restuple.lat)
    return res_list

Define `ctypes` input and return parameters

In [5]:
# Multi-threaded
convert_bng = lib.convert_to_bng_threaded
convert_bng.argtypes = (BNG_FFIArray, BNG_FFIArray)
convert_bng.restype = BNG_RESTuple
convert_bng.errcheck = bng_void_array_to_tuple_list

convert_lonlat = lib.convert_to_lonlat_threaded
convert_lonlat.argtypes = (LONLAT_FFIArray, LONLAT_FFIArray)
convert_lonlat.restype = LONLAT_RESTuple
convert_lonlat.errcheck = lonlat_void_array_to_tuple_list

# cleanup
drop_bng_array = lib.drop_int_array
drop_bng_array.argtypes = (BNG_FFIArray, BNG_FFIArray)
drop_bng_array.restype = None
drop_ll_array = lib.drop_float_array
drop_ll_array.argtypes = (LONLAT_FFIArray, LONLAT_FFIArray)
drop_ll_array.restype = None

In [6]:
# def convertbng(lons, lats):
#     """ Single-threaded wrapper """
#     return convert_vec(lons, lats)

def convertbng_threaded(lons, lats):
    """ Multi-threaded lon lat to BNG wrapper """
    return convert_bng(lons, lats)

def convertlonlat_threaded(eastings, northings):
    """ Multi-threaded BNG to lon, lat wrapper """
    return convert_lonlat(eastings, northings)

## Simple test of average conversion speed, Python version

## Test: 1MM random points within the UK

In [7]:
# UK bounding box
N = 55.811741
E = 1.768960
S = 49.871159
W = -6.379880

bng = pyproj.Proj(init='epsg:27700')
wgs84 = pyproj.Proj(init='epsg:4326')

num_coords = 1000000
lon_ls = list(np.random.uniform(W, E, [num_coords]))
lat_ls = list(np.random.uniform(S, N, [num_coords]))

### Pure Python

In [8]:
%%timeit -r10
[bng(lat, lon) for lat, lon in zip(lat_ls, lon_ls)]

1 loops, best of 10: 547 ms per loop


### Pyproj

In [8]:
%%timeit -r50
zip(*pyproj.transform(wgs84, bng, lon_ls, lat_ls))

1 loops, best of 50: 510 ms per loop


### Multithreaded Rust

In [8]:
%%timeit -r50
zip(*convertbng_threaded(lon_ls, lat_ls))

1 loops, best of 50: 630 ms per loop


## Pyproj is now 2.3x (130%) faster, multithreaded Rust version is 5x faster than pure Python

In [16]:
%%timeit
convertbng_threaded(
    np.random.uniform(W, E, [1000000]),
    np.random.uniform(S, N, [1000000])
)

1 loops, best of 3: 1.95 s per loop


In [18]:
pyproj.transform(wgs84, bng, lon_ls[:10], lat_ls[:10])

([239848.09803605592,
  565243.9292678027,
  437063.56213706615,
  125805.39941714873,
  100663.86697290034,
  404354.5908999103,
  352852.71625587915,
  580194.2000104681,
  554613.7675312255,
  399562.6743980016],
 [526043.8918870178,
  155476.9754103778,
  292795.2483636608,
  557740.6214864869,
  232161.1257226668,
  15942.997377336476,
  375747.98735284165,
  11242.398103041007,
  432948.89253161754,
  480867.93841965345])

In [19]:
df = pd.DataFrame({
        'lons': lon_ls,
        'lats': lat_ls
        })

In [20]:
%%timeit
df['eastings'], df['northings'] = zip(*convertbng_threaded(df.lons.values, df.lats.values))
df.head()

1 loops, best of 3: 2.51 s per loop


In [None]:
%%timeit
df['eastings'], df['northings'] = df.apply(lambda x: zip(*convert_bng(df.lons, df.lats)), axis=1)

In [9]:
from array import array

def _copytobuffer(x):
    """
    return a copy of x as an object that supports the python Buffer
    API (python array if input is float, list or tuple, numpy array
    if input is a numpy array). returns copyofx, isfloat, islist,
    istuple (islist is True if input is a list, istuple is true if
    input is a tuple, isfloat is true if input is a float).
    """
    # make sure x supports Buffer API and contains doubles.
    isfloat = False; islist = False; istuple = False
    # first, if it's a numpy array scalar convert to float
    # (array scalars don't support buffer API)
    if hasattr(x,'shape'):
        if x.shape == ():
            return _copytobuffer_return_scalar(x)
        else:
            try:
                # typecast numpy arrays to double.
                # (this makes a copy - which is crucial
                #  since buffer is modified in place)
                x.dtype.char
                # Basemap issue
                # https://github.com/matplotlib/basemap/pull/223/files
                # (deal with input array in fortran order)
                inx = x.copy(order="C").astype('d')
                # inx,isfloat,islist,istuple
                return inx,False,False,False
            except:
                try: # perhaps they are Numeric/numarrays?
                    # sorry, not tested yet.
                    # i don't know Numeric/numarrays has `shape'.
                    x.typecode()
                    inx = x.astype('d')
                    # inx,isfloat,islist,istuple
                    return inx,False,False,False
                except:
                    raise TypeError('input must be an array, list, tuple or scalar')
    else:
        # perhaps they are regular python arrays?
        if hasattr(x, 'typecode'):
            #x.typecode
            inx = array('d',x)
        # try to convert to python array
        # a list.
        elif type(x) == list:
            inx = array('d',x)
            islist = True
        # a tuple.
        elif type(x) == tuple:
            inx = array('d',x)
            istuple = True
        # a scalar?
        else:
            return _copytobuffer_return_scalar(x)
    return inx,isfloat,islist,istuple

In [29]:
res = _copytobuffer(lon_ls[:10])

In [31]:
res[0]

array('d', [-5.5472736759530745, -0.6037926908174169, -4.067424377853696, -1.3163081704097914, -5.584441842323121, -1.0134325199385756, -6.1384195685599785, -4.15023342600273, -4.4873307936127205, -0.33716790963166776])

In [32]:
c_void_p.from_buffer(res[0])

c_void_p(13841303730197149819L)

In [4]:
ls = [1., 2., 3.]
arr = array('f', ls)
arr

array('f', [1.0, 2.0, 3.0])

In [6]:
# old-style
bar = BNG_FFIArray(ls)
bar.len, bar.data

(3L, 4504774352)