### Using an external Rust library to speed up lon, lat to [BNG](https://en.wikipedia.org/wiki/Ordnance_Survey_National_Grid) conversion

In [13]:
import numpy as np
import pandas as pd
import math
from ctypes import cdll, c_float, Structure, ARRAY, POINTER, c_int32, c_size_t, c_void_p, cast
from sys import platform
from bng import bng
import pyproj

In [2]:
    if platform == "darwin":
        ext = "dylib"
    else:
        ext = "so"

### Setting up the Rust library. See [here](https://github.com/alexcrichton/rust-ffi-examples/tree/master/python-to-rust) for more

Ensure you've built your Rust library using `cargo build --release`, or the next step will fail.

In [22]:
# hacky: http://stackoverflow.com/a/30789980/416626
class Int32_2(Structure):
    _fields_ = [("array", c_int32 * 2)]

lib = cdll.LoadLibrary('target/release/liblonlat_bng.' + ext)
# define argument types
rust_bng = lib.convert
rust_bng.argtypes = [c_float, c_float]
rust_bng.restype = Int32_2

rust_bng_vec = lib.convert_vec_py
rust_bng.argtypes = [POINTER(c_float), c_size_t, POINTER(c_float), c_size_t]
rust_bng_vec.restype = POINTER(Int32_2)


class FFITuple(Structure):
    _fields_ = [("a", c_float),
                ("b", c_float)]

class FFIArray(Structure):
    _fields_ = [("data", c_void_p),
                ("len", c_size_t)]

rust_bng_c = lib.convert_vec_c
rust_bng_c.argtypes = (FFIArray, FFIArray)
rust_bng_c.restype = FFIArray

def rust_bng_c_wrap(lons, lats):
    lons2 = (c_float * len(lons))(*lons)
    lats2 = (c_float * len(lats))(*lats)

    lat_array = FFIArray(cast(lons2, c_void_p), len(lons2))
    lon_array = FFIArray(cast(lats2, c_void_p), len(lats2))
    result = rust_bng_c(lon_array, lat_array)

    results = cast(result.data, POINTER(FFITuple))

    for i in xrange(result.len):
        tupl = results[i]
        print tupl.a, tupl.b
#     res = [zip(results[i].a, results[i].b) for i in xrange(result.len)]




def rust_bng_vec_wrap(ls_1, ls_2):
    """ Wrapper around the foreign converter function """
    length = len(ls_1)
    arr1 = (c_float * length)(*ls_1)
    arr2 = (c_float * length)(*ls_2)
    result = rust_bng_vec(arr1, length, arr2, length)
    container = [(result[i].array[0], result[i].array[1]) for i in xrange(length)]
    return container

### Simple test of average conversion speed, Python version

In [5]:
%%timeit -r 10
bng(
    51.44533267, -0.32824866)

10000 loops, best of 10: 24.1 µs per loop


### Simple test of average conversion speed, Rust 1.0 version

In [8]:
%%timeit -r 10
rust_bng(-0.32824866, 51.44533267)

The slowest run took 1825.23 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 10: 1.42 µs per loop


In [8]:
# UK bounding box
N = 55.811741
E = 1.768960
S = 49.871159
W = -6.379880

### A slightly more realistic test, 100k random points within the UK

In [8]:
df = pd.DataFrame({
        'lon': np.random.uniform(W, E, [100000]),
        'lat': np.random.uniform(N, S, [100000])})

In [10]:
%%timeit -r 10
eastings_northings = df.apply(
    lambda x: bng(x['lat'], x['lon']),
    axis=1)

1 loops, best of 10: 4.11 s per loop


In [9]:
%%timeit -r 10
rust_eastings_northings = df.apply(
    lambda x: rust_bng(c_float(x['lon']), c_float(x['lat'])),
    axis=1)

1 loops, best of 10: 2.05 s per loop


### So far: around a 15x improvement on the simple test, around 2x on the "realistic" test

In [22]:
bng = pyproj.Proj(init='epsg:27700')
wgs84 = pyproj.Proj(init='epsg:4326')

### Pyproj

In [5]:
%%timeit -r 10
pyproj.transform(wgs84, bng, -0.32824866, 51.44533267)

The slowest run took 7.25 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 10: 8.55 µs per loop


### Using lists instead of a DataFrame

In [9]:
lon_ls = list(np.random.uniform(W, E, [100000]))
lat_ls = list(np.random.uniform(N, S, [100000]))

In [43]:
%%timeit -r 10
res = [rust_bng(i, j) for i, j in zip(lon_ls, lat_ls)]

1 loops, best of 10: 261 ms per loop


In [42]:
%%timeit -r 10
res = [bng(i, j) for i, j in zip(lon_ls, lat_ls)]

1 loops, best of 10: 804 ms per loop


In [23]:
%%timeit -r 10
proj_res = zip(*pyproj.transform(wgs84, bng, lon_ls, lat_ls))

10 loops, best of 10: 93.5 ms per loop


### ~14x speedup over pure Python, ~3x slower than Pyproj

### Let's use the vectorised version

In [23]:
# %%timeit -r 10
baz = rust_bng_c_wrap([.1, .2, .3], [.3, .2, .1])

9.19363896474e-40 7.73031342519e-39
9.03757635477e-40 nan
8.8815137448e-40 nan


In [11]:
baz[0]

(426987, 57743)