In [1]:
import numpy as np
import numba
import time

In [2]:

def pdist_numpy_naive(As: np.ndarray, Bs: np.ndarray) -> np.ndarray:
    """Pairwise Euclidean distances using a single broadcast subtraction.

    Args:
        As: 2-D array of shape (n, k); one point per row.
        Bs: 2-D array of shape (m, k); one point per row.

    Returns:
        Array of shape (n, m) where entry [i, j] is the Euclidean norm of
        ``As[i] - Bs[j]``.

    Raises:
        AssertionError: if the two inputs have a different number of columns.

    Note:
        An intermediate array of shape (n, m, k) is materialised, so memory
        use grows with the product of all three sizes.
    """
    _, dim_a = As.shape
    _, dim_b = Bs.shape
    assert dim_a == dim_b

    # (n, 1, k) - (1, m, k) broadcasts to every pairwise row difference.
    pairwise_diff = As[:, np.newaxis, :] - Bs[np.newaxis, :, :]
    return np.linalg.norm(pairwise_diff, axis=-1)


def pdist_numpy_hybrid(As: np.ndarray, Bs: np.ndarray) -> np.ndarray:
    """Pairwise Euclidean distances, looping over rows of ``As`` in Python.

    Each iteration handles one row of ``As`` against all of ``Bs`` with
    vectorised NumPy calls, so only an (m, k) temporary exists at a time.

    Args:
        As: 2-D array of shape (n, k); one point per row.
        Bs: 2-D array of shape (m, k); one point per row.

    Returns:
        Array of shape (n, m) where entry [i, j] is the Euclidean norm of
        ``Bs[j] - As[i]``.

    Raises:
        AssertionError: if the two inputs have a different number of columns.
    """
    n_a, dim_a = As.shape
    n_b, dim_b = Bs.shape
    assert dim_a == dim_b

    dists = np.empty((n_a, n_b))
    for idx in range(n_a):
        # Broadcasting subtracts this single row from every row of Bs.
        dists[idx] = np.linalg.norm(Bs - As[idx], axis=1)
    return dists


@numba.njit
def pdist_numba(As: np.ndarray, Bs: np.ndarray) -> np.ndarray:
    """Pairwise Euclidean distances with explicit loops, compiled by numba.

    Args:
        As: 2-D array of shape (n, k); one point per row.
        Bs: 2-D array of shape (m, k); one point per row.

    Returns:
        float64 array of shape (n, m) where entry [i, j] is the Euclidean
        norm of ``As[i] - Bs[j]``.

    Raises:
        AssertionError: if the two inputs have a different number of columns.
    """
    n_a, dim_a = As.shape
    n_b, dim_b = Bs.shape
    assert dim_a == dim_b

    dists = np.empty((n_a, n_b), dtype=np.float64)
    for row in range(n_a):
        a_vec = As[row]
        for col in range(n_b):
            dists[row, col] = np.linalg.norm(a_vec - Bs[col])
    return dists

In [3]:
# Benchmark the NumPy and numba implementations on one shared random data set.
n, m, k = 2001, 1001, 300

# Seed the generator so every run times (and compares) the same inputs.
rng = np.random.default_rng(0)
a = rng.random((n, k))
b = rng.random((m, k))

# time.perf_counter() is monotonic and high-resolution; time.time() follows the
# wall clock, which can be coarse or jump while a measurement is in progress.
t0 = time.perf_counter()
r0 = pdist_numpy_naive(a,b)
t1 = time.perf_counter()
print(f'naive numpy {t1-t0:.2f} s')


t0 = time.perf_counter()
r1 = pdist_numpy_hybrid(a,b)
t1 = time.perf_counter()
print(f'hybrid numpy {t1-t0:.2f} s')


pdist_numba(a,b) # run once to ensure compilation

t0 = time.perf_counter()
r2 = pdist_numba(a,b)
t1 = time.perf_counter()
print(f'numba {t1-t0:.2f} s')

# A timing comparison is only meaningful if all variants compute the same thing.
np.testing.assert_allclose(r1, r0)
np.testing.assert_allclose(r2, r0)

naive numpy 2.02 s
hybrid numpy 0.77 s
numba 0.90 s


In [4]:

# Cython source of the pairwise-distance kernel. It is written to
# 'pdist_cython.pyx' so that a separate build step can compile it.
cython_str = '''
# cython: language_level=3
# distutils: language=c
# cython: cpp_locals=True
# cython: binding=False
# cython: infer_types=False
# cython: wraparound=False
# cython: boundscheck=False
# cython: cdivision=True
# cython: overflowcheck=False
# cython: nonecheck=False
# cython: initializedcheck=False
# cython: always_allow_keywords=False
# cython: c_api_binop_methods=True
# distutils: define_macros=NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION


# pdist_cython.pyx

import numpy as np
cimport numpy as npc
from libc.math cimport sqrt
#from cython.parallel import prange

import cython
cimport cython


cdef double[:,:,] pdist_cython(const double[:,:,] As, const double[:,:,] Bs) except *:
    # Pairwise Euclidean distances between the rows of As (n x k) and Bs (m x k).

    # Py_ssize_t is the type of memoryview shapes and indices.
    cdef Py_ssize_t n,m,k,k2,i,j,l
    cdef double tmp, delta
    cdef npc.ndarray[double, ndim=2] res
    
    n = As.shape[0]
    k = As.shape[1]
    m = Bs.shape[0]
    k2 = Bs.shape[1]
    # boundscheck is disabled above, so a column-count mismatch would read
    # past the end of a row instead of raising; reject it explicitly.
    if k != k2:
        raise ValueError("As and Bs must have the same number of columns")
    
    res = np.empty((n, m))
    
    for i in range(0, n):
        for j in range(0, m):
            tmp = 0
            for l in range(0, k):
                delta = As[i,l] - Bs[j,l]
                tmp += delta*delta
            res[i,j] = sqrt(tmp)
    return res
    
cpdef double[:,:,] main(double[:,:,] a, double[:,:,] b) except *:
    # Python-visible entry point; returns a typed memoryview of shape (n, m).
    return pdist_cython(a, b)
'''
with open('pdist_cython.pyx','w') as f:
    f.write(cython_str)


In [6]:

# setup.py

# Build script for the Cython extension. It is written to 'setup.py' and then
# executed with the current interpreter to build 'pdist_cython' in place.
setup_py_cython_str = '''from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy 
include = [numpy.get_include()]

#args = ['/O2', '/fp:fast', '/Qfast_transcendentals']
# above args are for MSVC compiler (windows)

# below args are for GCC
# ('-fast-transcendentals' is an Intel compiler flag, not a GCC one)
args = ['-O3', '-ffast-math']
setup(
    ext_modules = cythonize([Extension('pdist_cython', sources=['pdist_cython.pyx'], include_dirs=include, extra_compile_args=args)], 
        compiler_directives={'language_level' : "3"},
        ),
    zip_safe=False, 
    )
'''

with open('setup.py','w') as f:
    f.write(setup_py_cython_str)

import subprocess
import sys
# check=True makes a failed build raise here, instead of surfacing later as a
# confusing ImportError (or a stale, previously built module being imported).
subprocess.run([sys.executable, "setup.py", "build_ext", "--inplace"], check=True)

import pdist_cython

Compiling pdist_cython.pyx because it changed.
[1/1] Cythonizing pdist_cython.pyx
running build_ext
building 'pdist_cython' extension
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/home/trygvrad/.local/lib/python3.8/site-packages/numpy/core/include -I/usr/include/python3.8 -c pdist_cython.c -o build/temp.linux-x86_64-3.8/pdist_cython.o


In file included from /home/trygvrad/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarraytypes.h:1969,
                 from /home/trygvrad/.local/lib/python3.8/site-packages/numpy/core/include/numpy/ndarrayobject.h:12,
                 from /home/trygvrad/.local/lib/python3.8/site-packages/numpy/core/include/numpy/arrayobject.h:4,
                 from pdist_cython.c:773:
      |  ^~~~~~~


x86_64-linux-gnu-gcc -pthread -shared -Wl,-O1 -Wl,-Bsymbolic-functions -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -Wl,-Bsymbolic-functions -Wl,-z,relro -g -fwrapv -O2 -g -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 build/temp.linux-x86_64-3.8/pdist_cython.o -o build/lib.linux-x86_64-3.8/pdist_cython.cpython-38-x86_64-linux-gnu.so
copying build/lib.linux-x86_64-3.8/pdist_cython.cpython-38-x86_64-linux-gnu.so -> 


In [None]:
# Time the compiled Cython implementation on the shared inputs.
t0 = time.perf_counter()
# main() returns a Cython typed memoryview; np.asarray wraps it as an ndarray
# so it can be compared arithmetically with the other results.
r3 = np.asarray(pdist_cython.main(a,b))
t1 = time.perf_counter()
print(f'cython {t1-t0:.2f} s')
# The extension is built with -ffast-math, so allow for rounding differences.
np.testing.assert_allclose(r3, r1)

: 

In [8]:
import scipy.spatial.distance

# Time SciPy's reference implementation on the shared inputs.
t0 = time.perf_counter()
r4 = scipy.spatial.distance.cdist(a,b, 'euclidean')
t1 = time.perf_counter()
print(f'scipy {t1-t0:.2f} s')
# Check agreement with the NumPy result rather than eyeballing a difference.
np.testing.assert_allclose(r4, r1)

scipy 0.27 


In [9]:
import ctypes
import numpy as np

def compile_and_link_c_string(s, c_file_name = 'test.c', extra_compile_args = None):
    """Write C source to disk, build it as a shared library and load it.

    Args:
        s: C source code as a string.
        c_file_name: Path of the C file to write. The shared library is
            written next to it with the extension replaced by '.so'.
        extra_compile_args: Optional list of extra flags passed to the
            compiler (for example ``['-O3']``). ``None`` adds no flags.

    Returns:
        A ``ctypes.CDLL`` handle for the freshly built shared library.
    """
    import os

    try:
        from distutils.ccompiler import new_compiler
    except ImportError:
        # distutils was removed from the standard library in Python 3.12;
        # setuptools ships a vendored copy.
        from setuptools._distutils.ccompiler import new_compiler

    with open(c_file_name,'w') as f:
        f.write(s)

    compiler = new_compiler()

    objects = compiler.compile([c_file_name], extra_postargs=extra_compile_args)
    # splitext handles any extension length, not only a two-character '.c'.
    so_file_name = os.path.splitext(c_file_name)[0] + '.so'
    compiler.link(compiler.SHARED_LIBRARY, objects, so_file_name)

    # A bare file name is resolved through the system library search path,
    # which normally excludes the working directory; load by absolute path.
    c_lib = ctypes.CDLL(os.path.abspath(so_file_name))
    return c_lib
import ctypes


In [10]:
# C source of the pairwise-distance kernel, compiled and loaded through ctypes.
c_code_string = '''
// pdist.c

#include <math.h>

// Writes the Euclidean distance between row i of A (n x k) and row j of B
// (m x k) to outdata[i][j]. All three buffers are row-major and contiguous.
void pdist(int n, int m, int k, const double * indatav_A, const double * indatav_B, double * outdatav) {

    // View the flat buffers as 2-D arrays; the casts keep the const qualifier.
    const double (*A)[k] = (const double (*)[k]) indatav_A;
    const double (*B)[k] = (const double (*)[k]) indatav_B;    
    double (*outdata)[m] = (double (*)[m]) outdatav;    
    
    int i, j, l;
    double tmp, tmp2;
    for (i = 0; i < n ; ++i) {
        for (j = 0; j < m; ++j){
            tmp = 0;
            for (l = 0; l < k; l++){
                 tmp2 = A[i][l]-B[j][l];
                 tmp += tmp2*tmp2;
            }
            outdata[i][j] = sqrt(tmp);
        }
    }
}

'''

c_lib = compile_and_link_c_string(c_code_string, c_file_name = 'pdist.c')

pdist_c = c_lib.pdist
pdist_c.restype = None
# ndim=2 rejects arrays of the wrong rank before they reach the C code, and
# WRITEABLE guards the output buffer that pdist() fills in.
pdist_c.argtypes = [ctypes.c_int,
                    ctypes.c_int,
                    ctypes.c_int,
                    np.ctypeslib.ndpointer(ctypes.c_double, ndim=2, flags="C_CONTIGUOUS"),
                    np.ctypeslib.ndpointer(ctypes.c_double, ndim=2, flags="C_CONTIGUOUS"),
                    np.ctypeslib.ndpointer(ctypes.c_double, ndim=2, flags=("C_CONTIGUOUS", "WRITEABLE"))]

   

In [11]:

# Time the ctypes-wrapped C implementation, including output allocation.
t0 = time.perf_counter()
r5 = np.empty((a.shape[0], b.shape[0]))
pdist_c(a.shape[0], b.shape[0], a.shape[1], a, b, r5)
t1 = time.perf_counter()
print(f'c {t1-t0:.2f} s')
# Check agreement with the NumPy result rather than eyeballing a difference.
np.testing.assert_allclose(r5, r1)

c 1.68 s
