In [45]:
import numpy as np
import numpy.typing as npt

%load_ext cython

import Cython.Compiler.Options
Cython.Compiler.Options.annotate = True

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [46]:
rng = np.random.default_rng(0)
M = 40
args = 0, M, rng.uniform(low=0.1, high=10, size=M), rng.uniform(low=1, high=1e3, size=M) * 1e-9, 2*np.pi/(2000*1e-9), 2,3, -np.pi/7

In [47]:
from src.reflectivity import amplitude
ans = amplitude(*args)
ans

(-0.2289020061365319+1.2273323607687665e-10j)

In [48]:
%timeit amplitude(*args)

320 µs ± 49.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Algorithm 1: Big matrix inversion (as in Hobson and Baldwin paper)

In [49]:
%%cython

# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True
# Deactivate bounds checking, Deactivate negative indexing, No division by zero checking

import scipy.linalg
cimport numpy as np
import numpy as np
import cython

cdef extern from "<complex.h>" nogil:
    double complex cexp(double complex z)
    double real(double complex z)
    double complex csqrt(double complex z)


cdef extern from "<math.h>" nogil:
    double cos(double arg)
    double sin(double arg)

cdef double PI = 3.14159265358979323846

cpdef complex amplitude_cython(int polarisation, int M, np.ndarray [double, ndim=1] n, np.ndarray [double, ndim=1] d, double wavelength, double n_outer,
                 double n_substrate, double theta_outer):
    cdef np.ndarray [complex, ndim=2] mat = make_matrix(polarisation, M, n, d, 2 * PI / wavelength, n_outer, n_substrate, theta_outer)
    cdef np.ndarray [complex, ndim=1] c = make_vector(M)
    # M is a banded matrix so that 'solve_banded' can be used. Flags are set for performance.
    cdef np.ndarray [complex, ndim=1] x = scipy.linalg.solve_banded(l_and_u=(2, 2), ab=mat, b=c, overwrite_ab=True, overwrite_b=True, check_finite=False)
    return x[0]


cpdef np.ndarray [complex, ndim=2] make_matrix(int polarisation, int M, np.ndarray [double, ndim=1] n, np.ndarray [double, ndim=1] d, double k_outer, double n_outer, double n_substrate, double theta_outer):
    cdef complex cos_theta_outer = cos(theta_outer)
    cdef np.ndarray [complex, ndim=1] cos_theta = np.sqrt(np.complex_(1 - (n_outer / n) ** 2 * np.sin(theta_outer) ** 2))
    cdef complex cos_theta_substrate = csqrt(1 - (n_outer / n_substrate) ** 2 * sin(theta_outer) ** 2)

    cdef np.ndarray [complex, ndim=1] exps = np.exp(1j * (k_outer * d) * (n / n_outer) * cos_theta)

    cdef complex a_mat = -1
    cdef complex b_mat = 1
    cdef complex c_mat = 1
    cdef complex d_mat = n_substrate * cos_theta_substrate

    cdef np.ndarray [complex, ndim=2] mat = np.zeros((5, 2*M+2), dtype=complex)
    mat[2, 0] = a_mat
    mat[3, 0] = b_mat
    mat[1, 2 * M + 1] = c_mat
    mat[2, 2 * M + 1] = d_mat

    mat[0, 1] = 0
    mat[1, 1] = 1
    mat[2, 1] = (n[0] / n_outer) * (cos_theta[0] / cos_theta_outer)
    mat[3, 1] = -exps[0]
    mat[4, 1] = -exps[0] * n[0] * cos_theta[0]

    mat[0, 2] = exps[0]
    mat[1, 2] = -(n[0] / n_outer) * (cos_theta[0] / cos_theta_outer) * exps[0]
    mat[2, 2] = -1
    mat[3, 2] = n[0] * cos_theta[0]
    mat[4, 2] = 0

    cdef int i
    for i in range(1, M):
        mat[0, 2 * i + 1] = 0
        mat[1, 2 * i + 1] = 1
        mat[2, 2 * i + 1] = n[i] * cos_theta[i]
        mat[3, 2 * i + 1] = -exps[i]
        mat[4, 2 * i + 1] = -exps[i] * n[i] * cos_theta[i]

        mat[0, 2 * i + 2] = exps[i]
        mat[1, 2 * i + 2] = -n[i] * cos_theta[i] * exps[i]
        mat[2, 2 * i + 2] = -1
        mat[3, 2 * i + 2] = n[i] * cos_theta[i]
        mat[4, 2 * i + 2] = 0

    return mat


cpdef np.ndarray [complex, ndim=1] make_vector(int M):
    cdef np.ndarray [complex, ndim=1] c = np.zeros(2 * M + 2, dtype=complex)
    c[0] = 1
    c[1] = 1
    return c

In [50]:
np.testing.assert_almost_equal(amplitude(*args), amplitude_cython(*args))

In [51]:
%timeit amplitude_cython(*args)

50.2 µs ± 994 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


# Algorithm 2: Multiplying transfer matrices in cython

In [56]:
%%cython -a

# cython: language_level=3, boundscheck=False, wraparound=False, cdivision=True
# Deactivate bounds checking, Deactivate negative indexing, No division by zero checking

cimport numpy as np
import numpy as np
import cython

cdef extern from "<complex.h>" nogil:
    double complex cexp(double complex z)
    double real(double complex z)
    double complex csqrt(double complex z)


cdef extern from "<math.h>" nogil:
    double cos(double arg)
    double sin(double arg)

cdef double PI = 3.14159265358979323846

cpdef complex amplitude_transfer_matrix_cython(int polarisation, int M, np.ndarray [double, ndim=1] n, np.ndarray [double, ndim=1] d, double wavelength, double n_outer,
                 double n_substrate, double theta_outer):
    cdef complex cos_theta_outer = cos(theta_outer)
    cdef np.ndarray [complex, ndim=1] cos_theta = np.sqrt(np.complex_(1 - (n_outer / n) ** 2 * np.sin(theta_outer) ** 2))
    cdef complex cos_theta_substrate = csqrt(1 - (n_outer / n_substrate) ** 2 * sin(theta_outer) ** 2)

    # Calculate transfer matrix T
    cdef complex[2][2] T = [[1, 0], [0, 1]]
    cdef complex[2][2] temp
    cdef complex[2][2] other_temp
    cdef complex phi

    T_ij(temp, n_outer, cos_theta_outer, n[0], cos_theta[0])
    matmul(T, temp, other_temp)

    cdef int i
    for i in range(1, M):
        phi = 2 * PI / wavelength * n[i - 1] / n_outer * d[i - 1] * cos_theta[i - 1]
        T_i(temp, phi)
        matmul(T, temp, other_temp)

        T_ij(temp, n[i - 1], cos_theta[i - 1], n[i], cos_theta[i])
        matmul(T, temp, other_temp)

    phi = 2 * PI / wavelength * n[M - 1] / n_outer * d[M - 1] * cos_theta[M - 1]
    T_i(temp, phi)
    matmul(T, temp, other_temp)

    T_ij(temp, n[M - 1], cos_theta[M - 1], n_substrate, cos_theta_substrate)
    matmul(T, temp, other_temp)

    return T[1][0] / T[0][0]

cdef void T_i(complex[:, :] temp, complex phi):
    temp[0][0] = cexp(-1j * phi)
    temp[0][1] = 0
    temp[1][0] = 0
    temp[1][1] = cexp(1j * phi)

cdef void T_ij(complex[:, :] temp, double n_i, complex cos_theta_i, double n_j, complex cos_theta_j):
    cdef complex r = r_ij_s(n_i, cos_theta_i, n_j, cos_theta_j)
    temp[0][0] = 1
    temp[0][1] = r
    temp[1][0] = r
    temp[1][1] = 1


cdef void matmul(complex[:, :] A, complex[:, :] B, complex[:, :] other_temp):
    """
    Calculates AB and stores result in A
    """
    other_temp[0][0] = A[0][0] * B[0][0] + A[0][1] * B[1][0]
    other_temp[0][1] = A[0][0] * B[0][1] + A[0][1] * B[1][1]
    other_temp[1][0] = A[1][0] * B[0][0] + A[1][1] * B[1][0]
    other_temp[1][1] = A[1][0] * B[0][1] + A[1][1] * B[1][1]

    cdef int i
    cdef int j
    for i in range(2):
        for j in range(2):
            A[i, j] = other_temp[i][j]

cdef complex r_ij_s(double n_i, complex cos_theta_i, double n_j, complex cos_theta_j):
    return (<complex>n_i * cos_theta_i - <complex>n_j * cos_theta_j) / (<complex>n_i * cos_theta_i + <complex>n_j * cos_theta_j)


In [57]:
print(amplitude_transfer_matrix_cython(*args))
np.testing.assert_almost_equal(amplitude(*args), amplitude_transfer_matrix_cython(*args))

(-0.2289020061365326+1.2273288656692643e-10j)


In [58]:
%timeit amplitude_transfer_matrix_cython(*args)

The slowest run took 4.33 times longer than the fastest. This could mean that an intermediate result is being cached.
310 µs ± 196 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Results
Winner: Algorithm 1