In [1]:
import numba
import numpy as np
from typing import Callable, Tuple, Any

In [22]:
# define test data
nx, ny = 10000, 10000  # extents of the first / second array axis
dx = 1.  # spacing that divides the finite difference in the loop bodies
g = 1.  # factor that multiplies the finite difference (presumably gravity -- confirm)
# Draw the field from a seeded generator so every Restart & Run All
# benchmarks exactly the same input; values are uniform in [0, 1).
eta = np.random.default_rng(0).random((nx, ny))

In [23]:
# Different numba options
@numba.njit
def _iterate_over_grid_2D_plain(
    loop_body: Callable[..., float], ni: int, nj: int, args: Tuple[Any, ...]
) -> np.ndarray:
    """Fill an ``(ni, nj)`` array by evaluating ``loop_body`` at every grid point.

    Variant compiled with plain ``@numba.njit`` (no extra options).

    Args:
        loop_body: Function called as ``loop_body(*args, i, j, ni, nj)``; its
            return value is stored at ``result[i, j]``.
        ni: Number of grid points along the first axis.
        nj: Number of grid points along the second axis.
        args: Leading positional arguments forwarded unchanged to
            ``loop_body``; may have any length.

    Returns:
        Newly allocated array of shape ``(ni, nj)`` holding the results.
    """
    # np.empty is sufficient: the loops below overwrite every element.
    result = np.empty((ni, nj))
    for i in range(ni):
        for j in range(nj):
            result[i, j] = loop_body(*args, i, j, ni, nj)
    return result


@numba.njit
def _zonal_pressure_gradient_loop_body_plain(
    eta: np.ndarray, g: float, dx: float, i: int, j: int, ni: int, nj: int
) -> float:
    """Return ``-g * (eta[i + 1, j] - eta[i, j]) / dx`` at grid point ``(i, j)``.

    Forward difference along the first axis. At the last row the ``i + 1``
    neighbour wraps around to index 0 (periodic boundary).
    Variant compiled with plain ``@numba.njit``.

    Args:
        eta: 2D array indexed as ``eta[i, j]``.
        g: Factor multiplying the difference.
        dx: Spacing dividing the difference.
        i: Index along the first axis.
        j: Index along the second axis.
        ni: Extent of the first axis; used for the wraparound.
        nj: Extent of the second axis. Unused here; accepted so the signature
            matches the ``loop_body(*args, i, j, ni, nj)`` call of the grid
            iterators.

    Returns:
        Value of the expression at ``(i, j)``.
    """
    # Periodic neighbour: (ni - 1) + 1 wraps to 0.
    ip1 = (i + 1) % ni
    return -g * (eta[ip1, j] - eta[i, j]) / dx


@numba.njit(fastmath=True)
def _iterate_over_grid_2D_fastmath(
    loop_body: Callable[..., float], ni: int, nj: int, args: Tuple[Any, ...]
) -> np.ndarray:
    """Fill an ``(ni, nj)`` array by evaluating ``loop_body`` at every grid point.

    Same loops as the plain variant; the only difference is that this one is
    compiled with ``@numba.njit(fastmath=True)``.

    Args:
        loop_body: Function called as ``loop_body(*args, i, j, ni, nj)``; its
            return value is stored at ``result[i, j]``.
        ni: Number of grid points along the first axis.
        nj: Number of grid points along the second axis.
        args: Leading positional arguments forwarded unchanged to
            ``loop_body``; may have any length.

    Returns:
        Newly allocated array of shape ``(ni, nj)`` holding the results.
    """
    # np.empty is sufficient: the loops below overwrite every element.
    result = np.empty((ni, nj))
    for i in range(ni):
        for j in range(nj):
            result[i, j] = loop_body(*args, i, j, ni, nj)
    return result


@numba.njit(fastmath=True)
def _zonal_pressure_gradient_loop_body_fastmath(
    eta: np.ndarray, g: float, dx: float, i: int, j: int, ni: int, nj: int
) -> float:
    """Return ``-g * (eta[i + 1, j] - eta[i, j]) / dx`` at grid point ``(i, j)``.

    Forward difference along the first axis. At the last row the ``i + 1``
    neighbour wraps around to index 0 (periodic boundary).
    Variant compiled with ``@numba.njit(fastmath=True)``.

    Args:
        eta: 2D array indexed as ``eta[i, j]``.
        g: Factor multiplying the difference.
        dx: Spacing dividing the difference.
        i: Index along the first axis.
        j: Index along the second axis.
        ni: Extent of the first axis; used for the wraparound.
        nj: Extent of the second axis. Unused here; accepted so the signature
            matches the ``loop_body(*args, i, j, ni, nj)`` call of the grid
            iterators.

    Returns:
        Value of the expression at ``(i, j)``.
    """
    # Periodic neighbour: (ni - 1) + 1 wraps to 0.
    ip1 = (i + 1) % ni
    return -g * (eta[ip1, j] - eta[i, j]) / dx


In [34]:
# Time the plain-njit grid iterator with the plain-njit loop body passed in as an argument.
%timeit _iterate_over_grid_2D_plain(_zonal_pressure_gradient_loop_body_plain, nx, ny, (eta, g, dx))

618 ms ± 5.42 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# Time the fastmath grid iterator with the fastmath loop body passed in as an argument.
%timeit _iterate_over_grid_2D_fastmath(_zonal_pressure_gradient_loop_body_fastmath, nx, ny, (eta, g, dx))

640 ms ± 7.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
# impact of inlining
@numba.njit
def _iterate_over_grid_2D_manual_inlined(
    ni: int, nj: int, eta: np.ndarray, g: float, dx: float
) -> np.ndarray:
    """Compute ``-g * (eta[i + 1, j] - eta[i, j]) / dx`` on the whole grid.

    The loop body is written directly inside the nested loops instead of being
    passed in as a function. The ``i + 1`` neighbour wraps around to index 0
    at the last row (periodic boundary along the first axis).

    Args:
        ni: Number of grid points along the first axis.
        nj: Number of grid points along the second axis.
        eta: 2D array indexed as ``eta[i, j]``.
        g: Factor multiplying the difference.
        dx: Spacing dividing the difference.

    Returns:
        Newly allocated array of shape ``(ni, nj)`` holding the results.
    """
    # np.empty is sufficient: the loops below overwrite every element.
    result = np.empty((ni, nj))
    for i in range(ni):
        for j in range(nj):
            # Same two statements as the separate loop-body functions,
            # written out in place.
            ip1 = (i + 1) % ni
            result[i, j] = -g * (eta[ip1, j] - eta[i, j]) / dx
    return result

# Time the hand-inlined variant (loop body written inside the iterator itself).
%timeit _iterate_over_grid_2D_manual_inlined(nx, ny, eta, g, dx)

474 ms ± 10.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [52]:
@numba.njit
def _iterate_over_grid_2D_inlined(
    ni: int, nj: int, eta: np.ndarray, g: float, dx: float
) -> np.ndarray:
    """Fill an ``(ni, nj)`` array using the ``inline='always'`` loop body.

    Unlike the variants that receive the loop body as an argument, this one
    calls ``_zonal_pressure_gradient_loop_body_inline`` by name at every grid
    point.

    Args:
        ni: Number of grid points along the first axis.
        nj: Number of grid points along the second axis.
        eta: 2D array forwarded to the loop body.
        g: Factor forwarded to the loop body.
        dx: Spacing forwarded to the loop body.

    Returns:
        Newly allocated array of shape ``(ni, nj)`` holding the results.
    """
    # np.empty is sufficient: the loops below overwrite every element.
    result = np.empty((ni, nj))
    for i in range(ni):
        for j in range(nj):
            result[i, j] = _zonal_pressure_gradient_loop_body_inline(eta, g, dx, i, j, ni, nj)
    return result


@numba.njit(inline='always')
def _zonal_pressure_gradient_loop_body_inline(
    eta: np.ndarray, g: float, dx: float, i: int, j: int, ni: int, nj: int
) -> float:
    """Return ``-g * (eta[i + 1, j] - eta[i, j]) / dx`` at grid point ``(i, j)``.

    Forward difference along the first axis. At the last row the ``i + 1``
    neighbour wraps around to index 0 (periodic boundary).
    Variant compiled with ``@numba.njit(inline='always')``.

    Args:
        eta: 2D array indexed as ``eta[i, j]``.
        g: Factor multiplying the difference.
        dx: Spacing dividing the difference.
        i: Index along the first axis.
        j: Index along the second axis.
        ni: Extent of the first axis; used for the wraparound.
        nj: Extent of the second axis. Unused here; kept so the signature
            matches the other loop-body variants.

    Returns:
        Value of the expression at ``(i, j)``.
    """
    # Periodic neighbour: (ni - 1) + 1 wraps to 0.
    ip1 = (i + 1) % ni
    return -g * (eta[ip1, j] - eta[i, j]) / dx

# Time the variant whose loop body is a separate function compiled with inline='always'.
%timeit _iterate_over_grid_2D_inlined(nx, ny, eta, g, dx)

491 ms ± 3.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


468 ms ± 2.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
