In [1]:
import numpy as np
import numba as nb
import numpy.random as rng

mus = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
x, y = 5000, 320


# no-inlining

In [2]:
@nb.njit
def exponential(scales):
    
    length = len(scales)
    t_array = np.zeros((length))
    
    for i in range(length):
        t_array[i] = rng.exponential(scales[i])
        
    return t_array

@nb.njit
def rand(mus, step_max, seed):
    
    step = 0
    tau = 0.
    
    rng.seed(seed)
    dtaus = np.zeros(len(mus))
    
    while (step < step_max):
        step = step + 1
        
        dtaus = exponential(1./mus)
        
        tau += np.min(dtaus)
        
    return tau

@nb.njit
def nopar_simulate_multi(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus

@nb.njit(parallel = True)
def par_simulate_multi(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus


In [3]:
# compile 
nopar_simulate_multi(mus,  2, 2)
par_simulate_multi(mus, 2, 2)

%timeit tmp = nopar_simulate_multi(mus, x, y)
%timeit tmp = par_simulate_multi(mus, x, y)

10.1 s ± 14.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
7.33 s ± 58.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Manual inlining

In [4]:
@nb.njit
def rand_wo(mus, step_max, seed):

    
    step = 0
    tau = 0.
    
    rng.seed(seed)
    dtaus = np.zeros(len(mus))
    
    while (step < step_max):
        step = step + 1
        
        for i in range(len(mus)):
            dtaus[i] = rng.exponential(1./mus[i])
        
        
        tau += np.min(dtaus)
        
    return tau

@nb.njit
def nopar_simulate_multi_wo(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand_wo(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus


@nb.njit(parallel = True)
def par_simulate_multi_wo(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand_wo(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus



In [5]:
# compile 
nopar_simulate_multi_wo(mus,  2, 2)
par_simulate_multi_wo(mus, 2, 2)

%timeit tmp = nopar_simulate_multi_wo(mus,  x, y)
%timeit tmp = par_simulate_multi_wo(mus, x, y)

758 ms ± 813 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
20.2 ms ± 3.79 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# numba inliner

In [6]:
@nb.njit(inline='always') # switch on IR inlining
def exponential_inlined(scales):
    
    length = len(scales)
    t_array = np.zeros((length))
    
    for i in range(length):
        t_array[i] = rng.exponential(scales[i])
        
    return t_array

@nb.njit
def rand_w_inlined_exponential(mus, step_max, seed):
    
    step = 0
    tau = 0.
    
    rng.seed(seed)
    dtaus = np.zeros(len(mus))
    
    while (step < step_max):
        step = step + 1
        
        dtaus = exponential_inlined(1./mus)
        
        tau += np.min(dtaus)
        
    return tau

@nb.njit
def nopar_simulate_multi_inlined_exponential(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand_w_inlined_exponential(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus

@nb.njit(parallel = True)
def par_simulate_multi_inlined_exponential(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    for i in nb.prange(num):
        tau = rand_w_inlined_exponential(mus, step_max, seeds[i])
        
        taus[i] = tau
        
        
    return taus





In [7]:
# compile 
nopar_simulate_multi_inlined_exponential(mus,  2, 2)
par_simulate_multi_inlined_exponential(mus, 2, 2)

%timeit tmp = nopar_simulate_multi_inlined_exponential(mus, x, y)
%timeit tmp = par_simulate_multi_inlined_exponential(mus, x, y)

9.89 s ± 12.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
7.38 s ± 152 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# no memory allocation in exponential 

In [8]:
@nb.njit
def exponential_nomemalloc(scales, t_array):
    
    for i in range(len(t_array)):
        t_array[i] = rng.exponential(scales[i])


@nb.njit
def rand_nomemalloc(mus, step_max, seed, dtaus):
    
    step = 0
    tau = 0.
    
    rng.seed(seed)
    
    while (step < step_max):
        step = step + 1
        
        exponential_nomemalloc(1./mus, dtaus)
        
        tau += np.min(dtaus)
        
    return tau

@nb.njit
def nopar_simulate_multi_nomemalloc(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    
    dtaus = np.zeros((num, len(mus)))
    
    for i in nb.prange(num):
        tau = rand_nomemalloc(mus, step_max, seeds[i], dtaus[i, :])
        
        taus[i] = tau
        
        
    return taus

@nb.njit(parallel = True)
def par_simulate_multi_nomemalloc(mus, step_max, num):
    
    seeds = np.arange(num)
    taus = np.zeros(num)
    dtaus = np.zeros((num, len(mus)))
    
    for i in nb.prange(num):
        tau = rand_nomemalloc(mus, step_max, seeds[i], dtaus[i, :])
        
        taus[i] = tau
        
        
    return taus


In [9]:
# compile 
nopar_simulate_multi_nomemalloc(mus,  2, 2)
par_simulate_multi_nomemalloc(mus,  2, 2)

%timeit tmp = nopar_simulate_multi_nomemalloc(mus, x, y)
%timeit tmp = par_simulate_multi_nomemalloc(mus, x, y)


247 ms ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
335 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
