In [1]:
%load_ext line_profiler
#%lprun -f bad_call -f worse_call sumulate(13)

In [2]:
from numba import jit, njit, vectorize,config
import cProfile

In [22]:
import matplotlib.pyplot as plt
import seaborn as sbn
import pandas as pd
import numpy as np
import random
import math

In [4]:
config.THREADING_LAYER = 'threadsafe'
sbn.set()

# A _While_ Test

So, the great problem of our system is that time advance a Poisson process (somthing exponential) and it is time-expensive. A way to solve this problem is to try to optimize first our process

First, let us define two random exp functions

In [6]:
def random_exp(a0):
    r = np.random.random()
    #tau = (1/a0)*np.log(1/r)
    return (1/a0)*np.log(1/r)

In [7]:
def random_exp_np(a0):
    return np.random.exponential(1/a0)

In [10]:
%timeit -r 15 random_exp(2)

1.15 µs ± 17.7 ns per loop (mean ± std. dev. of 15 runs, 1000000 loops each)


In [11]:
%timeit -r 15 random_exp_np(2)

1.93 µs ± 80.5 ns per loop (mean ± std. dev. of 15 runs, 100000 loops each)


The times are pretty close but the harcoded implementation got a lower std. dev. which means is more stable and predictable. 

Now, lets njit the functions

In [12]:
random_exp_jitted = njit(random_exp)
random_exp_np_jitted = njit(random_exp_np)

And to end let us run the timeit test again.

In [15]:
direct_method = %timeit -o -r 15 random_exp_jitted(2.0)

181 ns ± 11.7 ns per loop (mean ± std. dev. of 15 runs, 10000000 loops each)


In [16]:
numpy_method = %timeit -o -r 15 random_exp_np_jitted(2.0)

202 ns ± 9.32 ns per loop (mean ± std. dev. of 15 runs, 1000000 loops each)


In [20]:
print('Worst in direct was {:.2} and numpy as {:.2}'.format(direct_method.worst,numpy_method.worst))

Worst in direct was 2.1e-07 and numpy as 2.2e-07


In [21]:
direct_method.worst<numpy_method.worst

True

Even in the worst was faster our direct method. 

Now, let us notice that in our direct method we depend on a numpy medthod: `numpy.log`. Let us now see if our direct method gets faster with `math.log` method

In [25]:
def random_exp_numpy(a0):
    r = np.random.random()
    #tau = (1/a0)*np.log(1/r)
    return (1/a0)*np.log(1/r)

def random_exp_math(a0):
    r = np.random.random()
    #tau = (1/a0)*np.log(1/r)
    return (1/a0)*math.log(1/r)

In [30]:
numpy_log = %timeit -o -r 15 random_exp_numpy(2.0)

1.29 µs ± 98.8 ns per loop (mean ± std. dev. of 15 runs, 1000000 loops each)


In [31]:
math_log = %timeit -o -r 15 random_exp_math(2.0)

571 ns ± 13.2 ns per loop (mean ± std. dev. of 15 runs, 1000000 loops each)


In [32]:
print('Worst in numpy was {:.2} and math as {:.2}'.format(numpy_log.worst,math_log.worst))

Worst in numpy was 1.4e-06 and math as 6.2e-07


In [34]:
print('Is math best in the direct method than numpy? {}'.format(numpy_log.worst>math_log.worst) )

Is math best in the direct method than numpy? True


Now let us njit our functions

In [36]:
random_exp_numpy_jitted = njit(random_exp_numpy)
random_exp_math_jitted = njit(random_exp_math)

In [37]:
numpy_log_jitted = %timeit -o -r 15 random_exp_numpy_jitted(2.0)

176 ns ± 3 ns per loop (mean ± std. dev. of 15 runs, 10000000 loops each)


In [38]:
math_log_jitted = %timeit -o -r 15 random_exp_math_jitted(2.0)

195 ns ± 4.35 ns per loop (mean ± std. dev. of 15 runs, 10000000 loops each)


In [40]:
print('Worst in numpy jitted was {:.2} and math jitted as {:.2}'.format(numpy_log_jitted.worst,math_log_jitted.worst))

Worst in numpy jitted was 1.8e-07 and math jitted as 2e-07


In [42]:
print('Remains math best in the direct method than numpy once jitted? {}'.format(numpy_log_jitted.worst>math_log_jitted.worst) )

Remains math best in the direct method than numpy once jitted? False


In [46]:
numpy_log_jitted.worst<math_log_jitted.worst

True

In [45]:
numpy_log_jitted.best<math_log_jitted.best

True

In [47]:
numpy_log_jitted.average<math_log_jitted.average

True

Then the direct method is faster with njit method. 

Now lets do a pair of while loops:

In [48]:
@njit
def random_exp_numpy(a0):
    """
    This functions generates a exponential random 
    variable with mean and std. dev. 1/a0
    """
    r = np.random.random()
    return (1/a0)*np.log(1/r)

In [65]:
def Loop_1(t0=0.0,t_max=10.0,a0=2.0):
    while t0<t_max:
        t0 += random_exp_numpy(a0)
        
def Loop_2(t0=0.0,t_max=10.0,a0=2.0):
    while True:
        if t_max<t0:
            break
        t0 += random_exp_numpy(a0)

In [66]:
%timeit -r 15 Loop_1()

4.07 µs ± 67.6 ns per loop (mean ± std. dev. of 15 runs, 100000 loops each)


In [67]:
%timeit -r 15 Loop_2()

4.12 µs ± 68.8 ns per loop (mean ± std. dev. of 15 runs, 100000 loops each)


Now, let us consider a greater time, like `t_max = 100`.

In [68]:
%timeit -r 15 Loop_1(t_max=100.0)

39.2 µs ± 2.97 µs per loop (mean ± std. dev. of 15 runs, 10000 loops each)


In [69]:
%timeit -r 15 Loop_2(t_max=100.0)

40.1 µs ± 2.39 µs per loop (mean ± std. dev. of 15 runs, 10000 loops each)


Keeping the function with stop condition on `while` gets best performance. Now, let us try to profiling the functions

In [70]:
%lprun -f Loop_1 Loop_1(t_max=100.0)

Timer unit: 1e-06 s

Total time: 0.000234 s
File: <ipython-input-65-5006a524d3c1>
Function: Loop_1 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def Loop_1(t0=0.0,t_max=10.0,a0=2.0):
     2       187         77.0      0.4     32.9      while t0<t_max:
     3       186        157.0      0.8     67.1          t0 += random_exp_numpy(a0)

In [71]:
%lprun -f Loop_2 Loop_2(t_max=100.0)

Timer unit: 1e-06 s

Total time: 0.000265 s
File: <ipython-input-65-5006a524d3c1>
Function: Loop_2 at line 5

Line #      Hits         Time  Per Hit   % Time  Line Contents
     5                                           def Loop_2(t0=0.0,t_max=10.0,a0=2.0):
     6         1          1.0      1.0      0.4      while True:
     7       220         85.0      0.4     32.1          if t_max<t0:
     8         1          1.0      1.0      0.4              break
     9       219        178.0      0.8     67.2          t0 += random_exp_numpy(a0)

Keeping the condition on `while` give is a best performance cause we stop without executing an external `break`, thats why we save time. 

Now, let us see if there is performance _njiting_ the functions.

In [72]:
Loop_1_jitted = njit(Loop_1)
Loop_2_jitted = njit(Loop_2)

In [73]:
%timeit -r 15 Loop_1_jitted(t_max=100.0)

21.6 µs ± 538 ns per loop (mean ± std. dev. of 15 runs, 10000 loops each)


In [74]:
%timeit -r 15 Loop_2_jitted(t_max=100.0)

22 µs ± 600 ns per loop (mean ± std. dev. of 15 runs, 10000 loops each)


It improved for almost a factor of 2x! (~1.8x)