# **Numba**

### Numba is a JIT compiler that uses LLVM internally - no separate compilation step required!

![](./img/numba_flowchart.png)

In [None]:
import time

def get_time_taken(func, *args):
    """Time one call of ``func(*args)`` and print the duration and the result.

    ``func`` is invoked twice. The first call is not timed and supplies the
    result that gets printed; only the second call is measured, so any
    one-off cost paid on the first call is excluded from the reported time.

    Args:
        func: Callable to benchmark.
        *args: Positional arguments forwarded to ``func`` on both calls.
    """
    res = func(*args)  # untimed first call; its result is what gets printed

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and can jump.
    start = time.perf_counter()
    func(*args)
    time_taken = time.perf_counter() - start

    print(f"Total time - {time_taken:.5f} seconds")
    print(res)

In [None]:
from numba import jit
from math import tan, atan

@jit
def slow_function(n):
    """Sum ``tan(x) * atan(x)`` over every integer ``x`` in ``range(n ** 7)``."""
    total = 0
    upper = n ** 7
    for i in range(upper):
        total += tan(i) * atan(i)
    return total


# Benchmark the jitted loop with n = 10.
get_time_taken(slow_function, 10)

### The speed-up is obvious, but there are a lot of caveats

### For example, any function called from a jitted function must also be "decorated"

In [None]:
from numba import jit, int32

# The product of tan() and atan() is a real number, so the return type is
# declared as float64. Declaring an int32 return type (as before) forces each
# value to be truncated to an integer. The signature is given as a string so
# no extra numba type names need to be imported in this cell.
@jit("float64(int64)", nopython=True)
def func(x):
    """Return ``tan(x) * atan(x)`` for the integer ``x``."""
    return tan(x) * atan(x)

# The accumulated value is a floating-point sum, so the return type is
# float64; an int32 return type would truncate (and could overflow) the sum.
# The signature is given as a string so no extra numba type names are needed.
@jit("float64(int64)", nopython=True)
def slow_function(n):
    """Sum ``func(x)`` over every integer ``x`` in ``range(n ** 7)``."""
    result = 0.0  # float accumulator, matching the declared return type
    for x in range(n ** 7):
        result += func(x)
    return result

get_time_taken(slow_function, 10)

### Notice the slight overhead 

In [None]:
from numba import prange,jit, int32

# The sum is a floating-point value, so the return type is float64; an int32
# return type would truncate it. The signature is given as a string so no
# extra numba type names need to be imported in this cell.
@jit("float64(int64)", nopython=True, parallel=True)
def slow_function(n):
    """Sum ``tan(x) * atan(x)`` over ``prange(n ** 7)``.

    ``result`` is only ever updated with ``+=`` inside the ``prange`` loop.
    NOTE(review): the iterations may be summed in a different order than in
    the serial version, so the last digits of the float result can differ.
    """
    result = 0.0  # float accumulator, matching the declared return type
    for x in prange(n ** 7):
        result += tan(x) * atan(x)
    return result

get_time_taken(slow_function, 10)

### `prange` is the parallel version of Python's `range` function, and the `parallel=True` option optimizes the code to use all the cores
### Let's see how it works with NumPy

In [None]:
from numba import jit, int32
import numpy as np

# np.tan(x) * np.arctan(x) is a floating-point value, so the return type is
# float64; an int32 return type would truncate the sum. The signature is
# given as a string so no extra numba type names need to be imported here.
@jit("float64(int64)", nopython=True)
def slow_func_in_numpy(n):
    """Sum ``np.tan(x) * np.arctan(x)`` over the values of ``np.arange(n ** 7)``."""
    result = 0.0  # float accumulator, matching the declared return type
    for x in np.arange(n ** 7):
        result += np.tan(x) * np.arctan(x)
    return result

get_time_taken(slow_func_in_numpy, 10)

### Do I have to write functions for every type?

In [None]:
from numba import jit,  int32, int64, float32, float64
from math import tan, atan

# One compiled specialization per accepted argument type. Every variant
# returns float64 because the loop accumulates a floating-point sum; returning
# the argument's own type would truncate (integers) or narrow (float32) it.
@jit([float64(int32), float64(int64), float32 and float64(float32), float64(float64)])
def slow_function(n):
    """Sum ``tan(x) * atan(x)`` over every integer ``x`` in ``range(int(n ** 7))``.

    ``n`` may be an integer or a float. ``n ** 7`` is converted with ``int()``
    because ``range`` needs an integer bound, so a fractional bound is
    truncated toward zero.
    """
    result = 0.0  # float accumulator, matching the declared return type
    for x in range(int(n ** 7)):
        result += tan(x) * atan(x)
    return result

get_time_taken(slow_function, 10)
get_time_taken(slow_function, 10.2)

### Let's see how we can create numpy ufuncs using numba

In [None]:
from numba import vectorize, int32, int64, float32, float64
import numpy as np

@vectorize([
    int32(int32, int32),
    int64(int64, int64),
    float32(float32, float32),
    float64(float64, float64),
])
def addfunc(x, y):
    """Return ``x + y``; compiled for each of the listed type signatures."""
    total = x + y
    return total

@vectorize
def simpler_addfunc(x, y):
    """Return ``x + y``; no type signatures are listed for this variant."""
    total = x + y
    return total

In [None]:
addfunc(2, 3)

In [None]:
addfunc(6.42, 9.8)

In [None]:
simpler_addfunc(2, 3.4)

In [None]:
simpler_addfunc(np.array([1,2,3]), np.array([4,5,6]))

### Limited support for classes

In [None]:
# jitclass lives in numba.experimental in newer numba releases; fall back to
# the old top-level location so the cell also runs on older installations.
try:
    from numba.experimental import jitclass
except ImportError:
    from numba import jitclass

from numba import int32

# Field name -> numba type for every attribute stored on the jitclass.
spec = [
    ('x', int32),
    ('y', int32),
]

# A point with two fields, x and y, whose types are given by `spec`.
@jitclass(spec)
class Node(object):
    def __init__(self, x, y):
        """Store the two coordinates on the instance."""
        self.x = x
        self.y = y

    def distance(self, n):
        """Return the squared distance between this node and node ``n``."""
        dx = self.x - n.x
        dy = self.y - n.y
        return dx ** 2 + dy ** 2

    def distance_from_point(self, x, y):
        """Return the squared distance between this node and the point ``(x, y)``."""
        dx = self.x - x
        dy = self.y - y
        return dx ** 2 + dy ** 2


# Two sample nodes used by the timing cells below.
n1 = Node(3, 2)
n2 = Node(9, 6)

In [None]:
%time n1.distance(n2)

In [None]:
%time n1.distance_from_point(4,5)

### This is just a glance into what numba can do, but remember, it does come with its own limitations

Numba Limitations
=================

1. No Strings Support
2. No support for exception handling (try .. except, try .. finally)
3. No support for context management (the with statement)
4. list comprehension is supported, but not dict, set or generator comprehensions
5. No support for generator delegation (yield from)

raise and assert are supported

# **Exercise**

Try using numba's @jit decorator with the function you wrote earlier and check with %time if there is any improvement in the performance

**If you find any improvement, feel free to tweet about your experience with the handle @pyconfhyd**