# Optimizing Python Code with [Cython](https://cython.org/)

In [None]:
import Cython
from random import random
import math
import numpy as np
import matplotlib.pyplot as plt

# This is needed to load the Cython magic
%load_ext Cython

## Calculating $\pi$ using Monte Carlo Integration

### Pure Python

In [None]:
def pi_mc(n=1000):
    '''Estimate PI by Monte Carlo sampling of the unit square.

    Draws ``n`` points uniformly from [0, 1) x [0, 1) and counts how many
    land inside the quarter unit circle; that fraction approximates PI / 4.

    Parameters
    ----------
    n : number of random points to draw.

    Returns
    -------
    The estimate ``4.0 * hits / n``.

    Raises
    ------
    ZeroDivisionError if ``n`` is 0.
    '''
    # Every sample consumes two random() draws: the x coordinate first,
    # then the y coordinate.
    hits = sum(
        1
        for _ in range(n)
        if random() ** 2 + random() ** 2 <= 1.0
    )
    return 4.0 * hits / n

In [None]:
%time pi_mc(10000000)

### Cython

#### 1. Use `cython` cell magic without making any code change

In [None]:
%%cython

from random import random

def pi_mc(n=1000):
    '''Calculate PI using Monte Carlo method.

    Same algorithm as the pure-Python version, compiled through the
    ``%%cython`` magic; no variable is declared with a C type here.

    n is the number of random points drawn from the unit square. The
    return value is four times the fraction of points that fall inside
    the quarter unit circle.
    '''
    in_circle = 0
    for i in range(n):
        # One uniformly distributed point in [0, 1) x [0, 1).
        x, y = random(), random()
        # Inside (or on) the quarter circle of radius 1?
        if x ** 2 + y ** 2 <= 1.0:
            in_circle += 1
        
    return 4.0 * in_circle / n

In [None]:
%time pi_mc(10000000)

#### 2. Static type declarations in Cython with `cdef`

Static type declarations allow Cython to step out of the dynamic nature of the Python code and produce efficient **C** code.

In [None]:
%%cython

from random import random

def pi_mc(n=1000):
    '''Estimate PI by Monte Carlo sampling, with C-typed local variables.

    ``n`` (number of samples) is still an untyped Python object; the hit
    counter, the loop index and the point coordinates are C variables.

    Returns four times the fraction of sampled points that fall inside
    the quarter unit circle.
    '''
    cdef:
        int hits = 0
        int k
        double px, py
    for k in range(n):
        # Two draws per sample: x coordinate first, then y coordinate.
        px = random()
        py = random()
        if px ** 2 + py ** 2 <= 1.0:
            hits += 1

    return 4.0 * hits / n

In [None]:
%time pi_mc(10000000)

#### 3. Use the Cython `annotate` option to inspect the generated C code

In [None]:
%%cython --annotate

from random import random

def pi_mc(int n=1000):
    '''Estimate PI by Monte Carlo sampling; argument and locals are C-typed.

    ``n`` is the number of points drawn from the unit square. The result
    is four times the fraction of points inside the quarter unit circle.
    '''
    cdef int hits = 0
    cdef int k
    cdef double px, py
    for k in range(n):
        # random() is still a Python-level call; only its result is
        # stored in a C double.
        px = random()
        py = random()
        if px ** 2 + py ** 2 <= 1.0:
            hits = hits + 1

    return 4.0 * hits / n

In [None]:
%time pi_mc(10000000)

## Types of Cython functions

Cython offers three different types of functions, declared with `def`, `cdef` and `cpdef`:

1. Functions declared with `def` can be called from Python and Cython code.
2. Functions declared with `cdef` can be only called from Cython code.
3. `cpdef` causes Cython to create two versions of the function. One which is used when the function is called from Cython code and one when it is called from Python.

In [None]:
%%cython 


cdef double cube(double x):
    '''Return x raised to the third power (callable from Cython code only).'''
    cdef double squared = x * x
    return squared * x


cpdef double cube_sum(double x, double y):
    '''Return cube(x) + cube(y); callable from both Python and Cython.'''
    cdef double total = cube(x)
    total += cube(y)
    return total

### Function Inlining

We can declare small, frequently called functions as `cdef inline`. This asks the C compiler to inline them, which removes the function-call overhead.

In [None]:
%%cython --annotate


cdef inline double cube(double x):
    '''Return x raised to the third power; declared ``inline`` for the C compiler.'''
    cdef double squared = x * x
    return squared * x


cpdef double cube_sum(double x, double y):
    '''Return cube(x) + cube(y); callable from both Python and Cython.'''
    cdef double total = cube(x)
    total += cube(y)
    return total

## Typed Memory Views

Cython gives direct access to the contents of NumPy arrays through typed **memoryviews**.

In [None]:
%%cython --annotate

def my_sum(double[:] x):
    '''Sum the elements of a one-dimensional ``double`` buffer.

    Parameters
    ----------
    x : any object exposing a 1-D buffer of C doubles
        (for example a ``float64`` NumPy array).

    Returns
    -------
    The sum of all elements as a Python float; 0.0 for an empty buffer.
    '''
    # Memoryview extents are Py_ssize_t; holding them in a C int would
    # truncate the length of very long arrays.
    cdef Py_ssize_t i, nx = x.shape[0]
    cdef double s = 0.0
    for i in range(nx):
        s += x[i]

    return s

In [None]:
# Cross-check the Cython reduction against NumPy's own sum on the same data.
x = np.ones(1000, dtype=np.float64)
s1, s2 = my_sum(x), x.sum()
print(s1, s2)

In [None]:
%%cython --annotate

def my_sum2d(double[:, :] x):
    '''Sum the elements of a two-dimensional ``double`` buffer.

    Parameters
    ----------
    x : any object exposing a 2-D buffer of C doubles
        (for example a ``float64`` NumPy array).

    Returns
    -------
    The sum of all elements as a Python float; 0.0 if either extent is 0.
    '''
    # Memoryview extents are Py_ssize_t; a C int could truncate them.
    cdef Py_ssize_t i, j
    cdef Py_ssize_t nx = x.shape[0], ny = x.shape[1]
    cdef double s = 0.0
    for i in range(nx):
        for j in range(ny):
            s += x[i, j]

    return s

In [None]:
# Cross-check the 2-D Cython reduction against NumPy's own sum on the same data.
x = np.ones((1000, 1000), dtype=np.float64)
s1, s2 = my_sum2d(x), x.sum()
print(s1, s2)

## Disable bounds checking and wraparound

For safety reasons, Cython checks whether we try to access elements outside the array boundaries.
Furthermore, it allows negative array indices. We can exchange safety for performance by disabling both features.


In [None]:
%%cython --annotate

from cython cimport wraparound, boundscheck

@wraparound(False)
@boundscheck(False)
cpdef double my_sum2d(double[:, :] x):
    '''Sum a two-dimensional ``double`` buffer without index safety checks.

    Negative-index wraparound and bounds checking are switched off for
    this function. That is safe here because both loop indices stay
    within ``[0, extent)`` by construction.

    Parameters
    ----------
    x : any object exposing a 2-D buffer of C doubles.

    Returns
    -------
    The sum of all elements; 0.0 if either extent is 0.
    '''
    # Memoryview extents are Py_ssize_t; a C int could truncate them.
    cdef Py_ssize_t i, j
    cdef Py_ssize_t nx = x.shape[0], ny = x.shape[1]
    cdef double s = 0.0
    for i in range(nx):
        for j in range(ny):
            s += x[i, j]

    return s

## Releasing the GIL and parallelizing loops

### Calculating Julia Sets

In [None]:
%%cython

from cython cimport boundscheck, wraparound

@wraparound(False)
@boundscheck(False)
def julia_set_cython(const double[:, :] X, const double[:, :] Y,
                     const double cx, const double cy,
                     const int iter_max, const double radius2,
                     int[:, :] julia):
    '''Fill ``julia`` with escape-time counts of the map z -> z**2 + c.

    For every grid point the iteration starts at ``z = X[i, j] + 1j * Y[i, j]``
    and repeats ``z = z**2 + (cx + 1j * cy)`` while ``|z|**2 < radius2`` and
    fewer than ``iter_max`` steps have been taken. The number of steps
    performed is stored in ``julia[i, j]``.

    Parameters
    ----------
    X, Y : 2-D buffers of doubles with the real and imaginary parts of
        the starting points.
    cx, cy : real and imaginary part of the constant c.
    iter_max : upper limit for the iteration count.
    radius2 : squared escape radius.
    julia : 2-D buffer of C ints that receives the counts (modified in place).

    Raises
    ------
    ValueError if ``X``, ``Y`` and ``julia`` do not all have the same shape.
    '''
    cdef:
        Py_ssize_t i, j, nx, ny
        int k
        double x, y
    nx = X.shape[0]
    ny = X.shape[1]
    # Bounds checking is disabled for this function, so a shape mismatch
    # would read or write outside the buffers. Reject it up front.
    if (Y.shape[0] != nx or Y.shape[1] != ny
            or julia.shape[0] != nx or julia.shape[1] != ny):
        raise ValueError('X, Y and julia must have the same shape')
    for i in range(nx):
        for j in range(ny):
            x = X[i, j]
            y = Y[i, j]
            k = 0
            while x * x + y * y < radius2 and k < iter_max:
                # (x + iy)**2 + c, split into real and imaginary parts.
                x, y = x * x - y * y + cx, 2.0 * x * y + cy
                k = k + 1

            julia[i, j] = k

In [None]:
X, Y = np.meshgrid(np.linspace(-2.0 , 2.0, 5000), np.linspace(-2.0, 2.0, 5000))
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
julia = np.zeros_like(X, dtype=np.int32)
c = -0.9 + 0.22143j
radius2 = 4.0
%timeit julia_set_cython(X, Y, c.real, c.imag, 100, radius2, julia)
ax.set_aspect('equal')
ax.imshow(julia, extent=[-2, 2, -2, 2]);

In [None]:
%%cython

from cython cimport boundscheck, wraparound
from cython.parallel cimport prange

@boundscheck(False)
@wraparound(False)
def julia_set_cython(const double[:, :] X, const double[:, :] Y,
                     const double cx, const double cy,
                     const int iter_max, const double radius2,
                     int[:, :] julia):
    '''Fill ``julia`` with escape-time counts of the map z -> z**2 + c.

    Same computation as the serial kernel, but the loop over rows is a
    ``prange`` executed without the GIL. The loop is only multi-threaded
    when the extension is built with OpenMP enabled; this cell does not
    pass any OpenMP flags.

    Parameters
    ----------
    X, Y : 2-D buffers of doubles with the real and imaginary parts of
        the starting points.
    cx, cy : real and imaginary part of the constant c.
    iter_max : upper limit for the iteration count.
    radius2 : squared escape radius.
    julia : 2-D buffer of C ints that receives the counts (modified in place).

    Raises
    ------
    ValueError if ``X``, ``Y`` and ``julia`` do not all have the same shape.
    '''
    cdef:
        Py_ssize_t i, j, nx, ny
        int k
        double x, y
    nx = X.shape[0]
    ny = X.shape[1]
    # Bounds checking is disabled for this function, so a shape mismatch
    # would read or write outside the buffers. Reject it up front, while
    # the GIL is still held.
    if (Y.shape[0] != nx or Y.shape[1] != ny
            or julia.shape[0] != nx or julia.shape[1] != ny):
        raise ValueError('X, Y and julia must have the same shape')
    for i in prange(nx, nogil=True):
        # j, x, y and k are assigned inside the prange body, which makes
        # them private to each thread.
        for j in range(ny):
            x = X[i, j]
            y = Y[i, j]
            k = 0
            while x * x + y * y < radius2 and k < iter_max:
                # (x + iy)**2 + c, split into real and imaginary parts.
                x, y = x * x - y * y + cx, 2.0 * x * y + cy
                k = k + 1

            julia[i, j] = k

In [None]:
X, Y = np.meshgrid(np.linspace(-2.0 , 2.0, 5000), np.linspace(-2.0, 2.0, 5000))
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
julia = np.zeros_like(X, dtype=np.int32)
c = -0.9 + 0.22143j
radius2 = 4.0
%timeit julia_set_cython(X, Y, c.real, c.imag, 100, radius2, julia)
ax.set_aspect('equal')
ax.imshow(julia, extent=[-2, 2, -2, 2]);

In [None]:
%%cython

from cython cimport boundscheck, wraparound
from cython.parallel cimport prange

@boundscheck(False)
@wraparound(False)
def julia_set_cython(const double[:, :] X, const double[:, :] Y,
                     const double cx, const double cy,
                     const int iter_max, const double radius2,
                     int[:, :] julia):
    '''Fill ``julia`` with escape-time counts of the map z -> z**2 + c.

    The loop over rows is a ``prange`` executed without the GIL. The loop
    is only multi-threaded when the extension is built with OpenMP
    enabled; this cell does not pass any OpenMP flags.

    Parameters
    ----------
    X, Y : 2-D buffers of doubles with the real and imaginary parts of
        the starting points.
    cx, cy : real and imaginary part of the constant c.
    iter_max : upper limit for the iteration count.
    radius2 : squared escape radius.
    julia : 2-D buffer of C ints that receives the counts (modified in place).

    Raises
    ------
    ValueError if ``X``, ``Y`` and ``julia`` do not all have the same shape.
    '''
    cdef:
        Py_ssize_t i, j, nx, ny
        int k
        double x, y
    nx = X.shape[0]
    ny = X.shape[1]
    # Bounds checking is disabled for this function, so a shape mismatch
    # would read or write outside the buffers. Reject it up front, while
    # the GIL is still held.
    if (Y.shape[0] != nx or Y.shape[1] != ny
            or julia.shape[0] != nx or julia.shape[1] != ny):
        raise ValueError('X, Y and julia must have the same shape')
    for i in prange(nx, nogil=True):
        # j, x, y and k are assigned inside the prange body, which makes
        # them private to each thread.
        for j in range(ny):
            x = X[i, j]
            y = Y[i, j]
            k = 0
            while x * x + y * y < radius2 and k < iter_max:
                # (x + iy)**2 + c, split into real and imaginary parts.
                x, y = x * x - y * y + cx, 2.0 * x * y + cy
                k = k + 1

            julia[i, j] = k

In [None]:
X, Y = np.meshgrid(np.linspace(-2.0 , 2.0, 5000), np.linspace(-2.0, 2.0, 5000))
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
julia = np.zeros_like(X, dtype=np.int32)
c = -0.9 + 0.22143j
radius2 = 4.0
%timeit julia_set_cython(X, Y, c.real, c.imag, 100, radius2, julia)
ax.set_aspect('equal')
ax.imshow(julia, extent=[-2, 2, -2, 2]);

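### !!! WE FORGOT TO PUT THE CORRECT COMPILATION/LINKING OPTIONS !!!

Without the OpenMP flag (`-fopenmp` for GCC), `prange` is compiled as an ordinary serial loop. The next cell passes the flag to both the compiler and the linker.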

In [None]:
%%cython

# distutils: extra_compile_args = -fopenmp -march=native
# distutils: extra_link_args = -fopenmp
from cython cimport boundscheck, wraparound
from cython.parallel cimport prange

@boundscheck(False)
@wraparound(False)
def julia_set_cython(const double[:, :] X, const double[:, :] Y,
                     const double cx, const double cy,
                     const int iter_max, const double radius2,
                     int[:, :] julia):
    '''Fill ``julia`` with escape-time counts of the map z -> z**2 + c.

    The loop over rows is a ``prange`` executed without the GIL. The
    ``# distutils:`` header comments of this cell pass ``-fopenmp`` to the
    compiler and the linker, which is what enables OpenMP for this build.

    Parameters
    ----------
    X, Y : 2-D buffers of doubles with the real and imaginary parts of
        the starting points.
    cx, cy : real and imaginary part of the constant c.
    iter_max : upper limit for the iteration count.
    radius2 : squared escape radius.
    julia : 2-D buffer of C ints that receives the counts (modified in place).

    Raises
    ------
    ValueError if ``X``, ``Y`` and ``julia`` do not all have the same shape.
    '''
    cdef:
        Py_ssize_t i, j, nx, ny
        int k
        double x, y
    nx = X.shape[0]
    ny = X.shape[1]
    # Bounds checking is disabled for this function, so a shape mismatch
    # would read or write outside the buffers. Reject it up front, while
    # the GIL is still held.
    if (Y.shape[0] != nx or Y.shape[1] != ny
            or julia.shape[0] != nx or julia.shape[1] != ny):
        raise ValueError('X, Y and julia must have the same shape')
    for i in prange(nx, nogil=True):
        # j, x, y and k are assigned inside the prange body, which makes
        # them private to each thread.
        for j in range(ny):
            x = X[i, j]
            y = Y[i, j]
            k = 0
            while x * x + y * y < radius2 and k < iter_max:
                # (x + iy)**2 + c, split into real and imaginary parts.
                x, y = x * x - y * y + cx, 2.0 * x * y + cy
                k = k + 1

            julia[i, j] = k

In [None]:
X, Y = np.meshgrid(np.linspace(-2.0 , 2.0, 5000), np.linspace(-2.0, 2.0, 5000))
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
julia = np.zeros_like(X, dtype=np.int32)
c = -0.9 + 0.22143j
radius2 = 4.0
%timeit julia_set_cython(X, Y, c.real, c.imag, 100, radius2, julia)
ax.set_aspect('equal')
ax.imshow(julia, extent=[-2, 2, -2, 2]);

# Additional Cython Features

## Automatic Type Inference Using Cython's `infer_types`

In [None]:
import numpy as np
from random import random
import Cython

%load_ext Cython

In [None]:
%%cython -a

from random import random
from cython cimport infer_types

cdef inline double my_rand():
    '''Return one draw of Python's random() as a C double.'''
    cdef double sample = random()
    return sample

@infer_types(True)
cpdef pi_mc_inferred(n=1000):
    '''Estimate PI by Monte Carlo sampling, with no explicit local types.

    No local variable is declared here; the ``infer_types(True)`` directive
    asks Cython to infer their types from the assignments.

    ``n`` is the number of points drawn from the unit square. The result
    is four times the fraction of points inside the quarter unit circle.
    '''
    hits = 0
    for step in range(n):
        # my_rand() is declared to return a C double.
        px = my_rand()
        py = my_rand()
        if px * px + py * py <= 1.0:
            hits += 1

    return 4.0 * hits / n

In [None]:
%time pi_mc_inferred(10000000)

## Cython Extensions Types

In [None]:
class PyRectangle:
    '''Axis-aligned rectangle described by its two side lengths.

    Pure-Python baseline for the Cython extension type defined later.
    '''

    def __init__(self, x, y):
        # x and y are the two side lengths.
        self.x = x
        self.y = y

    def area(self):
        '''Return the product of the two side lengths.'''
        width, height = self.x, self.y
        return width * height

    def perimeter(self):
        '''Return twice the sum of the two side lengths (always a float).'''
        half = self.x + self.y
        return 2.0 * half

In [None]:
%%cython 

cdef class CyRectangle:
    '''Extension-type rectangle whose side lengths are stored as C doubles.'''
    # The two side lengths; C-level attributes, not visible from Python.
    cdef double x, y

    def __cinit__(self, x, y):
        # The Python arguments are converted to C doubles on assignment.
        self.x = x
        self.y = y

    cpdef double area(self):
        '''Return the product of the two side lengths.'''
        cdef double width = self.x
        cdef double height = self.y
        return width * height

    cpdef double perimeter(self):
        '''Return twice the sum of the two side lengths.'''
        cdef double half = self.x + self.y
        return 2.0 * half

In [None]:
a = CyRectangle(1, 2)
print(a.area(), a.perimeter())

In [None]:
%%cython
from random import random

cdef class CyRectangle:
    '''Extension-type rectangle whose side lengths are stored as C doubles.'''
    # The two side lengths; C-level attributes, not visible from Python.
    cdef double x, y

    def __cinit__(self, x, y):
        # The Python arguments are converted to C doubles on assignment.
        self.x = x
        self.y = y

    cpdef double area(self):
        '''Return the product of the two side lengths.'''
        cdef double width = self.x
        cdef double height = self.y
        return width * height

    cpdef double perimeter(self):
        '''Return twice the sum of the two side lengths.'''
        cdef double half = self.x + self.y
        return 2.0 * half

    
cdef class CyRectangles:
    '''Collection of ``n`` CyRectangle objects with random side lengths.'''
    cdef:
        list rectangles

    def __cinit__(self, int n):
        # No unsigned loop counter here: mixing one with the signed `n`
        # turns a negative `n` into a huge loop bound. With a plain range
        # a non-positive `n` simply produces an empty collection.
        self.rectangles = [CyRectangle(random(), random()) for _ in range(n)]

    cpdef double total_area(self):
        '''Return the sum of the areas of all stored rectangles (0.0 if empty).'''
        # Typing the loop variable lets Cython call area() through the
        # C-level method table instead of a Python attribute lookup.
        cdef CyRectangle rect
        cdef double area = 0.0
        for rect in self.rectangles:
            area += rect.area()

        return area

In [None]:
a = CyRectangles(100000)

In [None]:
a.total_area()

## C-like Allocation/Deallocation

In [None]:
%%cython

from libc.stdlib cimport malloc, free


cdef class CyRangeVector:
    '''The integers ``start, start + 1, ..., end - 1`` in a malloc'ed C array.

    The buffer is allocated in ``__cinit__`` and released in ``__dealloc__``.
    '''
    cdef:
        int *data   # heap buffer holding `size` ints; NULL until allocated
        int size    # number of stored elements (end - start)

    def __cinit__(self, int start, int end):
        # Signed index: an unsigned counter combined with signed bounds
        # mishandles ranges that start below zero.
        cdef int i
        if start >= end:
            # ValueError is still caught by callers using `except Exception`.
            raise ValueError(f'{start} >= {end}')
        self.size = end - start
        self.data = <int*>malloc(self.size * sizeof(int))
        if self.data == NULL:
            # malloc reports failure by returning NULL; never write through it.
            raise MemoryError()

        for i in range(self.size):
            self.data[i] = start + i

    def __getitem__(self, int i):
        '''Return element ``i``, or -1 when ``i`` is outside ``[0, size)``.'''
        # NOTE(review): -1 can also be a stored value when the range covers
        # negative numbers; raising IndexError would be unambiguous but
        # would change the existing contract.
        if i < 0 or i >= self.size:
            return -1

        return self.data[i]

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ raised
        # before the buffer was allocated.
        free(self.data)

In [None]:
my_range = CyRangeVector(10, 11000)
my_range[2]

## Interacting with the C++ Standard Template Library

As soon as we start using the C++ STL from inside Cython, we have to switch to `language=c++`.

In [None]:
%%cython

# distutils: language=c++

from libcpp.vector cimport vector

cdef class CyRangeVector:
    '''The integers ``start, start + 1, ..., end - 1`` in a C++ ``std::vector<int>``.

    The vector is a C++ member of the extension type, so no explicit
    allocation or deallocation code is written here.
    '''
    cdef:
        vector[int] data

    def __cinit__(self, int start, int end):
        # Signed index: an unsigned counter combined with signed bounds
        # mishandles ranges that start below zero.
        cdef int i
        if start >= end:
            # ValueError is still caught by callers using `except Exception`.
            raise ValueError(f'{start} >= {end}')
        # The final size is known, so allocate once instead of growing
        # the vector repeatedly.
        self.data.reserve(end - start)
        for i in range(start, end):
            self.data.push_back(i)

    def __getitem__(self, int i):
        '''Return element ``i``, or None when ``i`` is out of range.'''
        # Check the sign first so that the comparison with the unsigned
        # size() is only made for non-negative indices.
        if i < 0 or <size_t>i >= self.data.size():
            return None

        return self.data[i]

In [None]:
v = CyRangeVector(1, 20)
print(v[1])

In [None]:
%%cython

# distutils: language=c++

from libcpp.vector cimport vector

cpdef vector[int] cy_range(int start, int end):
    '''Return the integers ``start, start + 1, ..., end - 1``.

    The result is built as a C++ ``std::vector<int>``; when called from
    Python it is converted to a list. An empty result is returned when
    ``start >= end``.
    '''
    cdef vector[int] v
    # Signed index: an unsigned counter combined with signed bounds
    # mishandles ranges that start below zero.
    cdef int i
    if end > start:
        # The final size is known, so allocate once up front.
        v.reserve(end - start)
    for i in range(start, end):
        v.push_back(i)

    return v

In [None]:
x = cy_range(1, 10)
print(x, type(x))

### Additional STL libraries are available and you can look at their [declaration files](https://github.com/cython/cython/tree/master/Cython/Includes/libcpp)