In [None]:
!pip3 install line_profiler

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random

# Number of random samples generated and summed in each timing example below
test_size = int(1e6)

### Basic timing 

Jupyter has a few built-in magics for timing code: `%time`/`%%time` and `%timeit`/`%%timeit`. The single-`%` (line) version times only the statement on the line it is placed in front of, while the `%%` (cell) version must be the first line of the cell and times the entire cell.

- `time` runs the code once and reports how long it took
- `timeit` runs the code multiple times and reports the mean time per run


In [None]:
# Baseline: time one run of building test_size random numbers in a pure-Python list and summing them
%time x = sum([random.random() for i in range(test_size)])

In [None]:
%%timeit
# Same pure-Python baseline as the %time cell, but run repeatedly by %%timeit
x = sum([random.random() for i in range(test_size)])

## Tip #1: Use numpy

Now let's run these cells and see the difference in speed.

In [None]:
%time x = np.sum(np.random.normal(size=test_size))

In [None]:
%%timeit
x = np.sum(np.random.normal(size=test_size))

## Tip #2: Don't reinvent the wheel, use numpy/scipy

numpy and scipy are packed with useful functions. Take some time at the beginning to find a function that does what you need, so your code will be faster from then on.

In [None]:
def make_square_matrix(N, value=None):
    """Build an N x N matrix as a list of N row lists.

    Args:
        N: number of rows and of columns.
        value: entry to place in every cell. When it is None (the default),
            each cell instead gets its own draw from random.random().

    Returns:
        A list of N lists, each holding N entries, filled row by row.
    """
    return [
        # one row: a random number per cell, or the fixed value when given
        [random.random() if value is None else value for _col in range(N)]
        for _row in range(N)
    ]

def my_matMul(X,Y):
    """Multiply two matrices with three nested Python loops.

    All three loop bounds come from len(X), so X and Y are expected to be
    square matrices of that same size, indexable as X[i][k] and Y[k][j].

    Returns:
        A new list of lists where entry [i][j] is the sum over k of
        X[i][k] * Y[k][j].
    """
    n = len(X)
    # Start from an n x n matrix of zeros and accumulate into it
    product = make_square_matrix(n, value=0)
    span = range(n)
    for row in span:          # row of X / row of the result
        for col in span:      # column of Y / column of the result
            for inner in span:  # position along the row of X and the column of Y
                product[row][col] += X[row][inner] * Y[inner][col]
    return product

In [None]:
# X and Y are pure Python lists of lists (no numpy involved)
X = make_square_matrix(200)
Y = make_square_matrix(200)
# Time one run of the hand-written triple-loop multiplication
%time myResult = my_matMul(X,Y)

In [None]:
# Converting the inputs to numpy arrays looks like it should speed things up, but
# running the same Python loops over them is actually slower (presumably due to
# the per-element overhead of indexing an array from Python).
X = np.array(make_square_matrix(200))
Y = np.array(make_square_matrix(200))
%time myResult = my_matMul(X,Y)

In [None]:
%%time 
# Let's keep our pure Python lists as inputs but multiply them with numpy's
# np.matmul...better than our original.
# %%time covers the whole cell, so building X and Y is included in the reported time.
X = make_square_matrix(200)
Y = make_square_matrix(200)
npResult = np.matmul(X,Y)

In [None]:
# Now let's use numpy for everything: build the inputs with np.random.rand
# and multiply them with np.matmul
X = np.random.rand(200,200)
Y = np.random.rand(200,200)
# Only the multiplication itself is timed here
%time npResult = np.matmul(X,Y)

## Tip #3: Profile your code

There are tools like line_profiler that will tell you which parts of your code run most often and which are the slowest.

In [None]:
# Load the line_profiler extension into Jupyter (it provides the %lprun magic)
%load_ext line_profiler

In [None]:
%%writefile my_function.py
# In order for this to work you need to save the functions you want to look at as a file
# and then load the functions in as a module

def make_square_matrix(N, value=None):
    """Build an N x N matrix as a list of N row lists.

    Args:
        N: number of rows and of columns.
        value: entry to place in every cell. When it is None (the default),
            each cell instead gets its own draw from random.random().

    Returns:
        A list of N lists, each holding N entries, filled row by row.
    """
    # Imported inside the function: this file has no module-level imports
    import random
    randomize = value is None
    grid = []
    for _row in range(N):
        cells = []
        for _col in range(N):
            # a fresh random number per cell, or the fixed value when given
            cells.append(random.random() if randomize else value)
        grid.append(cells)
    return grid

def my_matMul(X,Y):
    """Multiply two matrices with three nested Python loops.

    All three loop bounds come from len(X), so X and Y are expected to be
    square matrices of that same size, indexable as X[i][k] and Y[k][j].

    Returns:
        A new list of lists where entry [i][j] is the sum over k of
        X[i][k] * Y[k][j].
    """
    n = len(X)
    # Start from an n x n matrix of zeros and accumulate into it
    product = make_square_matrix(n, value=0)
    span = range(n)
    for row in span:          # row of X / row of the result
        for col in span:      # column of Y / column of the result
            for inner in span:  # position along the row of X and the column of Y
                product[row][col] += X[row][inner] * Y[inner][col]
    return product

In [None]:
# Load the functions in from the file we created above
from my_function import my_matMul, make_square_matrix

In [None]:
# Run the line profiler on the matrix multiplication
X = make_square_matrix(200)
Y = make_square_matrix(200)
# -T names the text file the profile results are written to
# -f names the function to profile line by line
# The rest of the line is the statement to execute under the profiler
%lprun -T profMatMul -f my_matMul my_matMul(X,Y)

In [None]:
X = make_square_matrix(200)
Y = make_square_matrix(200)
# -f can also name a function that is called inside the statement being run:
# here make_square_matrix is profiled while my_matMul(X,Y) executes.
# The two calls above run before %lprun starts, so they are not part of the profile.
%lprun -T profSqaureMatrix -f make_square_matrix my_matMul(X,Y)