Names:

In [1]:
# Import statements
import numpy as np
import timeit
# using this to time our code accurately
%alias_magic t timeit 

Created `%t` as an alias for `%timeit`.
Created `%%t` as an alias for `%%timeit`.


## Some inefficient code

Your goal: speed up the following code as much as possible. 
Some websites that might help: 

    https://junye0798.com/post/ten-tricks-to-spedd-up-your-python-codes/
    https://towardsdatascience.com/10-ways-to-speed-up-your-python-code-e3d57630b710
    

We'll use a really simple example for this .
Imagine that we want to calculate X4 = k_1 * X1 + k_2 * X2 + k_3 * X3 + X2*X3 where X1, X2, and X3 are arrays, and k1, k2 and k3 are constants, and we have three different sets of constants (k1,k2,k3).
Your task is to speed this up as much as possible - because some computers may be faster than others, we'll compare the relative speed, not the absolute speed!
You will be disqualified if your result is not the same numbers as the original (it does not have to be in the same format), so you should check this.

In [2]:
# Set up a dictionary to help us with timing
seconds_units = {}
seconds_units['ns'] = 1E-9
seconds_units['us'] = 1E-6
seconds_units['ms'] = 1E-3


In [3]:
%%capture result_pre
%%timeit -n100
# these 2 magic cells need to be in this order, at the top of the cell. 
# They run the code below 100 times, time this, and save the output of this time test into result_pre

# X1 is all numbers 0 to 1000 in row 1, and all numbers 2000 to 3000 in row 2
X1 = [range(0,1000),range(2000,3000)]

# X2 is all numbers squared from 1000 to 2000 in row 1, and all numbers squared from 4000 to 5000 in row 2
def calc_square(num):
    return num**2
    
squares = []
for i in range(1000,2000):
    squares.append(calc_square(i))
squares2 = []
for i in range(4000,5000):
    squares2.append(calc_square(i)) 
X2 = [squares,squares2]

# X3 is all integers cubed from 0 to 1000 in row 1 and all integers cubed from -1000 to 0 in row 2
def calc_cube(num):
    return num**3
    
cubes = []
for i in range(0,1000):
    cubes.append(calc_cube(i))
cubes2 = []
for i in range(-1000,0):
    cubes2.append(calc_cube(i)) 
X3 = [cubes,cubes2]

ks = dict()
ks['k1'] = (2.5,2.5,2.5)
ks['k2'] = (2.3,2.4,2.5)
ks['k3'] = (1,4,9)

def multiply_values(k,X):
    kX = k * X
    return(kX)

X4_pre = {}
# calculate X1 + X2
for testcase in range(0,3):
    X4_pre[testcase] = []
    for ii in range(0,len(X1)):
        newvalues = []
        for jj in range(0,len(X1[0])):
            k1X1 = multiply_values(ks['k1'][testcase],X1[ii][jj])
            k2X2 = multiply_values(ks['k2'][testcase],X2[ii][jj])
            k3X3 = multiply_values(ks['k3'][testcase],X3[ii][jj])
            
            X2X3 = multiply_values(X2[ii][jj],X3[ii][jj])
            newvalues.append(k1X1 + k2X2 + k3X3 + X2X3)
            
        X4_pre[testcase].append(newvalues)


In [4]:
print(result_pre)

6.54 ms +- 162 us per loop (mean +- std. dev. of 7 runs, 100 loops each)



In [5]:
# print time taken pre speed-up from the values saved by the "magic" code in previous cell
time_pre = float(str(result_pre).split()[0])
units_pre = (str(result_pre).split()[1])

print(time_pre, units_pre)
# convert units to numerical value
time_pre_seconds = time_pre*seconds_units[units_pre]

6.54 ms


In [6]:
%%capture result_post
%%timeit -n100
# these 2 magic cells need to be in this order, at the top of the cell. 
# They run the code below 100 times, time this, and save the output of this time test into result_post

# Your code here

# X1 is all numbers 0 to 1000 in row 1, and all numbers 2000 to 3000 in row 2
X1 = [range(0,1000),range(2000,3000)]

# X2 is all numbers squared from 1000 to 2000 in row 1, and all numbers squared from 4000 to 5000 in row 2
def calc_square(num):
    return num**2
    
squares = []
for i in range(1000,2000):
    squares.append(calc_square(i))
squares2 = []
for i in range(4000,5000):
    squares2.append(calc_square(i)) 
X2 = [squares,squares2]

# X3 is all integers cubed from 0 to 1000 in row 1 and all integers cubed from -1000 to 0 in row 2
def calc_cube(num):
    return num**3
    
cubes = []
for i in range(0,1000):
    cubes.append(calc_cube(i))
cubes2 = []
for i in range(-1000,0):
    cubes2.append(calc_cube(i)) 
X3 = [cubes,cubes2]

ks = dict()
ks['k1'] = (2.5,2.5,2.5)
ks['k2'] = (2.3,2.4,2.5)
ks['k3'] = (1,4,9)

def multiply_values(k,X):
    kX = k * X
    return(kX)

X4_post = {}
# calculate X1 + X2
for testcase in range(0,3):
    X4_post[testcase] = []
    for ii in range(0,len(X1)):
        newvalues = []
        for jj in range(0,len(X1[0])):
            k1X1 = multiply_values(ks['k1'][testcase],X1[ii][jj])
            k2X2 = multiply_values(ks['k2'][testcase],X2[ii][jj])
            k3X3 = multiply_values(ks['k3'][testcase],X3[ii][jj])
            
            X2X3 = multiply_values(X2[ii][jj],X3[ii][jj])
            newvalues.append(k1X1 + k2X2 + k3X3 + X2X3)
            
        X4_post[testcase].append(newvalues)




In [7]:
print(result_post)

6.68 ms +- 70.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)



In [8]:
# print time taken pre speed-up from the values saved by the "magic" code in previous cell
time_post = float(str(result_post).split()[0])
units_post = (str(result_post).split()[1])

# convert units to numerical value and calculate speedup factor
time_post_seconds = time_post*seconds_units[units_post]
speedup = time_pre_seconds/time_post_seconds
print("speedup factor = " + "{:4.1f}".format(speedup))

speedup factor =  1.0


In [9]:
## Your code to check the results are identical
# You will have to take the code out of the magic "timeit" cells to save X4_pre and X4_post to accessible variables