Names:

In [None]:
# Import statements
import numpy as np
import timeit # we will be using this to time our code accurately
%alias_magic t timeit 

In [None]:
# Lookup table that converts the unit suffix printed by timeit into seconds.
# timeit picks the unit from the size of the measurement, so every unit it can
# emit needs an entry here -- otherwise the later seconds_units[...] lookups
# raise KeyError as soon as a measurement lands in an unlisted unit.
seconds_units = {
    's': 1.0,
    'ms': 1E-3,
    'us': 1E-6,         # ASCII spelling of microseconds
    '\u00b5s': 1E-6,    # microseconds written with the micro sign
    '\u03bcs': 1E-6,    # microseconds written with the Greek letter mu
    'ns': 1E-9,
}

# What makes a difference to code speed?

For each of the examples below:
1. make an initial guess at which you think will be quicker (A or B (or C), or no difference) - write this down!!
2. Run the code and time it - record which is quicker and by how much (a factor, not in microseconds). Was your guess correct or not?

## Built in functions (and loops) vs writing your own

See, e.g. https://docs.python.org/3.7/library/functions.html

Many of Python’s built-in functions are written in C.

Initial guess: ? will be quicker because ?

A. Built-in function `sum`, used to sum up all the integers from 0 to 9999

In [None]:
%%t -n1000
# Variant A: one call to the built-in sum over range(10000), i.e. the integers 0..9999
X = sum(range(10000))

B. Writing a for loop to sum up all the integers from 0 to 9999

In [None]:
%%t -n1000
# Variant B: same total as sum(range(10000)), but accumulated one addition
# at a time in an explicit Python-level loop
X = 0
for i in range(10000):
    X += i

## For loops vs while loops

In [None]:
# Initial guess: ? will be quicker because ?

A. for loop to sum up all the integers from 0 to 999

In [None]:
%%t -n1000
# Variant A: range() supplies each value of i, so the loop body only accumulates
result = 0
for i in range(1000):
    result += i

B. while loop to sum up all the integers from 0 to 999

In [None]:
%%t -n1000
# Variant B: the counter is tested and incremented by hand on every pass,
# so each iteration runs a comparison and an extra addition in Python code
result = 0
i = 0
while i < 1000:
    result += i
    i +=1

## Looping over Function calls

In [None]:
# Initial guess: ? will be quicker because ?

A. looping within the function

In [None]:
def calc_squares(irange):
  """Return a list containing i**2 for every i in range(irange).

  The loop lives inside the function, so a caller makes a single function
  call regardless of how many squares are requested.
  """
  squares = []
  for i in range(irange):
    squares.append(i**2)
  return squares

In [None]:
%%t -n1000
# Variant A: a single call; calc_squares does all 1000 iterations internally
squares = calc_squares(1000)

B. Looping over function calls

In [None]:
def calc_square(num):
    """Return num squared (one value per call; any looping is left to the caller)."""
    return num**2

In [None]:
%%t -n1000    
# Variant B: the loop is outside the function, so calc_square is called
# once per element (1000 separate function calls)
squares = []
for i in range(1000):
    squares.append(calc_square(i))

## Numpy vs standard functions

Initial guess: ? will be quickest because ?, ? will be slowest because ?

A. Python built-in functions `sum` and `range`, used to sum the integers from 0 to 9999

In [None]:
%%t -n1000
# Variant A: pure Python -- built-in sum over a built-in range (integers 0..9999)
X1 = sum(range(10000))


B. using numpy np.arange to make the array and np.sum to sum it

In [None]:
%%t -n1000
# Variant B: pure numpy -- np.arange builds the array 0..9999 and np.sum reduces it
X2 = np.sum(np.arange(0,10000))

C. Some numpy (np.sum) combined with non-numpy (range)?

In [None]:
%%t -n1000
# Variant C: mixed -- the Python range is first converted to an array with
# np.array, and only then summed with np.sum
X4 = np.sum(np.array(range(10000)))

## Numpy array arithmetic vs looping

Initial guess: ? will be quickest because ?, ? will be slowest because ?

A. Looping through numpy arrays to perform array multiplication

In [None]:
%%t -n1000
# Build two 2 x 1000 integer arrays (the construction is part of what gets timed)
X1 = np.array([np.arange(0,1000),np.arange(-1000,0)])
X2 = np.array([np.arange(0,1000)**2,np.arange(-1000,0)**2])
# np.zeros gives a result array of the same shape as X1 to fill in
X3 = np.zeros(X1.shape)
# calculate the element-wise product X1 * X2, one element at a time
for ii in range(0,len(X1[:,0])):        # ii runs over the rows
    for jj in range(0,len(X1[0,:])):    # jj runs over the columns
        X3[ii,jj] = X1[ii,jj] * X2[ii,jj]

B. Using np.multiply

In [None]:
%%t -n1000
# Build the same two 2 x 1000 integer arrays as in variant A
X1 = np.array([np.arange(0,1000),np.arange(-1000,0)])
X2 = np.array([np.arange(0,1000)**2,np.arange(-1000,0)**2])
# calculate the element-wise product X1 * X2 in a single numpy call
X3_2 = np.multiply(X1,X2)

In [None]:
# Check that the looped and the vectorised products actually give the same result
X1 = np.array([np.arange(0,1000),np.arange(-1000,0)])
X2 = np.array([np.arange(0,1000)**2,np.arange(-1000,0)**2])

X3 = np.zeros(X1.shape)
# calculate the element-wise product X1 * X2 with explicit loops
for ii in range(0,len(X1[:,0])):
    for jj in range(0,len(X1[0,:])):
        X3[ii,jj] = X1[ii,jj] * X2[ii,jj]

# calculate the element-wise product X1 * X2 in a single numpy call
X3_2 = np.multiply(X1,X2)

# Compare via the absolute difference: with the signed difference a negative
# discrepancy is invisible to amax, and positive and negative discrepancies
# can cancel out in the sum. Both numbers printed below are 0 only if every
# element matches.
abs_diff = np.abs(X3 - X3_2)
print(np.amax(abs_diff))
print(np.sum(abs_diff))

# Challenge: speed up this inefficient code as much as possible

Your goal: speed up the following code as much as possible. 
Some websites that might help: 

    https://junye0798.com/post/ten-tricks-to-spedd-up-your-python-codes/
    https://towardsdatascience.com/10-ways-to-speed-up-your-python-code-e3d57630b710
    

We'll use a really simple example for this.
Imagine that we want to calculate X4 = k1 * X1 + k2 * X2 + k3 * X3 + X2 * X3, where X1, X2 and X3 are arrays, k1, k2 and k3 are constants, and we have three different sets of constants (k1, k2, k3).
Your task is to speed this up as much as possible - because some computers may be faster than others, we'll compare the relative speed, not the absolute speed!
You will be disqualified if your result is not the same numbers as the original (it does not have to be in the same format), so you should check this.

Don't change the code in the following boxes. This is the original code, and we will store the original time it took for comparison to your sped-up code

In [None]:
%%capture result_pre
%%timeit -n100
# These 2 magic commands need to be in this order, at the top of the cell.
# %%timeit -n100 times the code below in loops of 100 executions each, and
# %%capture saves the printed timing report into result_pre

# X1 is the integers 0 to 999 in row 1, and the integers 2000 to 2999 in row 2
X1 = [range(0,1000),range(2000,3000)]

# X2 is the squares of the integers 1000 to 1999 in row 1, and of the integers 4000 to 4999 in row 2
def calc_square(num):
    return num**2
    
squares = []
for i in range(1000,2000):
    squares.append(calc_square(i))
squares2 = []
for i in range(4000,5000):
    squares2.append(calc_square(i)) 
X2 = [squares,squares2]

# X3 is the cubes of the integers 0 to 999 in row 1, and of the integers -1000 to -1 in row 2
def calc_cube(num):
    return num**3
    
cubes = []
for i in range(0,1000):
    cubes.append(calc_cube(i))
cubes2 = []
for i in range(-1000,0):
    cubes2.append(calc_cube(i)) 
X3 = [cubes,cubes2]

# Three sets of constants: set number t is (ks['k1'][t], ks['k2'][t], ks['k3'][t])
ks = dict()
ks['k1'] = (2.5,2.5,2.5)
ks['k2'] = (2.3,2.4,2.5)
ks['k3'] = (1,4,9)

def multiply_values(k,X):
    kX = k * X
    return(kX)

# X4_pre maps each set number (0, 1, 2) to a list of rows of results
X4_pre = {}
# calculate X4 = k1*X1 + k2*X2 + k3*X3 + X2*X3 element by element, once per set of constants
for testcase in range(0,3):
    X4_pre[testcase] = []
    for ii in range(0,len(X1)):
        newvalues = []
        for jj in range(0,len(X1[0])):
            k1X1 = multiply_values(ks['k1'][testcase],X1[ii][jj])
            k2X2 = multiply_values(ks['k2'][testcase],X2[ii][jj])
            k3X3 = multiply_values(ks['k3'][testcase],X3[ii][jj])
            
            X2X3 = multiply_values(X2[ii][jj],X3[ii][jj])
            newvalues.append(k1X1 + k2X2 + k3X3 + X2X3)
            
        X4_pre[testcase].append(newvalues)


In [None]:
# Show the timing report that %%capture stored for the original code
print(result_pre)

In [None]:
# Report the pre speed-up timing saved by the "magic" code in the previous cell.
# The captured report starts with "<number> <unit>", so split it once and
# take the first two fields.
_report_fields_pre = str(result_pre).split()
time_pre = float(_report_fields_pre[0])
units_pre = _report_fields_pre[1]
print(time_pre, units_pre)

# Look up the unit's size in seconds and express the measured time in seconds
time_pre_seconds = time_pre*seconds_units[units_pre]

Adapt the code below to speed it up as much as possible. Then use the code in the following cells to see how much quicker it is. 

In [None]:
%%capture result_post
%%timeit -n100
# These 2 magic commands need to be in this order, at the top of the cell.
# %%timeit -n100 times the code below in loops of 100 executions each, and
# %%capture saves the printed timing report into result_post

# Your code here

# X1 is the integers 0 to 999 in row 1, and the integers 2000 to 2999 in row 2
X1 = [range(0,1000),range(2000,3000)]

# X2 is the squares of the integers 1000 to 1999 in row 1, and of the integers 4000 to 4999 in row 2
def calc_square(num):
    return num**2
    
squares = []
for i in range(1000,2000):
    squares.append(calc_square(i))
squares2 = []
for i in range(4000,5000):
    squares2.append(calc_square(i)) 
X2 = [squares,squares2]

# X3 is the cubes of the integers 0 to 999 in row 1, and of the integers -1000 to -1 in row 2
def calc_cube(num):
    return num**3
    
cubes = []
for i in range(0,1000):
    cubes.append(calc_cube(i))
cubes2 = []
for i in range(-1000,0):
    cubes2.append(calc_cube(i)) 
X3 = [cubes,cubes2]

# Three sets of constants: set number t is (ks['k1'][t], ks['k2'][t], ks['k3'][t])
ks = dict()
ks['k1'] = (2.5,2.5,2.5)
ks['k2'] = (2.3,2.4,2.5)
ks['k3'] = (1,4,9)

def multiply_values(k,X):
    kX = k * X
    return(kX)

# X4_post maps each set number (0, 1, 2) to a list of rows of results
X4_post = {}
# calculate X4 = k1*X1 + k2*X2 + k3*X3 + X2*X3 element by element, once per set of constants
for testcase in range(0,3):
    X4_post[testcase] = []
    for ii in range(0,len(X1)):
        newvalues = []
        for jj in range(0,len(X1[0])):
            k1X1 = multiply_values(ks['k1'][testcase],X1[ii][jj])
            k2X2 = multiply_values(ks['k2'][testcase],X2[ii][jj])
            k3X3 = multiply_values(ks['k3'][testcase],X3[ii][jj])
            
            X2X3 = multiply_values(X2[ii][jj],X3[ii][jj])
            newvalues.append(k1X1 + k2X2 + k3X3 + X2X3)
            
        X4_post[testcase].append(newvalues)




In [None]:
# Show the timing report that %%capture stored for the sped-up code
print(result_post)

In [None]:
# Print the time taken post speed-up from the values saved by the "magic" code
# in the previous cell. The captured report starts with "<number> <unit>".
time_post = float(str(result_post).split()[0])
units_post = (str(result_post).split()[1])

# Echo the parsed measurement, as the matching pre speed-up cell does, so the
# two timings can be compared by eye
print(time_post, units_post)

# Convert to seconds and calculate the speedup factor
# (original time divided by new time, so larger means faster)
time_post_seconds = time_post*seconds_units[units_post]
speedup = time_pre_seconds/time_post_seconds
print("speedup factor = " + "{:4.1f}".format(speedup))

In [None]:
## Your code to check the results are identical
# You will have to take the code out of the magic "timeit" cells to save X4_pre and X4_post to accessible variables