# Part 1: Speed Comparison: Numpy vs. Python

In [2]:
import time
import numpy as np

## Dot Product

Given two vectors $a$ and $b$ of length $n$, we want to compute $$a \cdot b = a_1b_1 + a_2b_2 + \dots + a_nb_n$$

In [3]:
VECTOR_SIZE = int(1e8)

# Draw both vectors from a seeded generator so the data (and therefore the
# printed dot product) is identical on every Restart & Run All.
rng = np.random.default_rng(seed=0)
a = rng.random(VECTOR_SIZE)  # uniform samples in [0, 1)
b = rng.random(VECTOR_SIZE)

### Compute dot product using numpy

In [4]:
# Time the vectorized dot product. perf_counter() is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, whereas
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
ans = np.dot(a, b)
end_time = time.perf_counter()
print(f"Answer = {ans:.2f}\nTime taken to compute answer = {end_time-start_time:.2f}s")

Answer = 25001032.57
Time taken to compute answer = 0.20s


### Compute dot product using Python for loops

In [5]:
# Same dot product, but with one multiply-add per Python-level iteration.
ans = 0
# perf_counter() is the monotonic, high-resolution clock meant for timing
# intervals (time.time() can jump if the system clock is adjusted).
start_time = time.perf_counter()
# Walk both vectors in lockstep instead of indexing with range(len(a)).
for a_i, b_i in zip(a, b):
    ans += a_i * b_i
end_time = time.perf_counter()
print(f"Answer = {ans:.2f}\nTime taken to compute answer = {end_time-start_time:.2f}s")

Answer = 25001032.57
Time taken to compute answer = 49.38s


#### Looks like numpy dot product is roughly 250x faster!!

## Matrix Multiplication

Given two matrices $A$ and $B$ of sizes $p \times q$ and $q \times r$ respectively, we want to compute the $p \times r$ matrix $AB$ whose entries are $$(AB)_{ij} = a_{i1}b_{1j} + a_{i2}b_{2j} + \dots + a_{iq}b_{qj} = \sum_{k=1}^{q} a_{ik}b_{kj}$$

In [6]:
# Seeded generator: the matrices (and the printed corner of the product) are
# the same on every Restart & Run All.
rng = np.random.default_rng(seed=0)
# Standard-normal entries, stored as float32.
A = rng.normal(size=(2000, 1000)).astype(np.float32)
B = rng.normal(size=(1000, 3000)).astype(np.float32)

In [7]:
# Time the vectorized matrix product. perf_counter() is the monotonic,
# high-resolution clock meant for timing intervals (time.time() is wall-clock
# time and can jump if the system clock is adjusted).
start_time = time.perf_counter()
ans = np.matmul(A, B)  # A @ B is equivalent
end_time = time.perf_counter()
# Only the top-left 2x2 corner is printed; the full product is 2000x3000.
print(f"Answer = {ans[:2,:2]}...\nTime taken to compute answer = {end_time-start_time:.2f}s")

Answer = [[-21.61567    -4.2245626]
 [ -2.9993439  22.376247 ]]...
Time taken to compute answer = 0.17s


In [8]:
def py_matmul(A, B):
    """Multiply A (p x q) by B (q x r) with explicit Python loops; returns a (p, r) float array."""
    n_rows, n_inner = A.shape
    n_inner_b, n_cols = B.shape
    assert n_inner == n_inner_b, "Dimensions don't match!"  # AssertionError on mismatched inner sizes
    result = np.zeros((n_rows, n_cols))  # np.zeros default float dtype, whatever the input dtypes are
    for row in range(n_rows):  # one pass per output row
        for col in range(n_cols):  # one pass per output column
            for k in range(n_inner):  # walk the shared inner dimension
                result[row, col] += A[row, k] * B[k, col]  # line 10: the statement the exercise asks about

    return result

In [None]:
# WARNING: very slow. For the 2000x1000 and 1000x3000 matrices the innermost
# statement of py_matmul runs 2000 * 3000 * 1000 = 6e9 times; the recorded run
# took 5812.78s.
# perf_counter() is the monotonic, high-resolution clock meant for timing
# intervals (time.time() can jump if the system clock is adjusted).
start_time = time.perf_counter()
ans = py_matmul(A, B)
end_time = time.perf_counter()
print(f"Answer = {ans[:2,:2]}...\nTime taken to compute answer = {end_time-start_time:.2f}s")

Answer = [[-19.66146792 -23.37398979]
 [  3.05616885  30.46048401]]...
Time taken to compute answer = 5812.78s


#### Oops. Will this even compute? 
**Exercise:** How many times will line 10 of the cell defining `py_matmul` (the innermost `+=` statement inside the three nested loops) execute? What is the complexity of the above matrix multiplication code in big-O notation?

## Checking whether all elements of a list are identical (i.e., it has a single unique value)

In [9]:
VECTOR_SIZE = int(1e8)
a = np.ones(VECTOR_SIZE)  # every element identical
# Seeded generator so the random vector is the same on every Restart & Run All.
b = np.random.default_rng(seed=0).random(VECTOR_SIZE)  # uniform samples in [0, 1)

In [10]:
# perf_counter() is the monotonic, high-resolution clock meant for timing
# intervals (time.time() is wall-clock time and can jump if the system clock
# is adjusted).
start_time = time.perf_counter()
a_uniq = np.unique(a)
b_uniq = np.unique(b)
end_time = time.perf_counter()
# "unique" here means the array contains exactly one distinct value.
print(f"Is a unique? = {len(a_uniq)==1}\nIs b unique? = {len(b_uniq)==1}\nTime taken to compute answer = {end_time-start_time:.2f}s")

Is a unique? = True
Is b unique? = False
Time taken to compute answer = 19.07s


**Exercise:** Why does the unique operation take ~20s with numpy when the dot and matmul operations took a fraction of a second? 

(Hint: Think about behind-the-scenes vectorization of operations)


**NOTE: It is STILL 3x faster than the below Python for loop implementation.**

In [None]:
def _all_equal_loop(values):
    """Return True if every element of ``values`` equals its first element.

    Pure-Python counterpart of ``len(np.unique(values)) == 1``. Every element
    is visited (there is no early exit), so the timing reflects a full pass
    over the data. An empty input returns True.
    """
    if len(values) == 0:
        return True
    first = values[0]  # hoisted so the first element is not re-indexed on every iteration
    all_same = True
    for elem in values:
        if elem != first:
            all_same = False
    return all_same


# perf_counter() is the monotonic, high-resolution clock meant for timing
# intervals (time.time() can jump if the system clock is adjusted).
start_time = time.perf_counter()
a_uniq = _all_equal_loop(a)
b_uniq = _all_equal_loop(b)
end_time = time.perf_counter()
print(f"Is a unique? = {a_uniq}\nIs b unique? = {b_uniq}\nTime taken to compute answer = {end_time-start_time:.2f}s")

Is a unique? = True
Is b unique? = False
Time taken to compute answer = 67.34s


#  Part 2: Other Important Numpy Operations

## Returning frequency of elements in a list

In [11]:
a = np.ones(VECTOR_SIZE)
# With return_counts=True, np.unique returns a pair:
# (sorted unique values, number of occurrences of each value).
counts = np.unique(a, return_counts=True)
# .tolist() converts NumPy scalars to plain Python float/int, so the dict
# displays as {1.0: 100000000} on every NumPy version (NumPy 2 would
# otherwise show np.float64(...) / np.int64(...) wrappers).
value_counts = dict(zip(counts[0].tolist(), counts[1].tolist()))
value_counts

{1.0: 100000000}

In [12]:
b = np.array(["A", "notA", "A", "A", "notA"])
# With return_counts=True, np.unique returns a pair:
# (sorted unique values, number of occurrences of each value).
counts = np.unique(b, return_counts=True)
# .tolist() converts NumPy scalars to plain Python str/int, so the dict
# displays as {'A': 3, 'notA': 2} on every NumPy version (NumPy 2 would
# otherwise show np.str_(...) / np.int64(...) wrappers).
label_counts = dict(zip(counts[0].tolist(), counts[1].tolist()))
label_counts

{'A': 3, 'notA': 2}

## Stacking arrays horizontally
Hint: for folding in bias terms

In [None]:
# 3x3 matrix holding 1..9 in row-major order:
# [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
a = np.arange(1, 10).reshape(3, 3)
# 3x1 column of ones with the same integer dtype as `a`.
b = np.ones((3, 1), dtype=a.dtype)
# Append the column to the right of `a`; the result has shape (3, 4).
np.hstack([a, b])

array([[1, 2, 3, 1],
       [4, 5, 6, 1],
       [7, 8, 9, 1]])

## Manipulating dimensions or axes 

Convert a 2-D matrix of size $n \times 1$ into a 1-D numpy vector of length $n$

In [None]:
# Drop the length-1 second axis: shape (n, 1) -> (n,).
b_vec = b.squeeze(axis=1)
b_vec

array([1, 1, 1])

Convert a 1-D numpy vector of length $n$ to a 2-D matrix of size $n \times 1$

In [None]:
# Insert a new length-1 axis at position 1: shape (n,) -> (n, 1).
b = b_vec[:, np.newaxis]
b

array([[1],
       [1],
       [1]])

## Transpose operation
Yes. It is as simple as the math notation you've learned in lecture.

In [None]:
# `.T` gives the transpose of `a`: its rows become columns and vice versa.
a.T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

References:

[1] https://towardsdatascience.com/how-fast-is-c-compared-to-python-978f18f474c7

