# NumPy Exercises

Tamás Gál (tamas.gal@fau.de)

The latest version of this notebook is available at [https://github.com/escape2020/school2021](https://github.com/escape2020/school2021)

In [None]:
import numpy as np
import numba as nb
import numexpr as ne
import sys

print(f"Python version:  {sys.version}\n"
      f"NumPy version:   {np.__version__}\n"
      f"Numba version:   {nb.__version__}\n"
      f"NumExpr version:   {ne.__version__}")

rng = np.random.default_rng(42)  # initialise our random number generator

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (16, 5)
plt.rcParams['figure.dpi'] = 300

## Exercise 1: Extract the integer part of a random sample

    rng.uniform(0, 10, 10)
    
e.g. `[23.5, 42.0, 500.3, 123.9] -> [23, 42, 500, 123]`

### Solution:

In [None]:
a = np.array([23.5, 42.0, 500.3, 123.9])
a

In [None]:
a - a%1

In [None]:
np.floor(a)

In [None]:
-np.ceil(-a)  # floor expressed via ceil; `np.ceil(a) - 1` would turn the whole number 42.0 into 41.0

In [None]:
np.trunc(a)

In [None]:
a.astype(int)

### Further discussions

In [None]:
a = rng.uniform(0, 10, 10000)

In [None]:
%timeit a - a%1

In [None]:
%timeit np.floor(a)

In [None]:
%timeit np.ceil(a) - 1

In [None]:
%timeit np.trunc(a)

In [None]:
%timeit a.astype(int)  # the winner -> casting

## Exercise 2: Create a 5x5 matrix with 5's on its diagonal

```
5 0 0 0 0
0 5 0 0 0
0 0 5 0 0
0 0 0 5 0
0 0 0 0 5
```

### Solution: `np.eye()`

In [None]:
np.eye(5)

In [None]:
np.eye(5) * 5

In [None]:
np.eye(5)

### Alternative solutions and further discussions

In [None]:
%%timeit
a = np.eye(1000) * 5

In [None]:
%%timeit
a = np.eye(1000)
np.multiply(a, 5, out=a)  # avoid creating a copy 

In [None]:
%%timeit
a = np.zeros((1000, 1000))
a[np.diag_indices_from(a)] = 5

In [None]:
a = np.zeros((10, 10))
np.diag_indices_from(a)

In [None]:
%timeit np.diag(np.full(1000, 5))

In [None]:
%timeit np.diag(np.ones(1000) * 5)

### Numba?

In [None]:
@nb.njit
def diag_nb(n, value):
    """Build an ``n`` x ``n`` matrix carrying ``value`` on its main diagonal.

    The diagonal vector (ones scaled by ``value``) is created first and then
    expanded into a 2D array by ``np.diag``.
    """
    diagonal = np.ones(n) * value
    return np.diag(diagonal)

In [None]:
%timeit diag_nb(1000, 5)

In [None]:
@nb.njit
def diag2_nb(n, value):
    """Return an ``n`` x ``n`` matrix of zeros with ``value`` on the diagonal.

    The matrix is zero-initialised and only the ``n`` diagonal cells are
    written afterwards.
    """
    out = np.zeros((n, n))
    k = 0
    while k < n:
        out[k, k] = value
        k += 1
    return out

In [None]:
%timeit diag2_nb(1000, 5)

## Exercise 3: Calculate `c`, with:

    a = rng.random(1234567)
    b = rng.random(1234567)
  
so that

$$
c_i = \tan(a_i) \cdot b_i - a_i^{b_i}
$$

for $i \in [0, 1234566]$

In [None]:
a = rng.random(1234567)
b = rng.random(1234567)

### Solution:

In [None]:
def f(a, b):
    """Evaluate ``tan(a) * b - a**b`` element-wise with NumPy.

    Parameters
    ----------
    a, b : array_like
        Operands of the expression; combined with NumPy broadcasting.

    Returns
    -------
    The element-wise result of ``tan(a) * b - a**b``.
    """
    scaled_tangent = np.tan(a) * b
    power_term = a ** b
    return scaled_tangent - power_term

In [None]:
%timeit f(a, b)

In [None]:
%timeit ne.evaluate("tan(a)*b - a**b")

### What about a Python loop?

In [None]:
def tanabab(a, b):
    """Compute ``tan(a[i]) * b[i] - a[i]**b[i]`` with an explicit Python loop.

    The result is written into a fresh array created with ``np.empty_like(a)``;
    neither input is modified.
    """
    n_items = len(a)
    result = np.empty_like(a)
    for k in range(n_items):
        x = a[k]
        y = b[k]
        result[k] = np.tan(x) * y - np.power(x, y)
    return result

In [None]:
%timeit tanabab(a, b)

### Let's JIT it with `numba`!

In [None]:
@nb.jit
def tanabab_nb(a, b):
    """JIT-compiled loop computing ``tan(a[i]) * b[i] - a[i]**b[i]``.

    Returns a new array created with ``np.empty_like(a)``; the inputs are only
    read.
    """
    n_items = len(a)
    result = np.empty_like(a)
    for k in range(n_items):
        x = a[k]
        y = b[k]
        result[k] = np.tan(x) * y - np.power(x, y)
    return result

In [None]:
%time tanabab_nb(a, b)  # first execution includes the compilation!

In [None]:
%timeit tanabab_nb(a, b)  # the second is pure LLVM optimised code

In [None]:
@nb.jit
def tanabab_nb_mutating_a(a, b):
    """JIT-compiled variant that stores the result back into ``a``.

    Each ``a[k]`` is overwritten with ``tan(a[k]) * b[k] - a[k]**b[k]``, so no
    output array is allocated and nothing is returned. Calling it again works
    on the already overwritten values.
    """
    for k in range(len(a)):
        x = a[k]
        y = b[k]
        a[k] = np.tan(x) * y - np.power(x, y)

In [None]:
%time tanabab_nb_mutating_a(a, b);  # first execution includes the compilation!

In [None]:
a = rng.random(1234567)
b = rng.random(1234567)

In [None]:
%timeit tanabab_nb_mutating_a(a, b);  # the second is pure LLVM optimised code

Summary (your mileage may vary):
- **~2200ms** (Python)
- **~75ms** (reusing `a`, numba, inc. JIT comp.)
- **~60ms** (numba, inc. JIT comp.)
- **~17ms** (numpy)
- **~16ms** (numba, JIT)
- **~8ms** (reusing `a`, numba, JIT)
- **~4ms** (numexpr)

## Exercise 4: Given two arrays `a` and `b`, check if they are (almost) equal

    a = rng.random(1234567)
    b = a.copy()

    b[-1] = 23  # artificially make them differ at the very end ;)

In [None]:
a = rng.random(1234567)
b = a.copy()
c = a.copy()

b[-1] = 23  # make them differ at the very end ;)
c[0] = 23   # make them differ at the beginning

### Solution:

In [None]:
%timeit np.allclose(a, b)

In [None]:
%timeit np.allclose(a, c)

### Using numba?

In [None]:
@nb.jit
def allclose(a, b, tol=0.0001):
    """Return ``True`` if no pair ``a[k]``, ``b[k]`` differs by more than ``tol``.

    The scan stops at the first pair whose absolute difference exceeds
    ``tol``, so arrays that differ early are rejected without reading the rest.
    Only the first ``len(a)`` positions are compared.
    """
    n_items = len(a)
    for k in range(n_items):
        diff = a[k] - b[k]
        if np.abs(diff) > tol:
            return False
    return True

In [None]:
%timeit allclose(a, b)

In [None]:
%timeit allclose(a, c)

In [None]:
%timeit np.count_nonzero(a == b) == a.size

## Exercise 5: Create the following matrix

    1 2 3 4 5 6
    1 2 3 4 5 6
    1 2 3 4 5 6
    1 2 3 4 5 6
    1 2 3 4 5 6


### Solution:

In [None]:
np.ones((5, 6))

In [None]:
np.arange(1, 7)

In [None]:
np.ones((5, 6)) * np.arange(1, 7)

In [None]:
np.ones(5)[:, np.newaxis] * np.arange(1, 7)

### Alternative solutions and further discussions

In [None]:
%timeit np.ones((500, 6)) * np.arange(1, 7)

In [None]:
%%timeit
a = np.ones((500, 6))
np.multiply(a, np.arange(1, 7), out=a)

In [None]:
np.ones(5)

In [None]:
np.ones(5).shape

In [None]:
np.ones(5)[:, np.newaxis] * np.arange(1, 7)

In [None]:
%timeit np.ones(500)[:, np.newaxis] * np.arange(1, 7)

In [None]:
%%timeit
a = np.empty((500, 6))
a[:] = np.arange(1, 7)

In [None]:
np.ones(5)

In [None]:
np.ones(5)[:, np.newaxis]  # adds a new dimension

In [None]:
np.ones(5)[:, np.newaxis].shape

In [None]:
np.arange(1, 7).shape

In [None]:
# broadcasting will turn (5, 1) and (6,) into (5, 6)
(np.ones(5)[:, np.newaxis] * np.arange(1, 7)).shape

### Numba?

In [None]:
@nb.njit
def grad_nb(n, m):
    """Return an ``n`` x ``m`` float matrix whose every row is ``1, 2, ..., m``.

    Every cell of the ``np.empty`` buffer is assigned, so no uninitialised
    values remain in the result.
    """
    out = np.empty((n, m))
    for row in range(n):
        for col in range(m):
            out[row, col] = col + 1
    return out

In [None]:
%timeit grad_nb(500, 6)

In [None]:
int_type = np.int32  # element type of the matrix built by grad_int_nb


@nb.njit
def grad_int_nb(n, m):
    """Return an ``n`` x ``m`` integer matrix whose every row is ``1, 2, ..., m``.

    The buffer comes from ``np.empty`` and therefore starts out with arbitrary
    content, so every column, including column 0, has to be written.
    """
    mat = np.empty((n, m), dtype=int_type)
    # start at 0: a loop over range(1, m) would leave the first column uninitialised
    for i in range(m):
        for j in range(n):
            mat[j, i] = i + 1
    return mat

In [None]:
%timeit grad_int_nb(500, 6)

## Exercise 6: Roll two 6-sided dice 100 times and count each individual value

### Solution:

This is an ugly, hardcoded solution:

In [None]:
def roll_dice(n):
    """Roll two 6-sided dice ``n`` times and count the totals.

    Uses the notebook-level generator ``rng``.

    Returns
    -------
    tuple of two arrays
        The distinct totals that occurred and how often each one occurred
        (the output of ``np.unique(..., return_counts=True)``).
    """
    # `high` is exclusive in Generator.integers, so 7 is required to include the 6
    dice_1 = rng.integers(1, 7, n)
    dice_2 = rng.integers(1, 7, n)
    sums = dice_1 + dice_2
    return np.unique(sums, return_counts=True)

In [None]:
roll_dice(100)

## Exercise 7: Roll five 12-sided dice 123456 times and count each individual value

If you did it right, you now only need to change 2 parameters of your previous code ;)

If not, write an appropriate function.

Create a histogram of the values!

### Solution:

In [None]:
def roll_dice(n_rolls, n_sides, n_die):
    """Roll ``n_die`` dice with ``n_sides`` sides ``n_rolls`` times and count the totals.

    All faces are drawn in one call from the notebook-level generator ``rng``,
    arranged as one row per die, and summed over the dice.

    Returns
    -------
    tuple of two arrays
        The distinct totals that occurred and how often each one occurred.
    """
    faces = rng.integers(1, n_sides + 1, n_rolls * n_die)
    per_roll_totals = np.sum(faces.reshape(n_die, n_rolls), axis=0)
    return np.unique(per_roll_totals, return_counts=True)

In [None]:
rolls = roll_dice(123456, 12, 5)

In [None]:
# `rolls` holds (distinct totals, counts). Histogram the totals themselves,
# weighted by their counts, with one unit-wide bin centred on each integer.
values, counts = rolls
bin_edges = np.arange(values.min(), values.max() + 2) - 0.5
plt.hist(values, bins=bin_edges, weights=counts);

## Exercise 8: Find the value closest to a given number in an array

    a = rng.random(10)
    target = 0.23

### Solution:

In [None]:
a = rng.random(10)
target = 0.23
a

In [None]:
a[np.argmin(np.abs(a - target))]

### Numba?

In [None]:
a = rng.random(1000)

In [None]:
%timeit a[np.argmin(np.abs(a - target))]

In [None]:
@nb.jit
def find_closest(arr, target):
    """Return the element of ``arr`` with the smallest absolute distance to ``target``.

    If several elements are equally close, the first one is returned.

    Raises
    ------
    ValueError
        If ``arr`` is empty.
    """
    if len(arr) == 0:
        raise ValueError("find_closest() requires a non-empty array")
    idx = 0
    # start from +inf: with NaN every `<` comparison is False and arr[0] would always win
    delta = np.inf
    for i in range(len(arr)):
        _delta = abs(arr[i] - target)
        if _delta < delta:
            delta = _delta
            idx = i
    return arr[idx]

In [None]:
%timeit find_closest(a, 0.23)

## Exercise 9: Create a random array with 10 elements and replace its largest value with 0

## Bonus: Replace the second largest value with 0

### Solution:

In [None]:
a = rng.integers(0, 100, 10)
a

In [None]:
np.argmax(a)  # gives the index of the maximum
a[np.argmax(a)] = 0
a

### Solution for the bonus:

In [None]:
a = rng.integers(0, 100, 10)
a

In [None]:
idx = a.argsort()[-2]  # index of the second largest value
a[idx] = 0
a

In [None]:
a[np.argpartition(a, -2)[-2]] = 0
a

This `np.argpartition` thing is a bit tricky, let's examine this...

In [None]:
a = np.array([5, 4, 7, 9])

`np.partition` will "partition" the array: it guarantees that the element at the specified index sits in the position it would occupy in a sorted array, **and** that every element to its left is less than or equal to it and every element to its right is greater than or equal to it (in undefined order).

In [None]:
np.partition(a, 1)

Here, you can see that if the array were sorted (it would be 4, 5, 7, 9), the `5` would sit at position 1 (counting from 0). Partitioning can be much quicker than sorting the full array.

In the solution above, we pass `-2` which means that the second last element should sit in place, so that we catch the two "largest" values (they are either the same or the last one is the largest).

`np.argpartition` will return the indices instead, which we can use to pick the position of the n-th largest element and set it to zero.

In [None]:
np.argpartition(a, 1)  # the index of five (0) is sitting on position 1

### Numba?

In [None]:
a = rng.random(10000000)  # use the seeded notebook generator instead of the unseeded legacy np.random

In [None]:
%timeit a[np.argmax(a)] = 0

In [None]:
@nb.njit
def zero_largest(arr):
    """Overwrite the largest element of ``arr`` with ``0.0`` in place.

    A single pass tracks the running maximum and its index. Because the
    comparison is strict, the first occurrence of the maximum is the one that
    gets zeroed. Nothing is returned.
    """
    best_idx = 0
    best = arr[0]
    for k in range(1, len(arr)):
        candidate = arr[k]
        if candidate > best:
            best = candidate
            best_idx = k
    arr[best_idx] = 0.0

In [None]:
%timeit zero_largest(a)

### Using numba to solve the bonus exercise

In [None]:
a = rng.random(10000000)

In [None]:
%%timeit
idx = np.argsort(a)[-2]
a[idx] = 0

In [None]:
%%timeit 
a[np.argpartition(a, -2)[-2]] = 0

In [None]:
@nb.njit
def zero_second_largest(arr):
    """Overwrite the second largest element of ``arr`` with ``0`` in place.

    A single pass tracks the indices of the largest and the second largest
    value. If the maximum occurs more than once, one of its occurrences counts
    as the second largest. Arrays with fewer than two elements are left
    untouched. Nothing is returned.
    """
    n = len(arr)
    if n < 2:
        return
    # seed both slots from the first two elements so no sentinel value is needed
    if arr[0] >= arr[1]:
        idx_largest = 0
        idx_second_largest = 1
    else:
        idx_largest = 1
        idx_second_largest = 0
    for i in range(2, n):
        if arr[i] > arr[idx_largest]:
            # new maximum: the previous maximum becomes the runner-up
            idx_second_largest = idx_largest
            idx_largest = i
        elif arr[i] > arr[idx_second_largest]:
            # lies between runner-up and maximum: only the runner-up changes
            idx_second_largest = i
    arr[idx_second_largest] = 0

In [None]:
%timeit zero_second_largest(a)

## Exercise 10: Create a checkerboard (8x8, 0s and 1s)

    0 1 0 1 0 1 0 1
    1 0 1 0 1 0 1 0
    0 1 0 1 0 1 0 1
    1 0 1 0 1 0 1 0
    0 1 0 1 0 1 0 1
    1 0 1 0 1 0 1 0
    0 1 0 1 0 1 0 1
    1 0 1 0 1 0 1 0

### Solution:

In [None]:
checkerboard = np.zeros((8, 8), dtype='i')
checkerboard[::2, 1::2] = 1
checkerboard[1::2, ::2] = 1
checkerboard

In [None]:
plt.imshow(checkerboard)

###  Numba?

In [None]:
%%timeit
checkerboard = np.zeros((8, 8), dtype='i')
checkerboard[::2, 1::2] = 1
checkerboard[1::2, ::2] = 1
checkerboard

In [None]:
import numpy as np
import numba as nb

In [None]:
@nb.njit
def checkerboard(n):
    """Return an ``n`` x ``n`` checkerboard of 0s and 1s with a 0 in the top-left corner.

    A cell is 1 exactly when the sum of its row and column index is odd, which
    reproduces the pattern shown in the exercise for even and odd ``n`` alike.
    """
    board = np.zeros((n, n), dtype='i')
    for i in range(n):
        for j in range(n):
            # parity of (row + column) alternates along both axes
            board[i, j] = (i + j) % 2
    return board

In [None]:
checkerboard(8)

In [None]:
%timeit checkerboard(8)

## Exercise 11: Calculate the diagonal of a dot product

In [None]:
A = rng.random((5, 5))
B = rng.random((5, 5))

### Solution:

In [None]:
%timeit np.diag(np.dot(A, B))

In [None]:
%timeit np.sum(A * B.T, axis=1)

In [None]:
%timeit np.einsum("ij,ji->i", A, B)