# Introduction to NumPy

Inspired by Tamás Gál

The latest version of this notebook is available at [https://github.com/escape2020/school2021](https://github.com/escape2020/school2021)

In [None]:
import numpy as np
import sys

print(f"Python version:  {sys.version}\n"
      f"NumPy version:   {np.__version__}")

rng = np.random.default_rng(42)  # initialise our random number generator

In [None]:
def describe(np_obj):
    """Print a short summary of a NumPy object.

    The summary lists the Python type of ``np_obj`` followed by its
    ``size``, ``ndim``, ``shape`` and ``dtype`` attributes, one per line.

    Parameters
    ----------
    np_obj
        Any object exposing ``size``, ``ndim``, ``shape`` and ``dtype``
        attributes (e.g. an ``ndarray``).
    """
    # Attribute lookups ({o.size}, ...) are resolved by str.format itself.
    template = (
        "object type: {0}\n"
        "size: {o.size}\n"
        "ndim: {o.ndim}\n"
        "shape: {o.shape}\n"
        "dtype: {o.dtype}"
    )
    summary = template.format(type(np_obj), o=np_obj)
    print(summary)

In [None]:
from IPython.core.magic import register_line_magic

@register_line_magic
def shorterr(line):
    """Evaluate ``line`` and print only the exception message on failure.

    Registered as the ``%shorterr`` line magic. The text after the magic
    name is evaluated as a single Python expression with ``eval``.

    Returns the value of the expression, or ``None`` when an exception was
    raised (in which case ``<ExceptionClass>: <message>`` is printed in
    bold red using ANSI escape codes instead of a full traceback).
    """
    try:
        result = eval(line)
    except Exception as err:
        message = "\x1b[31m\x1b[1m{e.__class__.__name__}: {e}\x1b[0m".format(e=err)
        print(message)
        return None
    return result


# The magic stays registered as `%shorterr`; only the plain function name
# is removed from the namespace.
del shorterr

## The basic datastructure in NumPy: `ndarray`

In [None]:
a = np.array([1.2, 2, 3, 4, 5, 6])
a

In [None]:
type(a)

### Array properties

In [None]:
a.size  # number of elements

In [None]:
a.ndim

In [None]:
a.shape

In [None]:
a.dtype

### Multi-Dimensional Arrays

In [None]:
b = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
b

In [None]:
describe(b)

### Array Methods

In [None]:
a.min(), a.max(), a.mean(), a.sum()

In [None]:
b

In [None]:
b.sum()

In [None]:
b.sum(axis=0)

In [None]:
b.sum(axis=1)

## Operations with Arrays

In [None]:
a

In [None]:
a - 42

In [None]:
a * 42 / np.pi

In [None]:
a**np.e, np.e**a

In [None]:
np.cos(a)

In [None]:
np.sin(a)

In [None]:
a * a  # element-wise

In [None]:
a @ a  # use np.dot(a, a) if you are using < Python 3.5

In [None]:
a

In [None]:
a < 3

In [None]:
a == 4

In [None]:
(a > 3) & (a < 5)  # bitwise AND

In [None]:
a < np.array([2, 3, 5, 2, 1, 5])

In [None]:
a > 2

In [None]:
np.sum(a > 2)

## Basic Indexing and Slicing

In [None]:
a[5]  # indexing starts at 0

In [None]:
a[-1]  # -1 refers to the last element

In [None]:
a[2:6:3]  # just like in Python: [start:end:step]

In [None]:
a[::-1]  # reversing an array

In [None]:
b[::-1]  # reverses axis 0

### Indexing and Slicing in Multiple Dimensions

In [None]:
b

In [None]:
b[0, 2]

In [None]:
b[0, 1:4]

In [None]:
b[:, 1:4]  # the `:` selects the whole axis

In [None]:
b[:, 2:5:2]

In [None]:
b[::-1, ::-1]  # reverses both axes

### Advanced Indexing

In [None]:
d = np.array([4, 3, 2, 5, 4, 5, 4, 4])
d

In [None]:
mask = np.array([True, False, False, True, False, False, True, True])
mask

In [None]:
d[mask]

In [None]:
d[[1, 3, 1, 6]]

#### Be careful with boolean indexing: the mask has to be a boolean array or a list of booleans.

In [None]:
d

In [None]:
d[[False, True, False, False, True, False, False, True]]

In [None]:
d[[0, 1, 0, 0, 1, 0, 0, 1]]  # although we know that True==1 and False==0

In [None]:
np.array([0, 1, 0, 0, 1, 0, 0, 1], dtype=bool)

In [None]:
d[np.array([0, 1, 0, 0, 1, 0, 0, 1], dtype=bool)] 

## The `dtype`

In [None]:
np.dtype

In [None]:
a, a.dtype

In [None]:
e = a * 42 / np.pi  # NumPy will choose the "right" `dtype` automatically
e, e.dtype

### Some Basic `dtype`s

In [None]:
np.dtype('f')

In [None]:
np.dtype('f8')

In [None]:
np.dtype('i')

In [None]:
np.dtype('i2')

In [None]:
np.dtype('c16')

In [None]:
np.dtype('S8')  # byte string with a fixed length of 8 bytes

### Using `dtype`s

In [None]:
np.array([1, 2, 3], dtype='c8')

## Helper Functions to Create Arrays

In [None]:
np.arange(7)

In [None]:
np.ones(10)

In [None]:
np.zeros(5)

In [None]:
np.zeros((2, 4))

In [None]:
np.empty(20)

In [None]:
np.eye(5)

In [None]:
np.linspace(1, 2, 10)

In [None]:
np.ones_like(b)

In [None]:
np.ones(10, dtype='i2')

### Random numbers

In [None]:
rng = np.random.default_rng(42)  # always create a generator with a seed!

In [None]:
rng.integers(1, 10, (2, 20))

In [None]:
rng.random((3, 4))

In [None]:
rng.uniform(0, 5, 10)

## Broadcasting

In [None]:
g = np.array([1, 2, 3, 4])
h = np.array([5, 6, 7, 8])
g * h  # if the shapes match, operations are usually done element-by-element

In [None]:
g * 23  # as we have already seen, the rule relaxes when the shapes meet certain constraints

### Broadcasting rules
- NumPy compares the shapes element-wise, starting with the trailing dimension
- two dimensions are compatible if they are equal or one of them is __1__
- raises a `ValueError: operands could not be broadcast together` if the shapes are incompatible
- the size of a successfully broadcast array is the maximum size along each dimension of the input arrays

### Operation on two arrays with different shapes
```
A      (4d array):  5 x 1 x 4 x 1
B      (3d array):      7 x 1 x 5
Result (4d array):  5 x 7 x 4 x 5
```

In [None]:
arr_1 = np.array([[1, 2, 3], [4, 5, 6]])
arr_2 = np.array([[1], [2]])

print('arr_1 shape:', arr_1.shape)
print('arr_2 shape:', arr_2.shape)

arr_3 = arr_1 + arr_2
print('arr_3 shape:', arr_3.shape)

arr_3

In [None]:
arr_1

In [None]:
arr_2

In [None]:
np.arange(20)

In [None]:
i = np.arange(20).reshape(4, 5)
i

In [None]:
describe(i)

In [None]:
i * np.array([0, 1, 2, 4, 5])

In [None]:
j = np.array([0, 10, 20, 30])
k = np.array([7, 8, 9])

In [None]:
%shorterr j+k

In [None]:
j[:, np.newaxis]  # inserts a new axis, making it two dimensional

In [None]:
k

In [None]:
j[:, np.newaxis] + k

## Universal Functions (`ufunc`)

#### A `ufunc` is a "vectorized" wrapper for a function that takes a fixed number of scalar inputs and produces a fixed number of scalar outputs.

NumPy provides a bunch of `ufunc`s:
- Math operations (`add()`, `subtract()`, `square()`, `log10()`, ...)
- Trigonometric functions (`sin()`, `cos()`, `tan()`, `deg2rad()`, ...)
- Bit-twiddling functions (`bitwise_and()`, `right_shift()`, ...)
- Comparison functions (`greater()`, `less_equal()`, `fmax()`, ...)
- Floating functions (`isnan()`, `isinf()`, `floor()`, ...)
    
They are all instances of `np.ufunc`

In [None]:
type(np.cos)  # every ufunc is an instance of np.ufunc

### Create your own `ufunc` with `np.frompyfunc(func, nin, nout)`

In [None]:
m = rng.integers(0, 100, 17)
m

In [None]:
def step_23(x):
    """Step function with its threshold at 23.

    Returns 1 when ``x`` is strictly greater than 23, otherwise 0.
    The comparison result is used as a truth value, so ``x`` is expected
    to be a single scalar.
    """
    if x > 23:
        return 1
    return 0

In [None]:
%shorterr step_23(m)

In [None]:
ustep_23 = np.frompyfunc(step_23, 1, 1)

In [None]:
ustep_23(42)

In [None]:
ustep_23(5)

In [None]:
ustep_23(m)

In [None]:
ustep_23(rng.integers(0, 100, (2, 3, 4)))

## Views and Copies

In [None]:
original = np.arange(10)
original

In [None]:
ref_to_original = original   # will point to `original`
ref_to_original[2] = 99
original             # changing `ref_to_original` has changed `original`

In [None]:
single_value = original[5]      # single element access returns a copy
single_value

In [None]:
single_value = 9999  # rebinds the name only; nothing is written into `original`
original             # not affected when `single_value` is changed

### Slices return (memory) views

In [None]:
original = np.arange(10)
original

In [None]:
a_slice = original[2:4]    # slices return (memory) views
a_slice

In [None]:
a_slice[1] = 1000  # changing elements of `a_slice` actually changes `original`
original

In [None]:
original[3:6] = [101, 102, 103]   # changing multiple elements at once
original

In [None]:
copy_arr = np.copy(original)

In [None]:
original.copy()

In [None]:
copy_arr[4] = 3243

In [None]:
original