NumPy introduction
---
- the fundamental Python package for scientific computing; it provides
  - a multidimensional array object 
  - various derived objects such as masked arrays and matrices 
  - various routines for fast operations on arrays, including 
    - mathematical, logical, shape manipulation, sorting, selecting, I/O, 
    - basic linear algebra, basic statistical operations, random simulation, etc.
- simpler and faster than regular Python through
  - optimized precompiled C code
  - vectorization: applying operations to arrays instead of just individual elements
- more readable, because the code stays close to mathematical notation

In [None]:
import numpy as np, pprint as pp, copy, matplotlib.pyplot as plt, matplotlib as mpl

In [None]:
%%time 
# a cell magic such as %%time has to sit alone on the first line of the cell
# this cell is the baseline: numpy (next cell) does the same work much faster

# 1. regular Python
# NOTE: the size is 1001000 (the original spelling 100_1000 is the same integer)
a = list(range(1_001_000))
b = copy.deepcopy(a)
c = [u * u + 3 * v for u, v in zip(a, b)]

In [None]:
%%time
# 2. numpy: the same arithmetic, applied to whole arrays at once
# NOTE: the size is 1001000 (the original spelling 100_1000 is the same integer)
x = np.arange(1_001_000)
y = np.arange(1_001_000)
z = x**2 + 3 * y

ndarray: n-dimensional arrays
---
- stores elements of the same data type
- the fundamental datatype of numpy
- fixed size at creation
- more efficient than Python lists in both runtime and memory use

In [None]:
# 1.1 create ndarray

# one dimension: a flat list of floats
d1 = np.array([5.1, 2.3, .7])

# two dimensions: a list of rows, each entry a single-character string
d2 = np.array([
    ['5', 'f', '美', 'ओ'],
    ['💘', '💝', '💞', '💖'],
])

# three dimensions: a list of three 2x2 blocks
d3 = np.array([
    [[1, 1], [2, 2]],
    [[3, 3], [4, 4]],
    [[4, 4], [5, 5]],
])

print(f'{d1=}, \n{d2=}, \n{d3=}')

# shape: a tuple of integers, one per dimension,
# holding the size of the array along that dimension
print(f'{d1.shape=}, {d2.shape=}, {d3.shape=}')

# rank: how many dimensions the array has, i.e. the length of its shape
print(f'rank(d1)={len(d1.shape)}, rank(d2)={len(d2.shape)}, rank(d3)={len(d3.shape)}')

# type() is always ndarray; dtype describes the elements
# (a dtype of U1 denotes a Unicode string of length 1)
print(f'{type(d1)=}, {d1.dtype=}\n{type(d2)=}, {d2.dtype=}\n{type(d3)=}, {d3.dtype=}')

In [None]:
# 1.2 ndarray shape manipulation

# a -1 in a target shape tells numpy to infer that dimension
# from the total number of elements
f3 = d3.reshape(-1)  # collapse everything into one dimension
r4 = f3.reshape((4, -1))  # 4 rows, column count inferred
r2a2 = f3.reshape((2, -1, 3))  # first dimension 2, last dimension 3, middle inferred
print(f'{f3=}\n{r4=}\n{r2a2=}')
print(f'{f3.shape=}, {r4.shape=}, {r2a2.shape=}')


In [None]:
# 2. Create special ndarrays
zero = np.zeros((3, 4))  # 3x4 matrix with every entry 0
one = np.ones((4, 3))  # 4x3 matrix with every entry 1
unit = np.eye(3)  # 3x3 identity (unit) matrix
cons = np.full((2, 3), 2.8)  # 2x3 matrix with every entry 2.8
ran = np.random.random((2, 2))  # 2x2 matrix of random floats in [0, 1)
print(f'{zero=}\n{one=}\n{unit=}\n{cons=}\n{ran=}')

In [None]:
# 3. Indexing
# every entry is the string '<row><col>', so a printed value shows
# which position was selected
m = np.array([
  ['00','01','02','03','04','05'],
  ['10','11','12','13','14','15'],
  ['20','21','22','23','24','25'],
  ['30','31','32','33','34','35'],
  ['40','41','42','43','44','45']
])
print(f'{m=}\n{m.shape=}')

# 3.1 select single element
# m[3][4] takes row 3 and then indexes into it; m[3,4] addresses the element directly
print(f'{m[3][4]=} {m[3,4]=}')

# 3.2 slicing
# rows 1-3 / first 4 columns; rows 3 onward with columns reversed; column 2; row 2
print(f'{m[1:4, :4]=}\n{m[3:,::-1]=}\n{m[...,2]=}\n{m[2,:]=}')

# 3.3 indexing with array of indexes
# the index arrays are paired element by element: (0,0), (0,5), (4,0), (4,5)
rowidx, colidx = np.array([0,0,4,4]), np.array([0,5,0,5])
print(f'{m[rowidx, colidx]=}')
# same as
for row, col in zip(rowidx, colidx):
  print(f'{m[row, col]}', end=',')
# end the comma-separated line so the next output starts on a line of its own
print()

# 3.4 logical indexing
# the comparisons are element-wise string comparisons and produce a boolean mask;
# indexing with the mask returns the selected entries as a 1-d array
selection = ('11'<=m) & (m<='33')
print(f"{m[selection]=}")


# References
- [NumPy documentation](https://numpy.org/doc/stable/index.html)