<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/ufidon/ml/blob/main/mod1/np.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
  </td>
  <td>
    <a target="_blank" href="https://kaggle.com/kernels/welcome?src=https://github.com/ufidon/ml/blob/main/mod1/np.ipynb"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" /></a>
  </td>
</table>
<br>

Numpy introduction
---
- the fundamental Python package for scientific computation, provides
  - a multidimensional array object 
  - various derived objects such as masked arrays and matrices 
  - various routines for fast operations on arrays, including 
    - mathematical, logical, shape manipulation, sorting, selecting, I/O, 
    - basic linear algebra, basic statistical operations, random simulation, etc.
- simpler and faster than regular Python through
  - optimized precompiled C code
  - vectorization: applying operations to arrays instead of just individual elements
- more readable, with notation close to the underlying mathematics

In [None]:
import numpy as np, pprint as pp, copy, matplotlib.pyplot as plt, matplotlib as mpl

In [None]:
%%time 
# magic function must be put in the first line by itself
# numpy is much faster than regular Python

# 1. regular Python
# 1_001_000 elements (the value the former literal `100_1000` evaluated to).
# Both inputs are built as plain lists, mirroring the two np.arange calls in the
# numpy cell; copy.deepcopy is avoided because its per-element recursion over
# immutable ints would be timed as well and distort the comparison.
a = list(range(1_001_000))
b = list(a)  # independent list with the same int elements
c = [x*x+3*y for (x,y) in zip(a,b)]

In [None]:
%%time
# 2. numpy
# Same computation as the list version, vectorized over 1_001_000 elements.
# dtype is pinned to int64: where the platform default integer is 32-bit,
# x**2 would silently overflow for values above 46_340.
x, y = np.arange(1_001_000, dtype=np.int64), np.arange(1_001_000, dtype=np.int64)
z = x**2 + 3*y

ndarray: n-dimensional arrays
---
- store elements of the same data type
- the fundamental datatype of numpy
- fixed size at creation
- more efficient runtime and memory-wise than Python lists

In [None]:
# 1.1 create ndarray

# 1-d array: three floats
d1 = np.array([5.1, 2.3, .7])

# 2-d array: two rows of four single-character strings
d2 = np.array([
    ['5', 'f', '美', 'ओ'],
    ['💘', '💝', '💞', '💖'],
])

# 3-d array: three 2x2 blocks
d3 = np.array([
    [[1, 1], [2, 2]],
    [[3, 3], [4, 4]],
    [[4, 4], [5, 5]],
])

print(f'{d1=}, \n{d2=}, \n{d3=}')

# shape: tuple with the number of elements along each dimension
print(f'{d1.shape=}, {d2.shape=}, {d3.shape=}')

# size: total number of elements
print(f'{d1.size=}, {d2.size=}, {d3.size=}')

# rank: number of dimensions, obtained either as len(shape) or as ndim
# (arrays of rank 3 or more are also called *tensors*)
print(f'rank(d1)={len(d1.shape)}, rank(d2)={len(d2.shape)}, rank(d3)={len(d3.shape)}')
print(f'rank(d1)={d1.ndim}, rank(d2)={d2.ndim}, rank(d3)={d3.ndim}')

# Python type, element dtype and bytes per element of each array
# (dtype U1 = unicode string of length 1)
print(
    f'{type(d1)=}, {d1.dtype=}, {d1.itemsize=}\n'
    f'{type(d2)=}, {d2.dtype=}, {d2.itemsize=}\n'
    f'{type(d3)=}, {d3.dtype=}, {d3.itemsize=}'
)

In [None]:
# 1.2 ndarray shape manipulation
# reshape leaves the source array untouched and returns a new array object with
# the requested shape (NumPy documents the result as a view of the same data
# when possible, otherwise a copy)
f3 = d3.reshape(-1) # flatten ndarray
r4 = f3.reshape((4,-1)) # 4 rows, -1 auto columns
r2a2 = f3.reshape((2,-1,3)) # -1 lets numpy infer the middle dimension
ts = d3.reshape((2,6)) # specify the new shape
print(f'{f3=}\n{r4=}\n{r2a2=}\n{ts=}')
print(f'{f3.shape=}, {r4.shape=}, {r2a2.shape=}, {ts.shape=}')


In [None]:
# reshape in place
# work on an independent copy so that d3 itself is not modified
ri = np.copy(d3)
# assigning a tuple to .shape changes the shape of ri itself; no new array is returned
ri.shape = (3,4)
print(f'{d3}\n{ri}')

In [None]:
# 1.3 array split and stack
# the integers 1..24 laid out as 2 rows x 12 columns
x = np.reshape(np.arange(1, 25), (2, 12))

# horizontal split: cut the columns into two equal halves
x1, x2 = np.hsplit(x, 2)
print(f'{x}\n\n{x1}\n\n{x2}')

In [None]:
# vertical stack: place x2 underneath x1
sx = np.vstack((x1, x2))
print(sx)

In [None]:
# transpose x, then cut the rows of the result into three equal groups
y = x.T
y1, y2, y3 = np.vsplit(y, 3)  # vertical split
print(f'{y}\n\n{y1}\n\n{y2}\n\n{y3}')

In [None]:
# horizontal stack: place the three pieces side by side
sy = np.hstack((y1, y2, y3))
print(sy)

In [None]:
# 2. Create special ndarrays
# a matrix of zeros
zero = np.zeros(shape=(3, 4))
# a matrix of ones with an explicit data type
one = np.ones(shape=(4, 3), dtype=np.int64)
# not really empty: the contents are whatever happens to be in the memory
emty = np.empty(shape=(3, 3))
# unit (identity) matrix
unit = np.eye(3)
# constant matrix
cons = np.full(shape=(2, 3), fill_value=2.8)
# random matrix
ran = np.random.random(size=(2, 2))
print(f'{zero=}\n{one=}\n{emty=}\n{unit=}\n{cons=}\n{ran=}')

In [None]:
# Create a 3x3 array of random integers drawn from [2, 10):
# `high` is exclusive in np.random.randint, so the values range from 2 to 9
print(f'{np.random.randint(low=2,high=10,size=(3,3))}') 

In [None]:
# 2.1 Create a range of elements
# 0..5
a1 = np.arange(6)
# counts down from 12 in steps of 2; half-open interval [start, stop)
a2 = np.arange(12, 0, -2)
# 6 evenly spaced values over the closed interval [start, stop]
a3 = np.linspace(12, 0, num=6)
print(f'{a1=}\n{a2=}\n{a3=}')

In [None]:
# 3. Indexing
# 5x6 array of two-character strings; each entry spells its own "rowcolumn"
# position, which makes the result of every indexing expression easy to read
m = np.array([
  ['00','01','02','03','04','05'],
  ['10','11','12','13','14','15'],
  ['20','21','22','23','24','25'],
  ['30','31','32','33','34','35'],
  ['40','41','42','43','44','45']
])
print(f'{m=}\n{m.shape=}')

# 3.1 select single element
print(f'{m[3][4]=} {m[3,4]=}')

# 3.2 slicing
print(f'{m[1:4, :4]=}\n{m[3:,::-1]=}\n{m[...,2]=}\n{m[2,:]=}')

# 3.3 indexing with array of indexes
# the index arrays are paired element by element: (0,0), (0,5), (4,0), (4,5)
rowidx, colidx = np.array([0,0,4,4]), np.array([0,5,0,5])
print(f'{m[rowidx, colidx]=}')
# same as (reusing the index arrays defined above)
for row, col in zip(rowidx, colidx):
  print(f'{m[row, col]}', end=',')
# end the comma-separated line so the next print starts on a fresh line
print()

# 3.4 logical indexing
# boolean mask built from element-wise string comparisons
selection = ('11'<=m) & (m<='33')
print(f"{m[selection]=}")

In [None]:
# An example of indexing array of numbers
# the integers 1..12 arranged as 3 rows x 4 columns
nums = np.arange(1, 13).reshape(3, 4)

# four boolean masks with the same shape as nums
s1 = nums < 7                   # smaller than 7
s2 = nums >= 7                  # 7 or larger
s3 = nums % 2 == 0              # even values
s4 = (nums > 3) & (nums < 11)   # strictly between 3 and 11
print(f'{nums=}\n{s1=}\n{s2=}\n{s3=}\n{s4=}')
# indexing with a mask returns the selected elements as a 1-d array
print(f'{nums[s1]=}\n{nums[s2]=}\n{nums[s3]=}\n{nums[s4]=}')

In [None]:
# zero out non-selected elements
# keep an untouched copy first, because the assignment below edits nums in place
backup = np.copy(nums)
# the scalar 0 is assigned to every position where s4 is False
# (by broadcasting, see below)
nums[np.logical_not(s4)] = 0
print(f'{nums=}\n{backup=}')

Mathematical operations on arrays
---
- broadcasting
- element-wise operations
  - addition, subtraction, multiplication, division, etc.
- aggregation of elements such as sum, min, max, etc.
- matrix operations such as transposition, reverse, etc.
- apply mathematical functions


In [None]:
# 1. broadcasting
# operation between arrays of different sizes
A = np.array([[1, 2], [3, 4]])
# the scalars 3 and 2 are applied to every element of A
B = 3*A + 2
print(f'{A}\n{B}')

In [None]:
# broadcasting with nested lists:
# [3,3] acts as a row that is added to every row of A;
# [[3,],[3,]] acts as a column that is subtracted from every column of A
print(f'{A+[3,3]}\n{A-[[3,],[3,]]}')

In [None]:
# 2. addition, subtraction, multiplication, division, etc.
# every operator works element by element on A and B
C1 = A + B    # addition
C2 = A - B    # subtraction
C3 = A * B    # multiplication
C4 = A / B    # true division
C5 = A // B   # floor division
C6 = A % B    # remainder
C7 = A ** B   # power
print(f'{C1}\n{C2}\n{C3}\n{C4}\n{C5}\n{C6}\n{C7}')

In [None]:
# 3. aggregation of elements such as sum, min, max, etc.
# without an axis the whole array is reduced to a single value;
# axis=0 reduces down the rows (one result per column),
# axis=1 reduces across the columns (one result per row)
print(f'{C1.sum()=}, {C1.sum(axis=0)=}, {C1.sum(axis=1)=}')
print(f'{C1.min()=}, {C1.min(axis=0)=}, {C1.min(axis=1)=}')
print(f'{C1.max()=}, {C1.max(axis=0)=}, {C1.max(axis=1)=}')
# note: this line shows three different reductions: overall mean,
# per-column product, and per-row standard deviation
print(f'{C1.mean()=}, {C1.prod(axis=0)=}, {C1.std(axis=1)=}')

In [None]:
# 3.2 get unique elements and counts
# 3x4 array of random integers (low=2, high=10)
R = np.random.randint(2, 10, size=(3, 4))
print(f'{R=}\n{np.unique(R)=}')


In [None]:
# besides the unique values, also request an index for each value
# (return_index) and the number of times it occurs (return_counts)
# NOTE(review): per the np.unique docs the indexes are first-occurrence
# positions in the flattened R — confirm
values, indexes, counts = np.unique(R, return_index=True, return_counts=True)
print(f'{values=}\n{indexes=}\n{counts=}')

In [None]:
# 4. transpose and flip
# print R next to its transpose, obtained both as the .T attribute and via .transpose()
print(f'{R=}\n{R.T=}\n{R.transpose()=}')

In [None]:
# flip or reverse, not in place (results are only printed, R is never reassigned)
# np.flip without an axis flips both upside down, and left side right
print(f'{R}\n{np.flip(R)}')
# fliplr reverses left/right only, flipud reverses up/down only
print(f'{np.fliplr(R)}\n{np.flipud(R)}')

In [None]:
# flipud = flip on axis 0; fliplr = flip on axis 1
# the same two flips expressed through np.flip with an explicit axis
print(f'{np.flip(R, axis=0)}\n{np.flip(R, axis=1)}')

In [None]:
# matrix multiplication
# R times its own transpose: 3x4 * 4x3 = 3x3
R2 = np.dot(R, R.T)
print(f'{R2}')

In [None]:
# matrix inverse and pseudo-inverse
import numpy.linalg as npl
# first line printed: R2 times its inverse; second: R times its pseudo-inverse.
# Both are expected to be close to the identity matrix up to rounding.
# NOTE(review): R is random, so R2 may be singular or R rank-deficient, in which
# case npl.inv can fail or the products deviate from the identity — confirm this
# is acceptable for the demo
print(f'{R2.dot(npl.inv(R2))}\n{R.dot(npl.pinv(R))}')

In [None]:
# QR decomposition
# factor R2 into the pair (q, r)
q,r = npl.qr(R2)
# multiplying the factors back together should reproduce R2 (up to rounding)
print(f'{q.dot(r)}')

In [None]:
# svd - singular value decomposition
# R2 = U.S.V
# npl.svd hands back the singular values as the 1-d array D (diagonal only)
U, D, V = npl.svd(R2)
# expand D into the square diagonal matrix S
S = np.diag(D)
# the last printed product U.S.V rebuilds R2
print(f'{U}\n{S}\n{V}\n{U.dot(S).dot(V)}')

In [None]:
# matrix determinant, diagonal, and trace=np.diag(R2).sum()
# the last two printed values compute the trace in two ways:
# R2.trace() and the sum of the diagonal entries
print(f'{npl.det(R2)}\n{np.diag(R2)}\n{R2.trace()}\n{np.diag(R2).sum()}')

In [None]:
# eigenvalues and eigenvectors
# R2.v =  λ*v
# λ receives the eigenvalues and v the eigenvectors
# NOTE(review): per the numpy.linalg.eig docs, column v[:, i] belongs to λ[i] — confirm
λ, v = npl.eig(R2)
print(f'{λ=}\n{v=}')

In [None]:
# residual of the eigen-equation R2.v = λ*v; every entry should be close to zero
# λ*v broadcasts λ along the last axis, i.e. column j of v is scaled by λ[j]
print(f"{R2.dot(v) - λ*v}")

__5. apply mathematical functions__

- called universal functions, applied element-wise
  - vectorized version of popular math functions such as
    - abs, sqrt, exp, log, ceil, sin, etc.
    - isnan, isinf, isneginf, etc.
    - used as unary function
  - binary functions such as add, greater, etc.

$\displaystyle \text{MeanSquareError}=\frac{1}{n}\sum_{i=1}^{n}(\hat{Y}_i-Y_i)^2$

In [None]:
# Mean square error between random "predictions" and random "labels".
Yh = np.random.random((5,1)) # predictions
Ya = np.random.random((5,1)) # labels

# np.mean divides by the actual number of elements, so the result stays correct
# if the sample count above is changed (no separate hard-coded 1/n factor)
err = np.mean(np.square(Yh-Ya))
# predictions and labels side by side, followed by the error
print(f'{np.hstack([Yh,Ya])}\n{err=}')


In [None]:
# binary universal function applied to the results of two unary ones:
# element-wise test of sin(Yh) > cos(Ya)
print(f'{np.greater(np.sin(Yh), np.cos(Ya))}')

__6. solve linear equations__

$AX=b$

- $x+3y-4z = 7$
- $3x-5y+2z = 25$
- $4x -y + z = 1$

In [None]:

# coefficient matrix: one row per equation, one column per unknown (x, y, z)
A = np.array([[1, 3, -4], [3, -5, 2], [4, -1, 1]])
# right-hand sides of the three equations
b = np.array([7, 25, 1])
# solve A.X = b for X
sol = npl.solve(A, b)
print(f"{sol=}")


In [None]:
# check the solution
# the cell ends with a tuple expression, pairing A.dot(sol) with b so the two can be compared by eye
A.dot(sol), b

In [None]:
# check the solution again
# np.allclose compares element-wise within a tolerance, so floating-point
# rounding in A.dot(sol) does not make the check fail
np.allclose(A.dot(sol), b)

# References
- [NumPy documentation](https://numpy.org/doc/stable/index.html)