# NumPy

NumPy is a library that primarily provides an array package for homogeneous data. Its arrays are both more memory efficient and more CPU efficient than plain Python lists.

In [None]:
%matplotlib inline
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

In [None]:
# 1D: the integers 0..9, built natively by NumPy rather than by converting a
# Python range element by element (same values, same default integer dtype).
s = np.arange(10)

In [None]:
# Arithmetic mean over all elements; integer input still gives a float result.
s.mean()

In [None]:
# shape is a tuple with one entry per dimension: (10,) for this 1-D array.
# pandas objects expose the same attribute, modelled on NumPy's.
s.shape

In [None]:
# One element type is shared by every entry (the platform's default integer here).
s.dtype

In [None]:
# len() reports the size of the first (here: the only) dimension.
len(s)

In [None]:
# 2D: the integers 0..99 laid out row by row as 20 rows x 5 columns
a = np.reshape(np.arange(100), (20, 5))
a

In [None]:
a.shape  # (n, m): n rows, m columns -- (20, 5) here

In [None]:
len(a)  # size of the first dimension, i.e. the number of rows (20)

In [None]:
# Number of dimensions (axes) of the array: 2.
a.ndim

In [None]:
# axis=0 reduces over the rows: one mean per column, result shape (5,).
a.mean(axis=0)

In [None]:
# keepdims=True keeps the reduced axis with length 1: result shape (1, 5).
a.mean(axis=0, keepdims=True)

In [None]:
# axis=1 reduces over the columns: one mean per row, result shape (20,).
a.mean(axis=1)

In [None]:
# Per-row means kept as a column: result shape (20, 1).
a.mean(axis=1, keepdims=True)

In [None]:
a.mean(axis=None)  # axis=None (the default): mean over every element of the flattened array

In [None]:
# 3D: the integers 0..69 arranged as 7 blocks of 5 rows x 2 columns
b = np.reshape(np.arange(70), (7, 5, 2))
b

In [None]:
# Reducing axis 1 of the (7, 5, 2) array leaves shape (7, 2).
b.mean(axis=1)

In [None]:
# Reducing axis 0 of the (7, 5, 2) array leaves shape (5, 2).
b.mean(axis=0)

In [None]:
# Reducing axis 2 of the (7, 5, 2) array leaves shape (7, 5).
b.mean(axis=2)

## Creation

In [None]:
# 3x5 array filled with ones; the element type defaults to float.
np.ones((3, 5))

In [None]:
# Same shape, but with integer elements instead of the float default.
np.ones((3, 5), dtype=int)

In [None]:
# 3x5 array filled with (float) zeros.
np.zeros((3, 5))

In [None]:
# 3 rows x 5 columns with ones on the main diagonal and zeros elsewhere;
# the result does not have to be square.
np.eye(3, 5)

In [None]:
# Given a 1-D sequence, diag builds a square 2-D array with those values
# (0, 1, 2) on the diagonal.
np.diag(range(3))

In [None]:
# 15 evenly spaced values from 0 to 10, both endpoints included.
np.linspace(0, 10, num=15)

In [None]:
# The same 15 values arranged as 3 rows x 5 columns.
np.linspace(0, 10, num=15).reshape(3,5)

In [None]:
# Uniform random floats in [0, 1) with shape (3, 5).
# No seed is set, so the values differ on every run.
np.random.random((3,5))

In [None]:
# When num is omitted, linspace returns 50 samples.
np.linspace(0, 10)

## Math Operations

In [None]:
# Alphabetical list of every attribute and method name available on the array object.
print(sorted(dir(a)))

In [None]:
# Load the iris dataset bundled with scikit-learn and keep its numeric
# feature matrix (presumably 150 samples x 4 features -- confirm with iris.shape).
data = datasets.load_iris()
iris = data.data

In [None]:
# Show the raw feature array.
iris

In [None]:
# Smallest value in each column.
iris.min(axis=0)

In [None]:
# Largest value in each column.
iris.max(axis=0)

In [None]:
# Standardize, step 1: the mean of each column.
# Note: Unicode variable names are legal and look neat, but they are
# awkward to type in practice.
μ = iris.mean(axis=0)
μ

In [None]:
# Standardize, step 2: the standard deviation of each column
# (population form; std uses ddof=0 unless told otherwise).
σ = iris.std(axis=0)
σ

In [None]:
# Standardize, step 3: subtract the column means and divide by the column
# standard deviations. μ and σ are broadcast across every row of iris.
iris_standardized = (iris - μ)/σ

In [None]:
# Scatter plot of the first two standardized columns (column 0 on x, column 1 on y).
plt.scatter(iris_standardized[:,0], iris_standardized[:,1])

In [None]:
# The same two columns on their raw scale: the point pattern matches the
# standardized plot, only the axis ranges differ.
plt.scatter(iris[:,0], iris[:,1])

In [None]:
# Two small 2x3 integer arrays used by the arithmetic examples below
c = np.array([[0, 1, 2],
              [9, 8, 7]])
d = np.array([[6, 5, 4],
              [3, 4, 5]])
# Explicit display() calls are needed to show both arrays: a notebook cell
# only renders its last expression automatically.
display(c)
display(d)

In [None]:
# Addition is element-wise: entry [i, j] of the result is c[i, j] + d[i, j]
c + d

In [None]:
# Subtraction is element-wise as well
c - d

In [None]:
# Multiplication with * is element-wise (it is NOT a matrix product)
c * d

In [None]:
# Broadcasting: the scalar 4 is applied to every element of c
4 * c

In [None]:
# Transpose: shape (2, 3) becomes (3, 2). The result is a view on the same
# data, not a copy.
c.T

In [None]:
# Matrix product: (2, 3) @ (3, 2) -> (2, 2). Entry [i, j] of the result is
# the dot product of row i of c with row j of d.

c @ d.T

In [None]:
# Universal functions (ufuncs) are applied element by element and return
# an array of the same shape
np.sin(c)

In [None]:
# c contains a 0, so that entry comes out as -inf and NumPy emits a
# divide-by-zero RuntimeWarning instead of raising.
np.log(c)

In [None]:
# Shifting by 1 avoids log(0); np.log1p(c) computes the same quantity.
np.log(c+1)

In [None]:
# Boolean arrays: a comparison is evaluated element-wise and yields an
# array of True/False with the same shape as c
c < 2

In [None]:
# "Fancy indexing" with a boolean array (a mask): selects the entries
# where the mask is True.
# The result is a 1-D copy, not a view on c.
c[c < 2]

In [None]:
# Assignment through a boolean mask changes the selected entries in place.
# It is done on a copy here so that the original c stays untouched.
c2 = c.copy()
c2[c < 2] = 0
c2

In [None]:
# c still holds its original values: only the copy c2 was modified.
c

## Indexing/Slicing

In [None]:
# Show the full array again as a reference for the indexing examples.
iris

In [None]:
iris[0]  # first row (a single index applies to the first axis)

In [None]:
iris[-1]  # last row (negative indices count from the end)

In [None]:
iris[:, 0]  # first column: every row, column 0

In [None]:
iris[:, -1]  # last column: every row, column -1

In [None]:
iris[:3, :2]  # first 3 rows, first 2 columns

In [None]:
iris[0, 1]  # single value at first row, second column

## Copy or Not?

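* changing the ``shape`` (e.g. with ``reshape``) does not copy the data; whenever possible the result is a view on the same memory
* a *slice* creates a view; if you modify it, you change the original
* calling ``.copy()`` creates a new, independent copy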