# Numpy
Basics of vectorized calculations

In [1]:
import numpy as np

## Generating Numbers

In [2]:
# Integers from 0 up to (but not including) 10
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
# Random 2x3 array of samples from the standard normal distribution.
# No seed is set in the visible cells, so the values differ on every run.
np.random.randn(2, 3)

array([[-0.40883237,  0.86324608,  0.55489733],
       [-0.5688655 , -1.16537721,  0.79237249]])

In [4]:
# Create from a Python list
np.array([1,2,3])

array([1, 2, 3])

In [5]:
# Array of zeros with 2 rows and 3 columns
np.zeros(shape=(2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [6]:
# Array of ones with 2 rows and 3 columns
np.ones(shape=(2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [7]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [8]:
# Reshape the 15 values 0..14 into 3 rows of 5
np.reshape(np.arange(15), (3, 5))

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [9]:
# Transpose: the 3x5 array becomes 5x3 (rows and columns swapped)
np.transpose(np.arange(15).reshape(3, 5))

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

## Query Data Types

In [10]:
# 2x3 array of zeros used by the attribute queries in this section
zeros23 = np.zeros(shape=(2, 3))

In [11]:
# Number of dimensions (axes) of the array
zeros23.ndim

2

In [12]:
# Size along each dimension, as a tuple (rows, columns)
zeros23.shape

(2, 3)

In [13]:
# Data type of the elements
zeros23.dtype

dtype('float64')

## Index and Slice
Indexing and slicing use the same syntax as a Python list, but a slice is a view into the original array, not a copy.

In [14]:
# 1-D sample array holding 0..9
arr = np.arange(10)

In [15]:
# Single element, first three elements, last element
arr[3], arr[:3], arr[-1]

(3, array([0, 1, 2]), 9)

In [16]:
# A slice is a view onto arr's data (elements at index 4 and 5), not a copy
sub_arr = arr[4:6]
sub_arr

array([4, 5])

In [17]:
# Changes to the slice apply to the original array:
# assigning through sub_arr[:] overwrites arr[4] and arr[5] as well
sub_arr[:] = 10
arr

array([ 0,  1,  2,  3, 10, 10,  6,  7,  8,  9])

In [18]:
# Copy: .copy() returns a new array with its own data,
# so later changes to the copy do not affect the source array
np.arange(2).copy()

array([0, 1])

Addressing elements in a higher-dimensional array

In [19]:
# 2x2 sample array holding 0..3, filled row by row
arr_2d = np.arange(4).reshape(2, 2)
arr_2d

array([[0, 1],
       [2, 3]])

In [20]:
# Two ways to read row 0, column 1: chained indexing (row first, then element)
# and a single [row, col] index
arr_2d[0][1], arr_2d[0,1]

(1, 1)

Slicing a multidimensional array

In [21]:
# 3x3 sample array holding 1..9, filled row by row
arr33 = np.arange(1, 10).reshape(3, 3)
arr33

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [22]:
# Slice rows: the first two rows, all columns
arr33[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [23]:
# Slice rows and columns: the first two rows, columns from index 1 onward
arr33[:2, 1:]

array([[2, 3],
       [5, 6]])

In [24]:
# slice only col

In [25]:
# All rows, column index 1 -> the column comes back as a 1-D array
arr33[:,1]

array([2, 5, 8])

## Boolean Select

In [26]:
# 1-D sample array holding 0..9 for the boolean-selection examples
r = np.arange(10)
r

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
# Math and comparison operations on a NumPy array are applied elementwise,
# so this yields one boolean per entry (True where the entry is even)
r % 2 == 0

array([ True, False,  True, False,  True, False,  True, False,  True,
       False])

In [28]:
# Use a boolean array as a mask: only entries where the mask is True are kept
r[r%2 == 0]

array([0, 2, 4, 6, 8])

### Boolean Logic in Selection

In [29]:
# Not: ~ inverts the boolean mask (here: keep the odd entries)
r[~(r%2 == 0)]

array([1, 3, 5, 7, 9])

In [30]:
# And: combine masks with & (elementwise).
# The parentheses are required because & binds tighter than ==.
r[(r%3 == 0) & (r%2 == 0)]

array([0, 6])

In [31]:
# Or: combine masks with | (elementwise).
# The parentheses are required because | binds tighter than ==.
r[(r%3 == 0) | (r%2 == 0)]

array([0, 2, 3, 4, 6, 8, 9])

### Array Selection
Selecting with lists of indices (fancy indexing) always copies the data into a new array.

In [32]:
# Whole array: 6x6 grid holding 0..35, filled row by row
aSelect = np.arange(36).reshape(6,6)
aSelect

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [33]:
# Select specific rows: a list of row indices picks rows 1 and 3
aSelect[ [1,3] ]

array([[ 6,  7,  8,  9, 10, 11],
       [18, 19, 20, 21, 22, 23]])

In [34]:
# Select individual elements: the first list holds row indices, the second
# holds column indices, and they are paired up position by position,
# so this picks elements (1, 1) and (3, 3) -- not a 2x2 sub-block
aSelect[ [1,3], [1,3] ]

array([ 7, 21])

## Compute


In [35]:
# 1-D integer sample array holding 0..9 for the computation examples
c = np.arange(10)
c

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
# Elementwise square root
np.sqrt(c)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [37]:
# Elementwise exponential, e ** x
np.exp(c)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [38]:
# Elementwise square: np.power and the ** operator give the same result
np.power(c,2), c ** 2

(array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81]),
 array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81]))

In [39]:
# Elementwise power with an array exponent: each entry is raised to itself.
# NOTE: the result keeps c's integer dtype, so much larger powers could
# overflow without an error.
np.power(c,c), c ** c

(array([        1,         1,         4,        27,       256,      3125,
            46656,    823543,  16777216, 387420489]),
 array([        1,         1,         4,        27,       256,      3125,
            46656,    823543,  16777216, 387420489]))

In [40]:
# Elementwise round up, round down, round to nearest integer, and sign
np.ceil(c), np.floor(c), np.rint(c), np.sign(c)

(array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1]))

In [41]:
# Random sample data with both integer and fractional parts
# (standard normal samples scaled by 3; no seed is set in the visible cells)
dm = np.random.randn(10)*3
dm

array([ 1.35311301, -1.8343628 , -6.70381209, -3.11682823,  4.76831201,
        1.10080266,  0.18858412,  2.71890846,  3.79869898, -2.10553403])

In [42]:
# np.modf returns a tuple of two arrays: (fractional parts, integer parts).
# Both parts carry the sign of the original value.
np.modf(dm)

(array([ 0.35311301, -0.8343628 , -0.70381209, -0.11682823,  0.76831201,
         0.10080266,  0.18858412,  0.71890846,  0.79869898, -0.10553403]),
 array([ 1., -1., -6., -3.,  4.,  1.,  0.,  2.,  3., -2.]))

In [58]:
# IF, THEN, ELSE: elementwise, 1 where dm > 0 and 0 otherwise
np.where(dm > 0, 1, 0)

array([1, 0, 0, 0, 1, 1, 1, 1, 1, 0])

In [76]:
# Sort (in place or new):
#   s.sort()    sorts s itself, in place
#   np.sort(x)  returns a new sorted array
s = np.random.randn(5)
s.sort()
s, np.sort(np.random.randn(5))

(array([-0.95941201, -0.34133546, -0.12947334,  0.58700386,  1.02044496]),
 array([-0.73340655, -0.55313901, -0.00632736,  0.17406187,  1.6953248 ]))

In [72]:
# Unique values, returned in sorted order
np.unique(np.array([7,1,1,4,6,8,6,7]))

array([1, 4, 6, 7, 8])

## Statistics


In [46]:
# Random 3x4 sample (standard normal) for the statistics examples;
# no seed is set in the visible cells, so the values differ on every run
st = np.random.randn(3,4)
st

array([[ 2.53630389, -0.48411332, -0.09174283, -1.66800425],
       [-0.51904113,  0.31170445, -2.66705987,  0.85074379],
       [ 0.44650065, -1.78888066,  0.41465137, -1.12324   ]])

In [56]:
# Descriptive statistics over all elements (no axis given):
# mean, sum, standard deviation, minimum, maximum
(st.mean(), st.sum(), st.std(), st.min(), st.max())

(-0.3151814914271633,
 -3.782177897125959,
 1.3323657831430098,
 -2.66705986613135,
 2.5363038929236796)

In [52]:
# Descriptive statistics along a selected axis
#   axis=0 == aggregate over rows    -> one value per column (4 values here)
#   axis=1 == aggregate over columns -> one value per row    (3 values here)
(st.mean(axis=0), st.mean(axis=1))

(array([ 0.82125447, -0.65376317, -0.78138377, -0.64683349]),
 array([ 0.07311087, -0.50591319, -0.51274216]))

In [53]:
# Cumulative sum: entry i of the result is the sum of entries 0..i
np.arange(10), np.arange(10).cumsum() 

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45]))

In [55]:
# Cumulative sum on a 2-D array: axis=0 accumulates down each column,
# so every row is the running total of the rows above it plus itself
np.arange(9).reshape(3,3).cumsum(axis=0) 

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])