In [1]:
import numpy as np

### Create array from list and define the dtype

In [8]:
a = np.array([1, 2, 3, 4])
a

array([1, 2, 3, 4])

In [5]:
a.dtype

dtype('int64')

In [9]:
# Request the element type explicitly; the integer inputs are stored as 32-bit floats
a = np.array([1, 2, 3, 4], dtype=np.float32)
a

array([1., 2., 3., 4.], dtype=float32)

### Other ways of creating arrays

In [11]:
np.zeros((3, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [185]:
# The *_like constructors build an array with the same shape and dtype as an existing array
a = np.random.random((3, 3))
np.zeros_like(a)

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [12]:
np.ones((3, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [13]:
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [14]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [15]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [16]:
np.random.random((3, 3))

array([[0.19697747, 0.17476743, 0.77409331],
       [0.9758379 , 0.12191345, 0.69023269],
       [0.09456132, 0.63996858, 0.83619912]])

In [18]:
np.random.normal(0, 1, (3, 3))

array([[-0.4412539 , -0.44701792,  0.15158936],
       [-0.3827202 , -0.23924838,  0.55177496],
       [ 0.34524298, -0.91023145,  0.89092761]])

In [20]:
np.random.randint(0, 10, (3, 3))

array([[9, 3, 0],
       [9, 9, 6],
       [5, 8, 9]])

In [21]:
# identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [22]:
# np.empty allocates the array without initializing it, so the values are
# whatever happens to already exist at that memory location
np.empty(3)

array([1., 1., 1.])

### Array Attributes

In [24]:
# 3-D array of random integers drawn from [0, 10), used to inspect array attributes
x = np.random.randint(0, 10, size=(3, 4, 5))

In [26]:
x.ndim

3

In [27]:
x.shape

(3, 4, 5)

In [28]:
x.size

60

In [29]:
x.dtype

dtype('int64')

In [30]:
# bytes for each item in the array
x.itemsize

8

In [31]:
# Total size in bytes of the array
x.nbytes

480

### Reshape, concatenate and split

In [58]:
# Reshape when both target dimensions are known
np.arange(1, 10).reshape(3, 3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
# Reshape when only one dimension is known: -1 tells NumPy to infer the other
# from the number of elements (12 / 3 = 4 columns here)
np.arange(12).reshape(3, -1)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [84]:
# Make it 2-D: np.newaxis adds a trailing axis, giving a (3, 1) column
np.arange(3)[:, np.newaxis]

array([[0],
       [1],
       [2]])

In [76]:
# Concatenate two 1-D arrays end to end. They only have axis 0, which is also
# np.concatenate's default axis.
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [83]:
# Promote each 1-D array to a (1, 3) row, then concatenate along axis 0 to
# stack the rows into a (2, 3) array
np.concatenate([x[np.newaxis, :], y[np.newaxis, :]], axis=0)

array([[1, 2, 3],
       [3, 2, 1]])

In [87]:
# np.hstack joins arrays horizontally; for 1-D inputs that means end to end
# along axis 0 (for 2-D and higher inputs it joins along axis 1)
np.hstack([x, y])

array([1, 2, 3, 3, 2, 1])

In [88]:
# np.vstack stacks arrays vertically (along axis 0); each 1-D input of
# shape (3,) is treated as a (1, 3) row
np.vstack([x, y])

array([[1, 2, 3],
       [3, 2, 1]])

In [89]:
# Split at the given indices: [3, 5] yields x[:3], x[3:5] and x[5:]
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [96]:
# Split vertically: cut the rows (axis 0) into 2 equal groups
grid = np.arange(16).reshape(4, 4)
upper, lower = np.vsplit(grid, 2)
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [97]:
# Split horizontally: cut the columns (axis 1) of the 4x4 `grid` into 2 equal groups
left, right = np.hsplit(grid, 2)
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## and/or vs &/|

`&` and `|` are bitwise operators: applied to arrays, they operate on the individual elements within each object. \
`and` and `or`, on the other hand, evaluate the truth value of each object as a whole. \
So, for Boolean operations on arrays, we almost always prefer the bitwise operators.

In [171]:
# Two boolean masks of equal length, compared element by element in the following cells
A = np.array([True, False, True, False, True, False])
B = np.array([True, True, True, False, True, True])

In [172]:
A|B

array([ True,  True,  True, False,  True,  True])

In [173]:
# `or` needs a single truth value for each whole array, which NumPy refuses to
# guess for arrays with more than one element. The error is the point of this
# cell, so catch it and display the message instead of halting a full re-run.
try:
    A or B
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

## Advanced Ufunc Features

### Specifying output

In [104]:
# Write the ufunc result straight into the existing array y via `out=`
# instead of returning a newly allocated array
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
y

array([ 0., 10., 20., 30., 40.])

In [105]:
# `out=` can also be an array view: the powers 2**x (x is still np.arange(5))
# are written into every other element of y, leaving the rest at zero
y = np.zeros(10)
np.power(2, x, out=y[::2])
y

array([ 1.,  0.,  2.,  0.,  4.,  0.,  8.,  0., 16.,  0.])

### Aggregates

In [107]:
x = np.arange(1, 6)
# Add with reduce: repeatedly applies np.add until a single value is left (the sum of x)
np.add.reduce(x)

15

In [108]:
# Multiply with reduce: the product of all elements of x
np.multiply.reduce(x)

120

In [109]:
# Add with accumulation, equivalent to np.cumsum
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [110]:
# Multiply with accumulate: keeps every intermediate result (the running product)
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])

## Broadcasting

Broadcasting in NumPy follows a strict set of rules to determine the interaction between two arrays:
- **Rule 1**: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
- **Rule 2**: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is stretched to match the other shape.
- **Rule 3**: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

### Example 1: 2d array + 1d array

In [123]:
# A 2x3 matrix of ones; show its contents and then its shape
M = np.ones((2, 3))
print(M, M.shape, sep="\n")

[[1. 1. 1.]
 [1. 1. 1.]]
(2, 3)


In [124]:
# A 1-D vector [0, 1, 2]; show its contents and then its shape
a = np.arange(0, 3)
print(a, a.shape, sep="\n")

[0 1 2]
(3,)


In [120]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

### Example 2: both arrays need to be broadcast

In [126]:
# a is a (3, 1) column and b a (3,) vector, so both get stretched when added
a = np.arange(3)[:, np.newaxis]
b = np.arange(3)

In [127]:
print(a.shape)
print(b.shape)

(3, 1)
(3,)


In [128]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

### Example 3: the two arrays are not compatible

In [129]:
# (3, 2) + (3,): the 1-D array is padded to (1, 3), so the trailing dimensions
# are 2 and 3. They disagree and neither is 1, so broadcasting fails (Rule 3).
M = np.ones((3, 2))
a = np.arange(3)
try:
    M + a
except ValueError as err:
    # The error is the point of this example: display it without halting a
    # Restart & Run All of the notebook.
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

## Fancy Indexing

In [177]:
# Index a 1-D array with a list of indices: returns the elements at
# positions 3, 7 and 4, in that order
rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
ind = [3, 7, 4]
x[ind]

array([71, 86, 60])

In [179]:
# Index a 2-D array with index arrays: row and col are paired element-wise,
# selecting X[0, 2], X[1, 1] and X[2, 3]
X = np.arange(12).reshape((3, 4))
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
X[row, col]

array([ 2,  5, 11])

In [180]:
# With broadcasting: the (3, 1) column of row indices is broadcast against
# the (3,) col indices, so every row is combined with every column -> (3, 3)
X[row[:, np.newaxis], col]

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [181]:
# Repeated indices do not accumulate: x[i] += 1 means x[i] = x[i] + 1, so
# x[i] + 1 is evaluated once and then assigned, and each listed position
# ends up incremented a single time
x = np.zeros(10)
i = [2, 3, 3, 4, 4, 4]
x[i] += 1
x

array([0., 0., 1., 1., 1., 0., 0., 0., 0., 0.])

In [182]:
# Instead, use np.add.at: it applies the addition in place once per listed
# index, so positions that appear several times are incremented several times
x = np.zeros(10)
i = [2, 3, 3, 4, 4, 4]
np.add.at(x, i, 1)
x

array([0., 0., 1., 2., 3., 0., 0., 0., 0., 0.])

In [186]:
# IPython help lookup (`?` suffix): shows the docstring for np.searchsorted
np.searchsorted?

In [187]:
# IPython help lookup (`?` suffix): shows the docstring for np.partition
np.partition?