# Numpy

In [31]:
import numpy as np 

## Numpy Array Construction and Check

In [87]:
# Seed the legacy global RNG first so every run draws the same "random" integers.
np.random.seed(42)

# randint(10, ...) samples integers from the half-open range [0, 10).
x1 = np.random.randint(10, size=6)          # one-dimensional, shape (6,)
x2 = np.random.randint(10, size=(3, 4))     # two-dimensional, shape (3, 4)
x3 = np.random.randint(10, size=(3, 4, 5))  # three-dimensional, shape (3, 4, 5)

In [37]:
print("x3 ndim: ", x3.ndim) # check the number of dimensions
print("x3 shape:", x3.shape) # check size of each dimension
print("x3 size: ", x3.size) # check total size of the array
print("x3 dtype:", x3.dtype) # check data type of the array

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
x3 dtype: int64


## Numpy Array Indexing: Accessing Single Elements

### One-Dimension

In [39]:
x1

array([6, 3, 7, 4, 6, 9])

In [40]:
x1[0] # Select first item (index starting from 0)

6

In [41]:
x1[-1] # Select last item

9

In [42]:
x1[-2] # Select second last item

6

In [47]:
x1[-2] = 100 # Modify the array in place by assigning a single value
x1

array([  6,   3,   7,   4, 100,   9])

### Multiple Dimension

In [43]:
x2

array([[2, 6, 7, 4],
       [3, 7, 7, 2],
       [5, 4, 1, 7]])

In [44]:
x2[0,0]

2

In [45]:
x2[-1,-1]

7

In [46]:
x2[-2,1]

7

## Array Slicing: Accessing Subarrays
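Slices use the syntax `x[start:stop:step]`. Any of the three values may be omitted, in which case they default to `start=0`, `stop=`*size of the dimension*, and `step=1` (when `step` is negative, the defaults for `start` and `stop` are swapped).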

### One-Dimension

In [49]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [50]:
x[:5]  # first five elements

array([0, 1, 2, 3, 4])

In [51]:
x[5:]  # elements from index 5 onward (index 5 included)

array([5, 6, 7, 8, 9])

In [52]:
x[4:7]  # middle sub-array

array([4, 5, 6])

In [53]:
x[::2]  # every other element

array([0, 2, 4, 6, 8])

In [54]:
x[1::2]  # every other element, starting at index 1

array([1, 3, 5, 7, 9])

In [55]:
x[::-1]  # all elements, reversed 

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [56]:
x[5::-2]  # reversed every other from index 5

array([5, 3, 1])

### Multiple Dimension

In [63]:
x2

array([[  2,   6,   7,   4],
       [  3, 200,   7,   2],
       [  5,   4,   1,   7]])

In [64]:
x2[:2, :3]  # two rows, three columns

array([[  2,   6,   7],
       [  3, 200,   7]])

In [65]:
x2[:, ::2]  # all rows, every other column

array([[2, 7],
       [3, 7],
       [5, 1]])

In [66]:
x2[::-1, ::-1] # subarray dimensions can even be reversed together

array([[  7,   1,   4,   5],
       [  2,   7, 200,   3],
       [  4,   7,   6,   2]])

In [67]:
x2[:, 0] # first column of x2

array([2, 3, 5])

In [68]:
x2[0, :] # first row of x2

array([2, 6, 7, 4])

## No-copy VS. Copy

### No-Copy
One important–and extremely useful–thing to know about array slices is that they return views rather than copies of the array data. This default behavior is actually quite useful: it means that when we work with large datasets, we can access and process pieces of these datasets without the need to copy the underlying data buffer.

In [88]:
x2

array([[2, 6, 7, 4],
       [3, 7, 7, 2],
       [5, 4, 1, 7]])

In [89]:
x2_sub = x2[:2, :2]
x2_sub

array([[2, 6],
       [3, 7]])

In [90]:
x2_sub[0, 0] = 99
x2_sub

array([[99,  6],
       [ 3,  7]])

In [91]:
x2

array([[99,  6,  7,  4],
       [ 3,  7,  7,  2],
       [ 5,  4,  1,  7]])

### Copy
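Sometimes it is useful to explicitly copy the data within an array or a subarray instead of working on a view. This is done with the `copy()` method. If we now modify the copied subarray, the original array is not touched.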

In [92]:
x2

array([[99,  6,  7,  4],
       [ 3,  7,  7,  2],
       [ 5,  4,  1,  7]])

In [93]:
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[99,  6],
       [ 3,  7]])

In [94]:
x2_sub_copy[0, 0] = 42
x2_sub_copy

array([[42,  6],
       [ 3,  7]])

In [95]:
x2

array([[99,  6,  7,  4],
       [ 3,  7,  7,  2],
       [ 5,  4,  1,  7]])

## Reshaping of Arrays

In [97]:
np.arange(1, 10).reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [98]:
x = np.array([1, 2, 3])  # 1d Array
x

array([1, 2, 3])

In [99]:
x.reshape((1, 3)) # row vector via reshape

array([[1, 2, 3]])

In [100]:
x[np.newaxis, :] # row vector via newaxis

array([[1, 2, 3]])

In [101]:
x.reshape((3, 1)) # column vector via reshape

array([[1],
       [2],
       [3]])

In [102]:
x[:, np.newaxis] # column vector via newaxis

array([[1],
       [2],
       [3]])

## Array Concatenation

### One-Dimension

In [103]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

### Multiple Same Dimension

In [108]:
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
np.concatenate([grid, grid], axis=0) # concatenate along the first axis (rows are stacked vertically)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [109]:
np.concatenate([grid, grid], axis=1) # concatenate along the second axis (columns are appended side by side)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

### Multiple Different Dimension

In [110]:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])
np.vstack([x, grid]) # vertically stack the arrays

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [111]:
y = np.array([[99],
              [99]])
np.hstack([grid, y]) # horizontally stack the arrays

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

## Array Splitting

### One-Dimension

In [112]:
# Split a flat sequence into three pieces at explicit split points.
# np.split accepts any array-like and returns a list of arrays; with indices
# [3, 5] the pieces are x[:3], x[3:5] and x[5:].
x = [1, 2, 3, 99, 99, 3, 2, 1]
# Use fresh names: rebinding x1, x2, x3 here would silently replace the random
# arrays created at the top of the notebook under those names.
head, middle, tail = np.split(x, [3, 5]) # Pass a list of split points
print(head, middle, tail)

[1 2 3] [99 99] [3 2 1]


### Multiple Dimension

In [114]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [116]:
upper, lower = np.vsplit(grid, [2]) # Vertical Split 4x4 -> two 2x4
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [117]:
left, right = np.hsplit(grid, [2])  # Horizontal Split 4x4 -> two 4x2
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Array Aggregations

### One-Dimension

In [150]:
L = np.random.random(10)
L

array([0.20891872, 0.67114352, 0.35864678, 0.25416365, 0.29529059,
       0.32255076, 0.84866979, 0.13662133, 0.708911  , 0.55281998])

In [151]:
np.sum(L)

4.357736117647582

In [152]:
np.min(L), np.max(L)

(0.13662133144202881, 0.8486697949246744)

### Multiple Dimension

In [125]:
M = np.random.random(size=(3, 4))
M

array([[0.40450813, 0.8877701 , 0.85092845, 0.93563499],
       [0.78534065, 0.66898825, 0.58068662, 0.37228277],
       [0.94013344, 0.97366384, 0.28392097, 0.30536386]])

In [126]:
M.sum()

7.989222076992936

In [127]:
M.min(axis=0) #  we can find the minimum value within each column by specifying axis=0

array([0.40450813, 0.66898825, 0.28392097, 0.30536386])

In [128]:
M.max(axis=1) # we can find the maximum value within each row by specifying axis=1

array([0.93563499, 0.78534065, 0.97366384])

Commonly used aggregation functions:
np.sum, np.prod, np.mean, np.std, np.var, np.min, np.max, np.argmin, np.argmax, np.median, np.percentile, np.any, np.all

## Array Broadcasting
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.

Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.

Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

## Working with Boolean Arrays

In [135]:
x = np.random.randint(15, size=(3,4))
x

array([[ 0,  2,  1,  8],
       [ 9, 14,  5,  9],
       [ 2,  7, 13,  7]])

In [137]:
np.sum(x < 6) # how many values less than 6?

5

In [138]:
np.sum(x < 6, axis=1) # how many values less than 6 in each row?

array([3, 1, 1])

In [139]:
np.sum( (x < 6) & (x > 1) ) # how many values less than 6 AND bigger than 1?

3

In [141]:
np.sum( (x < 6) | (x > 8) ) # how many values less than 6 OR bigger than 8?

9

In [142]:
x[x > 5] # What is returned is a one-dimensional array filled with all the values that meet this condition

array([ 8,  9, 14,  9,  7, 13,  7])

In [143]:
x[ (x < 6) & (x > 1)]

array([2, 5, 2])

## Sorting Arrays

### One-Dimension

In [144]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x) # returns a sorted version of the array without modifying the input

array([1, 2, 3, 4, 5])

In [146]:
x.sort() # If you prefer to sort the array in-place, you can instead use the sort method of arrays
x

array([1, 2, 3, 4, 5])

In [147]:
x = np.array([2, 1, 4, 3, 5]) 
i = np.argsort(x) # returns the indices of the sorted elements
i

array([1, 0, 3, 2, 4])

In [149]:
x[i] # These indices can then be used (via fancy indexing) to construct the sorted array if desired

array([1, 2, 3, 4, 5])

In [157]:
y = np.array([7, 2, 3, 1, 6, 5, 4])
np.partition(y, 3) # the result is a new array with the smallest 3 values to the left of the partition, 
                   # and the remaining values to the right, in arbitrary order

array([2, 1, 3, 4, 6, 5, 7])

### Multiple Dimension

In [153]:
x = np.random.randint(15, size=(4,6))
x

array([[ 9,  6,  9,  2,  1,  8],
       [ 7, 12, 11,  9,  6,  8],
       [ 3, 12,  3,  0,  7, 13],
       [13, 11,  2,  6, 14, 11]])

In [155]:
np.sort(x, axis=0) # sort each column of x

array([[ 3,  6,  2,  0,  1,  8],
       [ 7, 11,  3,  2,  6,  8],
       [ 9, 12,  9,  6,  7, 11],
       [13, 12, 11,  9, 14, 13]])

In [156]:
np.sort(x, axis=1) # sort each row of x

array([[ 1,  2,  6,  8,  9,  9],
       [ 6,  7,  8,  9, 11, 12],
       [ 0,  3,  3,  7, 12, 13],
       [ 2,  6, 11, 11, 13, 14]])

In [159]:
np.partition(x, 2, axis=1) # The result is an array where the first two slots in each row contain the smallest values from that row, 
                           # with the remaining values filling the remaining slots.

array([[ 1,  2,  6,  9,  9,  8],
       [ 6,  7,  8,  9, 12, 11],
       [ 0,  3,  3, 12,  7, 13],
       [ 2,  6, 11, 13, 14, 11]])