# NumPy Introduction

In [1]:
import numpy as np

### Creating arrays

In [2]:
# Build a NumPy array from a plain Python list.
L = [1,2,3,4]
np.array(L)

array([1, 2, 3, 4])

In [3]:
# consecutive integers from 1 up to, but not including, 10
np.arange(1,10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
# arange(start, stop) returns a numpy array of start, start+1, ..., stop-1 (stop itself is excluded)
np.arange(4,10)

array([4, 5, 6, 7, 8, 9])

In [5]:
# arange(start, stop, step) returns a numpy array that advances by `step` each time, still excluding stop
np.arange(4,10,2)

array([4, 6, 8])

In [6]:
# multidimensional array: a list of equal-length lists becomes a 2-D array, one inner list per row
array_of_arrays = [[1,2,3], [4,5,6], [7,8,9]]
np.array(array_of_arrays)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [9]:
# reshape(n_rows, n_columns) changes the shape of a numpy array;
# n_rows * n_columns must equal the number of elements (here 9 = 3 * 3)

np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Useful methods

In [10]:
# small unsorted sample array used by the method examples that follow
arr = np.array([17, 9, 3, -5, -1])

In [11]:
# largest element
arr.max()

17

In [12]:
# smallest element
arr.min()

-5

In [13]:
# arithmetic mean of all elements (always returned as a float)
arr.mean()

4.6

In [14]:
# population standard deviation: divides by N, not N - 1 (pass ddof=1 for the sample version)
arr.std()

7.735631842325487

In [15]:
# sorts arr in place (ascending) and returns None, so this cell displays nothing
arr.sort()

In [16]:
# arr itself is now in ascending order
arr

array([-5, -1,  3,  9, 17])

In [17]:
# the integers 1..25 laid out row by row as a 5x5 matrix
X = np.arange(1,26).reshape(5,5)
X

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [18]:
# axis=1 averages across the columns: one mean per row
X.mean(axis=1)

array([ 3.,  8., 13., 18., 23.])

In [19]:
# axis=0 averages down the rows: one mean per column
X.mean(axis=0)

array([11., 12., 13., 14., 15.])

In [None]:
# same axis convention as mean: one standard deviation per column
X.std(axis=0)

In [20]:
# keep the per-column means so they can be subtracted from X
mean = X.mean(axis=0)
mean

array([11., 12., 13., 14., 15.])

In [21]:
# broadcasting: the length-5 vector of column means is subtracted from every row of X,
# which centers each column on zero
X - mean

array([[-10., -10., -10., -10., -10.],
       [ -5.,  -5.,  -5.,  -5.,  -5.],
       [  0.,   0.,   0.,   0.,   0.],
       [  5.,   5.,   5.,   5.,   5.],
       [ 10.,  10.,  10.,  10.,  10.]])

### Random numbers

In [24]:
# random numbers between 0 and 1, drawn uniformly from [0, 1); the arguments give the shape (3x3)
np.random.rand(3,3)

array([[0.80950432, 0.82179254, 0.67596438],
       [0.53041099, 0.32784551, 0.07110744],
       [0.24634188, 0.0020426 , 0.19838933]])

In [27]:
# standard normal distribution (mean 0, standard deviation 1), shape 4x5;
# values are unbounded, but about 99.7% of them fall between -3 and 3
np.random.randn(4,5)

array([[ 0.31099535, -0.66810001,  0.36652868, -0.25394261, -0.08141159],
       [-2.18906466,  0.522568  ,  0.80358265,  2.32067552, -1.33267585],
       [ 1.67937768,  1.6662256 , -1.09146987,  0.7297528 ,  0.88564406],
       [ 0.46690242, -1.21906852, -0.44770158, -0.27694125,  1.06939096]])

In [28]:
# randint(start, exclusive stop, shape) returns random integers with start <= value < stop, with a given shape
np.random.randint(0, 10, (5,3))

array([[0, 9, 3],
       [4, 1, 6],
       [0, 1, 8],
       [0, 6, 2],
       [6, 0, 9]])

In [None]:
# 15 random integers from 1 to 100 (the stop value 101 is excluded), as a 1-D array
np.random.randint(1,101, 15)

### Array indexing

In [29]:
# arange with a single argument starts at 0: the integers 0..9
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
# single element at index 8 (indices start at 0)
arr[8]

8

In [31]:
# slice from index 3 up to, but not including, index 8
arr[3:8]

array([3, 4, 5, 6, 7])

In [32]:
# everything except the last element
arr[:-1]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [33]:
# negative indices count from the end: -1 is the last element
arr[-1]

9

In [34]:
# a step of -1 walks the array backwards, i.e. returns it reversed
arr[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

### Array broadcasting

In [35]:
# assigning a scalar to a slice broadcasts it to every selected element; arr is modified in place
arr[:5] = 25
arr

array([25, 25, 25, 25, 25,  5,  6,  7,  8,  9])

In [45]:
# reset arr to 0..9 for the view/copy examples
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [46]:
# a slice is a view, not a copy: arr2 shares its data with arr (like using a "pointer" to arr),
# so writing through arr2 also overwrites the first five elements of arr
arr2 = arr[:5]
arr2[:] = 99
arr2

array([99, 99, 99, 99, 99])

In [47]:
# the original array was changed by the assignment made through arr2
arr

array([99, 99, 99, 99, 99,  5,  6,  7,  8,  9])

In [None]:
# using a copy of arr: copy() gives arr3 its own data, so zeroing arr3 does not touch arr
arr3 = arr.copy()
arr3[:] = 0
arr3

In [None]:
# unchanged by the writes to arr3
arr

### 2d Array (Matrix) indexing

In [48]:
# the integers 0..11 arranged as 4 rows x 3 columns
arr_2d = np.arange(12).reshape(4,3)
arr_2d

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [49]:
# chained indexing: arr_2d[2] selects row 2 first, then [2] picks the element at index 2 of that row
arr_2d[2][2]

8

In [50]:
# arr[row_index, col_index]: a single indexing operation with a (row, column) pair

arr_2d[2,2]

8

In [51]:
arr_2d[0]          # get a row (a sample): a single index selects along the first axis

array([0, 1, 2])

In [52]:
arr_2d[:, 0]     # get a column (a single feature): ':' selects every row, whatever the row count

array([0, 3, 6, 9])

In [53]:
arr_2d[1:3, 1:]   # get a slice: rows 1 and 2, columns 1 through the last

array([[4, 5],
       [7, 8]])

### Appending arrays

In [54]:
# first 3x3 operand: the integers 0..8
a = np.arange(9).reshape(3,3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [55]:
# second 3x3 operand: random integers from 9 to 17 (stop value 18 is excluded)
b = np.random.randint(9,18, (3,3))
b

array([[11, 17, 12],
       [13,  9, 11],
       [12, 16, 12]])

In [56]:
# vertical stack: the rows of b are appended below the rows of a (6 rows, 3 columns)
c = np.vstack((a,b))
c

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [11, 17, 12],
       [13,  9, 11],
       [12, 16, 12]])

In [57]:
# axis=0 joins along the rows, giving the same result as vstack
np.concatenate((a,b),axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [11, 17, 12],
       [13,  9, 11],
       [12, 16, 12]])

In [58]:
# horizontal stack: the columns of b are placed to the right of a (3 rows, 6 columns)
np.hstack((a,b))

array([[ 0,  1,  2, 11, 17, 12],
       [ 3,  4,  5, 13,  9, 11],
       [ 6,  7,  8, 12, 16, 12]])

In [None]:
# axis=1 joins along the columns, giving the same result as hstack
np.concatenate((a,b),axis=1)

### Boolean (conditional) selection

In [59]:
# the integers 1..10, used for the filtering examples
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [60]:
arr > 5       # boolean array: the comparison is applied to every element

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [61]:
arr[arr>5]      # keeps only the elements where the mask is True - you will use this A LOT in pandas!

array([ 6,  7,  8,  9, 10])

### Element-wise operations

In [63]:
# Python list
L = [1,2,3,4,5]

# NumPy array holding the same values
arr = np.array([1,2,3,4,5])

In [64]:
# for a Python list, * repeats the list; no arithmetic is performed
L*2

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [65]:
# for a NumPy array, * multiplies every element
arr*2

array([ 2,  4,  6,  8, 10])

In [66]:
# every element squared
arr**2

array([ 1,  4,  9, 16, 25], dtype=int32)

In [67]:
# the scalar is broadcast: 100 is added to every element
arr + 100

array([101, 102, 103, 104, 105])

In [68]:
# second operand with the same length as arr: the integers 6..10
arr2 = np.arange(6,11)
arr2

array([ 6,  7,  8,  9, 10])

In [69]:
# element-wise sum: elements at the same position are added
arr + arr2

array([ 7,  9, 11, 13, 15])

In [71]:
# element-wise product of two same-length arrays (not a dot product)
arr*arr2

array([ 6, 14, 24, 36, 50])

### Universal functions and other array functions

In [72]:
# a negative step counts down: from 10 to 0 (the stop value -1 is excluded)
arr_ordered = np.arange(10,-1, -1)
arr_ordered

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [73]:
np.percentile(arr_ordered, [50, 75])   # 50th percentile (median) and 75th percentile; the interquartile range would be 75th minus 25th


array([5. , 7.5])

In [74]:
# middle value of the sorted data (same as the 50th percentile)
np.median(arr_ordered)

5.0

In [None]:
# universal function: the square root is computed element by element
np.sqrt(arr_ordered)

In [75]:
# smallest value; function form of arr_ordered.min()
np.min(arr_ordered)

0

In [None]:
# index position of the smallest value (the first one if the minimum occurs more than once)
np.argmin(arr_ordered)

In [None]:
# largest value; function form of arr_ordered.max()
np.max(arr_ordered)

In [None]:
# index position of the largest value (the first one if the maximum occurs more than once)
np.argmax(arr_ordered)

In [None]:
# index positions that would sort the array in ascending order:
# the index of the smallest value comes first, the index of the largest last
np.argsort(arr_ordered)