# Section 5: Python for Data Analysis - NumPy

# 02 - NumPy Indexing and Selection

In [2]:
import numpy as np 

In [3]:
## This lecture covers how to extract single elements, or groups of elements, from a NumPy array.

## Sample 1-D array holding the integers 0 through 10 (the stop value 11 is excluded).
arr = np.arange(0, 11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

## Bracket Indexing and Selection

In [4]:
## To extract elements from a NumPy array we use the same syntax as for a Python list
## (square brackets and slice notation).

## Extract the element at index 8 (indices start at 0)
arr[8]

8

In [5]:
## Extract several values with slice notation: arr[start:stop] includes start but excludes stop
arr[1:5]

array([1, 2, 3, 4])

In [6]:
## Indices 0 through 4, i.e. the first five elements
arr[0:5]

array([0, 1, 2, 3, 4])

In [7]:
## Either bound of a slice can be omitted: a missing start defaults to the beginning of the
## array, and a missing stop defaults to its end.
arr[:6]

array([0, 1, 2, 3, 4, 5])

In [8]:
## Start given, stop omitted: everything from index 5 to the end of the array
arr[5:]

array([ 5,  6,  7,  8,  9, 10])

In [38]:
arr[:] ## Both bounds omitted: the slice spans the whole array (as a view of the same data, not a copy)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [41]:
## Grab the single element stored at index 9
arr[9]

9

## Broadcasting

In [10]:
## Broadcasting - one of the main differences between Python lists and NumPy arrays: a single value
## assigned to a slice is stretched to fill every position in that slice.
## First, look at the slice we are about to overwrite.
arr[0:5]

array([0, 1, 2, 3, 4])

In [11]:
arr[0:5] = 100 ## Broadcasts the value 100 to indices 0 through 4 (the stop index 5 is excluded)

In [12]:
## arr was modified in place by the slice assignment arr[0:5] = 100
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [13]:
## Reset arr to the integers 0 through 10, discarding the broadcast values written above.
arr = np.arange(0, 11)

In [14]:
## Display arr to confirm its current contents
arr


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [15]:
## Be careful -> plain assignment does NOT copy a NumPy array. `new_arr = arr` only binds a second
##               name to the same array object in memory, so a change made through either name is
##               visible through both. Basic slices behave similarly: arr[0:5] is a view of arr's
##               data, not a copy. Avoiding implicit copies saves memory when arrays are large.

new_arr = arr 

## Then, writing through the new name ...
new_arr[0:5] = 200
new_arr

array([200, 200, 200, 200, 200,   5,   6,   7,   8,   9,  10])

In [16]:
## Displaying arr shows the same change, because arr and new_arr are two names for one array.

arr

array([200, 200, 200, 200, 200,   5,   6,   7,   8,   9,  10])

In [17]:
## To avoid this, call the .copy() method: it returns an independent array with its own data,
## so writes to the copy never reach the original.

arr = np.arange(0, 11)
arr_copy = arr.copy()

In [18]:
## The original array, shown here for later comparison with arr_copy
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [19]:
## Broadcast the value 7 into every position of arr_copy
arr_copy[:] = 7

In [20]:
## The broadcast assignment changed only the copy ...

arr_copy

array([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7])

In [21]:
## Display the original arr for comparison with arr_copy
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

## Indexing matrices

In [22]:
## Build a 3x3 matrix from a nested list; each inner list becomes one row.
arr_2d = np.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])

In [23]:
## Display the matrix we will index into
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [24]:
## There are two formats for grabbing a single element from a 2-D array (matrix).

## Double-bracket format (not recommended): arr_2d[2] first produces the row at index 2 as an
## intermediate array, which is then indexed a second time with [2].

arr_2d[2][2] ## arr[row][column]

45

In [25]:
## Single-bracket format (preferred): row and column are given in one indexing operation
arr_2d[1,2]    ## arr[row, column]

30

In [26]:
## Slice notation can be applied to each axis to grab sub-matrices (not common in this course).
## Here :2 selects rows 0 and 1, and 1: selects every column from index 1 onward.
arr_2d[:2, 1:] ## Recall that the stop index is NOT included

array([[10, 15],
       [25, 30]])

## Conditional selection

In [27]:
## Rebind arr to the integers 1 through 10 for the conditional-selection examples.
arr = np.arange(1, 11)

In [28]:
## Display the array used in the comparisons below
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [29]:
## A comparison between an array and a value is applied element by element and yields a
## boolean array of the same length, e.g.
arr > 5

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [30]:
## We can store these boolean arrays in a variable as usual
bool_arr = arr>5

In [31]:
## Display the stored boolean mask
bool_arr

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [32]:
## Indexing with a boolean array keeps only the elements at positions where the mask is True:

arr[bool_arr] ## Conditional (boolean) indexing

array([ 6,  7,  8,  9, 10])

In [33]:
arr[arr>5]  ## Same selection, with the condition written inline instead of stored in a variable first

array([ 6,  7,  8,  9, 10])

In [34]:
arr[arr<3] ## This pattern is very common, especially when working with the pandas library.

array([1, 2])

In [35]:
## Rebind arr_2d to a larger matrix: the integers 0 through 49 laid out as 5 rows of 10 columns.
arr_2d = np.arange(50).reshape(5, 10)

In [36]:
## Display the matrix used in the exercise below
arr_2d

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [37]:
## Suggested exercise: predict the result before running.
## 1:3 selects rows 1 and 2; 3:5 selects columns 3 and 4 (stop indices are excluded).
arr_2d[1:3,3:5 ]

array([[13, 14],
       [23, 24]])