In [2]:
import numpy as np 

In [12]:
## Test of numpy speed vs. regular list 
np_arr = np.arange(1000000)
reg_list = list(range(0,1000000))

## %time diff. for a simple operation
%time for _ in range(10): np_arr2 = np_arr * 2 # 38.9 ms
%time for _ in range(10): reg_list2 = reg_list * 2 # 273 ms

CPU times: user 21.5 ms, sys: 14.9 ms, total: 36.4 ms
Wall time: 38.9 ms
CPU times: user 230 ms, sys: 40.2 ms, total: 270 ms
Wall time: 273 ms


## 4.1 ndarray: NumPy's multidimensional array object

In [49]:
## Init an ndarray with 2 rows, 3 columns (random samples from a standard normal)
test_ndarray = np.random.randn(2,3)

## Some attributes of an ndarray object
## (asserted instead of left as bare expressions: only the LAST expression of a
##  cell is displayed, so mid-cell `x.shape` / `x.dtype` lines show nothing)
assert test_ndarray.shape == (2, 3)      # shape is a tuple: (rows, cols)
assert test_ndarray.dtype == np.float64  # randn produces float64

## The many ways to create an ndarray object

## 1) Using lists
arr1 = np.array([1,2,3])

## 2) Nested sequences, i.e. list of lists (of equal length)
arr2 = np.array([[1,2,3],[4,5,6]])

print("arr1:", arr1)
print("arr2:", arr2)

## 3) Filling the array with 1s, 0s, empty (no vals)
zeros = np.zeros(2)
ones = np.ones([2,3])
## @note: np.empty does NOT initialise memory - the printed values are arbitrary
empty_array = np.empty([1,2])

## To create an array with the same shape / dtype as an existing array, but filled with 1s -> ones_like()
ones_like = np.ones_like(zeros) # Vector of length 2, same dtype as `zeros`

print("\n *Zeros ",zeros, "\n *Ones ", ones, "\n *Empty ", empty_array,
     "\n *Ones like ", ones_like)

## 4) Higher dimensional ndarrays
higher_dim_array = np.zeros((2,2,2)) # @note: Using a tuple
higher_dim_array2 = np.zeros([2,2,2]) #    A list works as the shape too
print("\n *Higher Dim", higher_dim_array)

## Tuple and list shapes give identical arrays.
## np.array_equal compares shape and every element in one call; it replaces
## np.alltrue(np.equal(...)), and np.alltrue no longer exists in NumPy >= 2.0.
assert np.array_equal(higher_dim_array, higher_dim_array2)


## Quick example of casting:
print("\nCasting Example:")
## @note: despite its name, `float_array` starts out with an integer dtype;
##        astype() returns a NEW float64 array and leaves the original untouched.
float_array = np.array([[1,2],[3,4]], dtype= int)
print(float_array.dtype)
print(float_array.astype(np.float64)) # Now float



arr1: [1 2 3]
arr2: [[1 2 3]
 [4 5 6]]

 *Zeros  [ 0.  0.] 
 *Ones  [[ 1.  1.  1.]
 [ 1.  1.  1.]] 
 *Empty  [[ 0.  0.]] 
 *Ones like  [ 1.  1.]

 *Higher Dim [[[ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]]]

Casting Example:
int64
[[ 1.  2.]
 [ 3.  4.]]


### Basic Slicing and Indexing 

In [92]:
## @note: Array slices are VIEWS of the original array
##        any modification to a slice will be reflected in the original array, e.g.
arr = np.arange(1,20)
arr_slice = arr[10:15]
arr_slice[2] = -259
print('arr_slice: ', arr_slice)
print('arr: ', arr)

## Do the same with a list - will find that the slice has been copied
## (built from range() so the elements are plain Python ints, not NumPy scalars)
lis = list(range(1,20))
lis_slice = lis[10:15]
lis_slice[2] = -259
print('lis_slice: ', lis_slice) # Slice has been copied - not referencing same object
print('lis: ', lis)

## Why? To avoid copying large ndarray objects

## What if we want to change all values in the slice?
## --> use "bare" slice
arr_slice[:] = -259
print(arr_slice)

## To explicitly copy a slice - use copy()
arr_slice2 = arr[10:15].copy()
arr_slice2[:] = -999999
## `arr` keeps the -259 values written through the view above;
## the -999999 writes only touch the copy.
print("arr (should not be modified):" ,arr)

## Accessing multidimensional arrays
print("\n\nMultidim arrays")
arr2d = np.array([[1,2,3],[4,5,6],[12,13,14]])
## The below two are equivalent
print(arr2d[0][1])
print(arr2d[0,1])

## What if we want the 1st row (all cols) -> omit the later index
print(arr2d[0])

## Slicing 2d arrays
## e.g. select the first 2 rows of arr2d
print(arr2d)
print(arr2d[:2])
## select first 2 col of arr2d - @note colon ':' takes entire corresponding axis
print(arr2d[:,:2]) # Have to specify 'earlier' indices

arr_slice:  [  11   12 -259   14   15]
arr:  [   1    2    3    4    5    6    7    8    9   10   11   12 -259   14   15
   16   17   18   19]
lis_slice:  [11, 12, -259, 14, 15]
lis:  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[-259 -259 -259 -259 -259]
arr (should not be modified): [   1    2    3    4    5    6    7    8    9   10 -259 -259 -259 -259 -259
   16   17   18   19]


Multidim arrays
2
2
[1 2 3]
[[ 1  2  3]
 [ 4  5  6]
 [12 13 14]]
[[1 2 3]
 [4 5 6]]
[[ 1  2]
 [ 4  5]
 [12 13]]


### Boolean Indexing 

In [109]:
## Boolean indexing demo: `names` labels the rows of a 2d array;
## a boolean mask built from `names` picks out the rows labelled 'Bob'.
arr2d = np.random.randn(5,2)
names = np.array(['Bob', 'John', 'Bob', 'Calypso', 'Amo'])
bob_mask = names == 'Bob'  # elementwise comparison -> one boolean per name

print(arr2d)
print("Boolean Indexing: ", arr2d[bob_mask])

## @important @note - the boolean array must be the same length as the axis being indexed
## Check: the two numbers printed below should be equal
print(arr2d.shape[0])  # number of rows (length of axis 0)
print(bob_mask.size)   # number of entries in the mask




[[ 1.96700433 -0.2332836 ]
 [ 1.25277127  0.56085311]
 [ 0.64645504  1.80796221]
 [ 0.72722977 -0.19207564]
 [-0.43976158  0.49817731]]
Boolean Indexing:  [[ 1.96700433 -0.2332836 ]
 [ 0.64645504  1.80796221]]
5
5
