### <font color="brown">NumPy Continued</font>

---

In [45]:
import numpy as np

#### Slicing with boolean filter

---

**Quick review of the last part of Oct 19**

In [46]:
arr=np.arange(9)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [47]:
slc = arr[arr > 4]  # pick elements > 4
slc

array([5, 6, 7, 8])

In [48]:
# basically what you are doing is making a boolean filter array, then applying it on arr
# NOTE: the name `filter` shadows Python's built-in filter() from this cell onward;
# a name such as `mask` would avoid that
filter = arr > 4
filter

array([False, False, False, False, False,  True,  True,  True,  True])

In [49]:
slc = arr[filter]
slc

array([5, 6, 7, 8])

---

**Ok, onward**

In [165]:
arr2d = np.arange(1,13).reshape(4,3)
arr2d

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [167]:
arr2d_slc = arr2d[[True,False,True,True]] # get all but 2nd row
print(arr2d_slc)

[[ 1  2  3]
 [ 7  8  9]
 [10 11 12]]


In [168]:
arr2d_slc[0] = 0  # change 1st row to all zeros
arr2d_slc

array([[ 0,  0,  0],
       [ 7,  8,  9],
       [10, 11, 12]])

In [169]:
arr2d   # unchanged

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [170]:
arr2d[[i%2 == 0 for i in range(4)]]  # even indexed rows

array([[1, 2, 3],
       [7, 8, 9]])

**applying a boolean mask from one array to another**

In [172]:
numarr = np.array([2,5,4,12])
arr2d[(numarr % 2 == 0)]   # basically arr2d[[True,False,True,True]]: 2, 4 and 12 are even, 5 is not

array([[ 1,  2,  3],
       [ 7,  8,  9],
       [10, 11, 12]])

In [174]:
arr2d[~(numarr % 2 == 0)]  # negation, gets only the 2nd row of arr2d

array([[4, 5, 6]])

In [175]:
arr2d[(numarr % 2 == 0),0]  # only the 1st column of selected rows

array([ 1,  7, 10])

In [176]:
mask = (numarr < 3) | (numarr > 10)
mask

array([ True, False, False,  True])

In [177]:
arr2d[mask]  # first and last rows

array([[ 1,  2,  3],
       [10, 11, 12]])

In [178]:
arr2dcopy = arr2d.copy()
arr2dcopy[arr2dcopy > 9] = 0  # set all values > 9 to 0
arr2dcopy

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [0, 0, 0]])

**Global filtering with any and all**

In [181]:
arr = np.array([0,1,-5,2,9,0,3,-4,6])
print(arr.any())   

True


*0 is False, non-zero is True*

In [183]:
np.zeros(9).any()

False

In [184]:
arr.all()

False

In [185]:
np.ones(9).all()

True

---

#### A universal function, or ufunc, is a function that performs element-wise operations on ndarrays.<br>Unary ufuncs work on a single ndarray, binary ufuncs work on a pair

---

#### <font color="brown">Some unary ufuncs</font>

In [3]:
import numpy as np

In [4]:
arr = np.arange(1,6)
arr

array([1, 2, 3, 4, 5])

In [5]:
np.exp(arr)  # computes e^x for each x in arr

array([  2.71828183,   7.3890561 ,  20.08553692,  54.59815003,
       148.4131591 ])

In [6]:
np.square(arr)

array([ 1,  4,  9, 16, 25])

In [7]:
np.sqrt(np.square(arr))

array([1., 2., 3., 4., 5.])

In [8]:
np.power(arr,3)

array([  1,   8,  27,  64, 125])

In [27]:
arr2 = np.arange(-3,4)
arr2

array([-3, -2, -1,  0,  1,  2,  3])

In [10]:
np.abs(arr2)

array([3, 2, 1, 0, 1, 2, 3])

In [11]:
np.fabs(arr2)  # same, but gives real numbers, faster than abs

array([3., 2., 1., 0., 1., 2., 3.])

In [12]:
np.fabs(arr2).astype(int)

array([3, 2, 1, 0, 1, 2, 3])

In [4]:
np.fabs(np.array([1.2,-3.5,1,-16]))

array([ 1.2,  3.5,  1. , 16. ])

In [5]:
np.abs(np.array([1.2,-3.5,1,-16]))

array([ 1.2,  3.5,  1. , 16. ])

In [11]:
arr2d = np.arange(1,10).reshape(3,3)
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [12]:
np.square(arr2d)

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [13]:
np.power(arr2d,2)

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [14]:
arr2d   # does not change original array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [52]:
# ceil, floor, round to nearest integer
arr = np.exp(np.arange(1,6))
print(arr)
print(np.ceil(arr))
print(np.floor(arr))
print(np.rint(arr))

[  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]
[  3.   8.  21.  55. 149.]
[  2.   7.  20.  54. 148.]
[  3.   7.  20.  55. 148.]


In [53]:
# is nan
arr = np.array([1,2,4,5]) 
print(np.isnan(arr))

[False False False False]


In [54]:
# np.nan gives NaN
arr = np.array([1,2,np.nan,4,5])  # NaN is value used to denote not available, or null
print(np.isnan(arr))

[False False  True False False]


---

#### <font color='brown'>Some binary ufuncs</font>

In [7]:
# element-wise power: each base in arr1 is raised to the exponent
# at the same position in arr2
arr1, arr2 = [1,2,3,4], [2,1,2,3]
np.power(arr1,arr2)

array([ 1,  2,  9, 64])

In [8]:
# 3x3 integer exponent table: row 0 is all 2s, row 1 all 1s, row 2 all 3s
powers = np.ones((3,3), dtype=int)
powers[0] = 2
powers[2] = 3
powers

array([[2, 2, 2],
       [1, 1, 1],
       [3, 3, 3]])

In [15]:
np.power(arr2d,powers)

array([[  1,   4,   9],
       [  4,   5,   6],
       [343, 512, 729]])

In [16]:
# element-wise maximum
np.maximum(arr1,arr2)

array([2, 2, 3, 4])

In [17]:
arr = np.array([1,2,np.nan,4,5])
print(arr)
arr3 = [3,5,1,15,7]
print(arr3)

[ 1.  2. nan  4.  5.]
[3, 5, 1, 15, 7]


In [18]:
np.maximum(arr3,arr)

array([ 3.,  5., nan, 15.,  7.])

In [21]:
# max is not a binary ufunc: its 2nd positional argument is "axis", so passing an
# array there raises TypeError. The error is caught and printed so that running
# the whole notebook does not stop at this demonstration.
try:
    np.max(arr3,arr)
except TypeError as err:
    print('TypeError:', err)

TypeError: only integer scalar arrays can be converted to a scalar index

In [23]:
np.max(arr3,0)  # max is not a binary ufunc; its second argument is "axis"

15

**The binary ufunc name (maximum) is different from the reduction function name (max) because the 2nd argument has a different meaning from one to the other: for maximum it is a second array, for max it is the axis**

In [27]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [28]:
print(np.max(arr2d, 1))  # maximums for each of the rows, dimension argument is 1

[3 6 9]


In [29]:
print(np.max(arr2d, 0))  # maximums for each of the columns, dimension argument is 0

[7 8 9]


In [24]:
np.fmax(arr3,arr)   # fmax ignores NaN

array([ 3.,  5.,  1., 15.,  7.])

In [25]:
print(arr1)
print(arr2)
np.greater(arr1,arr2)  

[1, 2, 3, 4]
[2, 1, 2, 3]


array([False,  True,  True,  True])

---

#### <font color="brown">Using np.random</font>

In [77]:
np.random.randint(-50,0)  
# single random integer between -50 (inclusive) and 0 (exclusive)
# same as basic Python random.randint except in basic Python random.randint, 2nd arg is inclusive

-26

In [83]:
# same as basic Python random.randint except in basic Python random.randint, 2nd arg is inclusive
import random
random.randint(-50,0)

-45

In [78]:
np.random.randint(1,100,5)  
# 5 random integers between 1 (inclusive) and 100 (exclusive)

array([28, 99, 64, 12, 79])

In [79]:
np.random.randint(1,100,(2,3))  
# fill (2,3) array with random integers in range 1..99

array([[36, 50, 38],
       [70, 82, 98]])

In [32]:
np.random.random(5)  # 5 random reals 0 (inclusive) thru 1 (exclusive)

array([0.38084917, 0.37413469, 0.79919083, 0.48883297, 0.84682203])

In [86]:
# basic Python version returns a single random number in the range [0,1)
random.random()

0.5622472062442928

In [33]:
np.random.random((3,2)) * 5

array([[2.82717881, 4.33608115],
       [1.94999957, 4.12455107],
       [3.932484  , 1.51020188]])

In [88]:
nlst = [1,5,3,2,19,12,22,18,75,2,-10,0,15]
np.random.choice(nlst,3)  # 3 random selections from nlst

array([ 0, 22, 12])

In [91]:
# Python random.choice gives a single item
random.choice(nlst)  

15

In [37]:
np.random.choice(nlst,8)  

array([ 19,   5,  19,   3,  18, -10,   2,  22])

In [38]:
np.random.choice(nlst,size=8,replace=False)   # no element of nlst is picked twice; values that occur twice in nlst (here 2) can still show up twice

array([19,  1, 18,  2,  2,  0, 15,  5])

In [39]:
arrs = np.arange(1,10)
arrs

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
np.random.shuffle(arrs)
arrs

array([1, 9, 7, 5, 3, 8, 6, 2, 4])

In [41]:
arrs2d = np.random.randint(1,50,(3,4))
arrs2d

array([[33,  3, 27,  6],
       [25, 18, 37, 13],
       [ 3, 31, 15,  3]])

In [42]:
np.random.shuffle(arrs2d)  # only shuffles rows 
arrs2d

array([[ 3, 31, 15,  3],
       [25, 18, 37, 13],
       [33,  3, 27,  6]])

In [43]:
arrp = np.arange(1,10)
np.random.permutation(arrp)

array([9, 8, 4, 1, 6, 2, 7, 5, 3])

In [44]:
arrp  # permutation returns a new array, original is untouched

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [45]:
arrp2d = np.random.randint(1,50,(5,3))
arrp2d

array([[48, 25, 49],
       [20, 16, 11],
       [12, 31, 49],
       [ 3, 42, 45],
       [22, 19, 11]])

In [46]:
np.random.permutation(arrp2d)  # only permutes rows

array([[12, 31, 49],
       [20, 16, 11],
       [ 3, 42, 45],
       [48, 25, 49],
       [22, 19, 11]])

In [76]:
arrp2d  # original not changed

array([[48, 25, 49],
       [20, 16, 11],
       [12, 31, 49],
       [ 3, 42, 45],
       [22, 19, 11]])

---

#### <font color="brown">Math and stats functions ("reductions")</font>
##### min, max, mean, sum, std, cumsum, argmax, argmin

In [99]:
arrx = np.random.randint(1,20,5)
print(arrx)
print(arrx.max())  
print(arrx.min())
print(arrx.mean())
print(arrx.sum())
print(arrx.std())
print(arrx.cumsum())

[17  3 14  2 15]
17
2
10.2
51
6.368673331236264
[17 20 34 36 51]


In [98]:
print(np.max(arrx)) # can also use np function instead of ndarray method

17


In [99]:
print(np.cumsum(arrx))

[17 20 34 36 51]


In [100]:
np.array([3,1,-10,5,2,0,-10]).argmin()  # index of minimum value, ndarray method

2

In [101]:
np.argmin(np.array([3,1,-10,5,2,0,-10]))  # np function

2

In [34]:
# take the 3x3 grid 1..9 and pick rows in order 1,0,2 and columns in order 1,2,0,
# so that the maxima are no longer in the last row/column position by default
arr2d = np.arange(1,10).reshape(3,3)[np.ix_([1,0,2],[1,2,0])]
arr2d

array([[5, 6, 4],
       [2, 3, 1],
       [8, 9, 7]])

In [42]:
print('max in each column: ',arr2d.max(axis=0))
print('row index of max in each column: ',arr2d.argmax(axis=0))
print('max in each row: ',arr2d.max(axis=1))
print('column index of max in each row: ',arr2d.argmax(axis=1))

max in each column:  [8 9 7]
row index of max in each column:  [2 2 2]
max in each row:  [6 3 9]
column index of max in each row:  [1 1 1]


In [35]:
arr2d.argmax(axis=1)

array([1, 1, 1])

In [42]:
arr2d.mean()

5.0

In [None]:
arr2d.argmax(axis=0)  # row index of the max in each column

In [43]:
arr2d.mean(axis=1)  # mean of column values for each row

array([5., 2., 8.])

In [44]:
arr2d.mean(axis=0)  # mean of row values for each column

array([5., 6., 4.])

In [102]:
arr2d.cumsum(axis=0)  # cumulative sum for each column

array([[ 5,  6,  4],
       [ 7,  9,  5],
       [15, 18, 12]])

---

#### <font color="brown">Unique</font>

In [110]:
scores = np.array([10,9,9,8,2,3,2,7,5,8])
np.unique(scores)   # sorted list of unique values

array([ 2,  3,  5,  7,  8,  9, 10])

In [126]:
# define the 2-D example here so this cell does not depend on an `arr`
# created by a cell that appears later in the notebook
arr = np.array([[ 3,  4,  9],
                [10, 12,  1],
                [ 9,  8,  4],
                [ 2,  3, 12]])
arr

array([[ 3,  4,  9],
       [10, 12,  1],
       [ 9,  8,  4],
       [ 2,  3, 12]])

In [127]:
np.unique(arr)

array([ 1,  2,  3,  4,  8,  9, 10, 12])

---

#### <font color="brown">Sorting</font>

In [103]:
# sort
a = np.array([3,-1,2,5,15,22,-10,85])
print(a)

[  3  -1   2   5  15  22 -10  85]


In [104]:
np.sort(a)

array([-10,  -1,   2,   3,   5,  15,  22,  85])

In [4]:
print(a)  # np.sort(a) doesn't change the original array

[  3  -1   2   5  15  22 -10  85]


In [108]:
a.sort()  # ndarray sort method changes the array
print(a)

[-10  -1   2   3   5  15  22  85]


In [109]:
# re-create the unsorted array, since a.sort() above sorted `a` in place
a = np.array([3,-1,2,5,15,22,-10,85])
print(a)

[  3  -1   2   5  15  22 -10  85]


In [111]:
# positions of sorted values
sort_ix = np.argsort(a)
print(np.sort(a))
print(sort_ix)

[-10  -1   2   3   5  15  22  85]
[6 1 2 0 3 4 5 7]


In [112]:
a.argsort()

array([6, 1, 2, 0, 3, 4, 5, 7])

In [114]:
print(a)   # ndarray argsort method does not change original array

[  3  -1   2   5  15  22 -10  85]


In [115]:
# two 1-D arrays of equal length, stacked as the rows of a 2x8 array
a = np.array([3,-1,2,5,15,22,-10,85])
b = np.array([9,2,15,10,1,3,-2,5])
c = np.vstack((a,b))
print(c)

[[  3  -1   2   5  15  22 -10  85]
 [  9   2  15  10   1   3  -2   5]]


In [116]:
np.sort(c)

array([[-10,  -1,   2,   3,   5,  15,  22,  85],
       [ -2,   1,   2,   3,   5,   9,  10,  15]])

In [117]:
np.argsort(c)  # row-wise indexes of sorted values in original

array([[6, 1, 2, 0, 3, 4, 5, 7],
       [6, 4, 1, 5, 7, 0, 3, 2]])

In [118]:
# positions of max 3 values in each row
np.argsort(c)[:,-3:]

array([[4, 5, 7],
       [0, 3, 2]])

In [122]:
arr = np.random.randint(1,13,(4,3))
print(arr)

[[ 3  4  9]
 [10 12  1]
 [ 9  8  4]
 [ 2  3 12]]


In [123]:
np.sort(arr,axis=0)   # sort along columns

array([[ 2,  3,  1],
       [ 3,  4,  4],
       [ 9,  8,  9],
       [10, 12, 12]])

In [124]:
np.argsort(arr,axis=0)

array([[3, 3, 1],
       [0, 0, 2],
       [2, 2, 0],
       [1, 1, 3]])