# numpy

In [129]:
# A package widely used in the data science community which lets us work efficiently with arrays
# and matrices in Python

# 

# Numpy is the core library for scientific computing in Python. 
# It provides a high-performance multidimensional array object, and tools for working with these arrays. 

In [3]:
# importing numpy as np. This lets us use the shortcut np to refer to numpy
import numpy as np 

In [4]:
# build a numpy array from an ordinary Python list
mylist = [1, 2, 3]    # a plain Python list
x = np.array(mylist)  # hand the list to numpy, which converts it to an array
x                     # last expression in the cell, so the notebook displays it

array([1, 2, 3])

In [5]:
# print mylist and see the difference between mylist and x
mylist

[1, 2, 3]

In [6]:
# the list literal can also be handed to numpy directly, without naming it first
y = np.array([1, 2, 3])
y

array([1, 2, 3])

In [7]:
# multidimensional arrays are made by passing in a list of lists:
# 2 inner lists of 3 elements each give a 2x3 (2 by 3) array
m = np.array([[7, 8, 9],
              [10, 11, 12]])
m

array([[ 7,  8,  9],
       [10, 11, 12]])

In [8]:
# the same data as a plain list of lists, to compare with the 2-d numpy array
mylist_multidim = [[7, 8, 9], [10, 11, 12]]
mylist_multidim
# the values are the same; the only difference is how Python treats them internally

[[7, 8, 9], [10, 11, 12]]

In [9]:
# we can check the dimensions by using the shape attribute
m.shape

(2, 3)

In [10]:
# we can extract the number of rows: it is the first entry of shape
rowcount_m=m.shape[0]
rowcount_m

2

In [11]:
# we can extract the number of columns: it is the second entry of shape
colcount_m=m.shape[1]
colcount_m

3

In [12]:
# generating arrays
# arange takes a start, a stop and a step size and returns evenly spaced
# values within that interval (the stop value itself is not included)
n = np.arange(0, 30, 2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [13]:
# we can use reshape to convert this array of 15 numbers into a 3x5 array
# reshape hands back the reshaped array, so the result is assigned back to n
n=n.reshape(3,5)
n

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [14]:
n.shape

(3, 5)

In [15]:
# linspace is similar to arange, except that we say how many numbers we want
# back and it splits up the interval accordingly:
# we pass a start, a stop and the number of values to return
# (here the stop value 4 is included in the result)
o = np.linspace(0, 4, 9)
o

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ])

In [16]:
# resize can be used to change the dimensions in place
o.resize(3,3)
o

array([[ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ]])

In [17]:
# difference between reshape and resize
# NOTE: no random seed is set, so these values change every time the cell is run
ar = np.random.rand(2,3)
ar

array([[ 0.4854468 ,  0.02105605,  0.20702387],
       [ 0.23801935,  0.48399613,  0.90546879]])

In [18]:
ar.reshape(1,6) # the reshaped result is not assigned to anything, so it is discarded
ar # reshape did not change ar itself; it only produced a separate, temporary reshaped result

array([[ 0.4854468 ,  0.02105605,  0.20702387],
       [ 0.23801935,  0.48399613,  0.90546879]])

In [19]:
ar.resize(1,6)
ar # after resize the array itself has changed its shape

array([[ 0.4854468 ,  0.02105605,  0.20702387,  0.23801935,  0.48399613,
         0.90546879]])

In [20]:
# numpy also has several built-in functions and shortcuts for creating arrays
np.ones((3,2)) # returns a 3x2 array of ones

array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [21]:
np.zeros((2,3)) # returns a 2x3 array of zeros

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [22]:
np.eye(3) # returns a 3x3 array with ones on the diagonal and zeros everywhere else 

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [23]:
y=np.array([1,2,3])
np.diag(y) # returns a square array with the values of y on the diagonal and zeros everywhere else

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [24]:
# to create an array with repeated values we can pass in a repeated list, or we can use numpy's repeat function
# multiplying a list by 3 repeats the whole list three times before it is converted to an array
np.array([1,2,3]*3)  

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [25]:
np.repeat([1,2,3],3) 
# output: array([1, 1, 1, 2, 2, 2, 3, 3, 3])    
# notice the difference: repeat repeats each element 3 times in turn,
# whereas the repeated list [1,2,3]*3 cycles through the whole sequence (1,2,3,1,2,3,1,2,3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [26]:
# arrays can also be combined to create new ones
# start with a 2x3 array of integer ones
p = np.ones((2, 3), dtype=int)
p

array([[1, 1, 1],
       [1, 1, 1]])

In [27]:
# and stack it vertically with itself multiplied by 2
np.vstack([p,2*p])   

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

In [28]:
# and stack it horizontally
np.hstack([p,2*p]) 

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

# basic math

In [29]:
# arithmetic on numpy arrays
# +, -, * and / all work element by element
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
x + y

array([5, 7, 9])

In [30]:
x*y # element-wise product

array([ 4, 10, 18])

In [31]:
y-x # element-wise difference

array([3, 3, 3])

In [32]:
# x to the power 2 (this is also element-wise)
x**2 

array([1, 4, 9])

In [33]:
# linear algebra dot product can be done using the dot function
x.dot(y) 

32

In [34]:
# build a new 2-d array whose rows are derived from the array y:
# y itself, y squared, y doubled, and y minus one
y = np.array([4, 5, 6])
z = np.array([y, y ** 2, y * 2, y - 1])
z

array([[ 4,  5,  6],
       [16, 25, 36],
       [ 8, 10, 12],
       [ 3,  4,  5]])

In [35]:
# transposing an array using the .T attribute, which swaps the rows and columns
z.T

array([[ 4, 16,  8,  3],
       [ 5, 25, 10,  4],
       [ 6, 36, 12,  5]])

In [36]:
z.dtype # we can see the type of data the array has

dtype('int64')

In [37]:
z=z.astype('f') # casting an array to a different type, from int64 to float32 in this case
z.dtype 

dtype('float32')

In [38]:
z

array([[  4.,   5.,   6.],
       [ 16.,  25.,  36.],
       [  8.,  10.,  12.],
       [  3.,   4.,   5.]], dtype=float32)

In [39]:
# commonly used math functions, starting with the sum of all the values
a = np.array([-4, -2, 1, 3, 5])
a.sum()

3

In [41]:
z.sum() # with no axis given, every value in the 2-d array is added into a single total

134.0

In [42]:
z.sum(axis=0) # sum down the rows, giving one total per column

array([ 31.,  44.,  59.], dtype=float32)

In [43]:
z.sum(axis=1) # sum across the columns, giving one total per row

array([ 15.,  77.,  30.,  12.], dtype=float32)

In [44]:
a.max()

5

In [45]:
a.min()

-4

In [46]:
a.mean() # mean of all the values

0.59999999999999998

In [47]:
a.std() # standard deviation of all the values

3.2619012860600183

In [48]:
a.argmax() # index of the maximum value

4

In [49]:
a.argmin() # index of the minimum value  

0

# accessing array elements - indexing and slicing - colon notation

In [50]:
# array holding the squares of the integers 0 to 12
s = np.arange(13) ** 2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [51]:
# we can use [] notation to get the value at a particular index
s[4]   # first value is at position 0

16

In [52]:
# colon notation to get a range
# the notation is starting index, ending index and step size
# specifying the starting or ending index is not necessary
# we can also use negatives to count back from the end of the array

In [53]:
# a[start:end] - items start through end-1
s[0:3]    

array([0, 1, 4])

In [54]:
# a[:end] - items from the beginning through end-1
s[:3] # this is same as s[0:3]   

array([0, 1, 4])

In [55]:
# a[start:] - items start through the rest of the array
s[3:]

array([  9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [56]:
# a[:] - every item of the array
# note: for a numpy array this slice is a view onto the same data, not an independent copy
# (use .copy() when a separate copy is needed)
s[:]

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [57]:
# a[start:end:step] - start through not past end, by step
s[::2]

array([  0,   4,  16,  36,  64, 100, 144])

In [58]:
s[:5:2] # every second item from the beginning up to, but not including, index 5

array([ 0,  4, 16])

In [59]:
# a[-1] - last item in the array
s[-1]

144

In [60]:
s[-1:] # slice of last 1 element of the array

array([144])

In [61]:
s[-4] # 4th last item in the array

81

In [62]:
s[-4:] # slice of last 4 elements of the array

array([ 81, 100, 121, 144])

In [63]:
s[-5::-2] #starting 5th from the end to the beginning of the array and counting backwards by 2

array([64, 36, 16,  4,  0])

In [64]:
s[-1:]

array([144])

In [65]:
# a[:-2] - everything except the last two items
s[:-2]

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

## lets see how this extends to a 2 dimensional array

In [66]:
# build the integers 0-35, then resize the array in place into a 6x6 grid
r = np.arange(36)
r.resize(6, 6)
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [67]:
r[2,2] # 14 is the value at row index 2, column index 2 (the third row and third column, since counting starts at 0)

14

In [68]:
r[3,3:6] # colon notation used to get a slice of the row at index 3 (the 4th row), column indices 3 to 5

array([21, 22, 23])

In [69]:
r[:2,:-1] # first two rows and all the columns except the last

array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [70]:
r[-1,::2] # select every second element from the last row

array([30, 32, 34])

In [71]:
r[r>30] # use bracket operator to do conditional indexing and assignment

array([31, 32, 33, 34, 35])

In [72]:
r[r>30]=30 # capping the max value of elements in the array to 30
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

## copying data

In [73]:
# first lets create a new array r2 which is the slice of the array r
r2=r[:3,:3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [74]:
r2[:]=0 # sets all elements of this array to 0
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [75]:
# now if we look at the original array r, we see that the slice of data in the original array has also been changed
# this is something to keep in mind and be careful about when working with numpy arrays
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [76]:
# copy can be used to avoid this
r_copy=r.copy()
r_copy[:]=10
r_copy

array([[10, 10, 10, 10, 10, 10],
       [10, 10, 10, 10, 10, 10],
       [10, 10, 10, 10, 10, 10],
       [10, 10, 10, 10, 10, 10],
       [10, 10, 10, 10, 10, 10],
       [10, 10, 10, 10, 10, 10]])

In [77]:
r # values of r are still intact

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

### iterating over arrays

In [78]:
# first lets create a 4x3 array of random integers from 0-9 (the upper bound 10 is not included)
# NOTE: no random seed is set, so the values change every time the cell is run
test=np.random.randint(0,10,(4,3))
test

array([[5, 0, 5],
       [8, 9, 5],
       [9, 6, 7],
       [8, 5, 7]])

In [79]:
# walk over the array one row at a time
for row in test:
    print(row)

[5 0 5]
[8 9 5]
[9 6 7]
[8 5 7]


In [80]:
# iterate by row index: len(test) gives the number of rows
for i in range(len(test)):
    print(test[i])

[5 0 5]
[8 9 5]
[9 6 7]
[8 5 7]


In [81]:
# we can combine these two ways of iterating by using enumerate, which gives us the index of the row and the row itself
for i,row in enumerate(test):
    print('row',i,'is',row)    

row 0 is [5 0 5]
row 1 is [8 9 5]
row 2 is [9 6 7]
row 3 is [8 5 7]


In [82]:
# zip
test2=test**2 # element-wise square of test
print(test2)
# if we wish to iterate through both arrays at once we can use zip, which pairs up their rows
for i,j in zip(test,test2):
    print(i,'+',j,'=',i+j)

[[25  0 25]
 [64 81 25]
 [81 36 49]
 [64 25 49]]
[5 0 5] + [25  0 25] = [30  0 30]
[8 9 5] + [64 81 25] = [72 90 30]
[9 6 7] + [81 36 49] = [90 42 56]
[8 5 7] + [64 25 49] = [72 30 56]


### range function explained

In [83]:
# range(stop) - generates the integers from zero up to but not including stop, for example list(range(3)) == [0,1,2]

# range(start,stop,step)
# start is the first number of the sequence,
# numbers are generated up to but not including stop,
# step is the difference between each number in the sequence

In [84]:
# range(stop): counts from 0 up to, but not including, 3
for i in range(3):
    print(i)

0
1
2


In [85]:
# range(start, stop): counts from 3 up to, but not including, 6
for i in range(3,6):
    print(i)

3
4
5


In [86]:
# range(start, stop, step): counts from 4 in steps of 2, stopping before 10
for i in range(4,10,2):
    print(i)

4
6
8


In [87]:
# a negative step counts downwards: from 0 in steps of -2, stopping before -8
for i in range(0,-8,-2):
    print(i)

0
-2
-4
-6


In [None]:
# Thank you!