# Section 5: Python for Data Analysis - NumPy

## NumPy arrays 

### Casting objects into numpy arrays

In [1]:
##NumPy arrays - The main way we are going to use the NumPy library 
##Arrays can either be called matrices or vectors depending if they are 2d or 1d

##Creating a numpy array from a Python object such as a list

list = [1,2,3]

In [2]:
import numpy as np

In [3]:
arr = np.array(list) ##Casting a list as an array - Returns an array as the container of that list object 

arr

array([1, 2, 3])

In [4]:
## A 2-d array is obtained by casting a nested list (one inner list per row):

my_mat = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

## Cast the nested list to an ndarray

twoDarr = np.array(my_mat)

twoDarr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### NumPy built-in array generation

In [8]:
## Most of the time we will use NumPy's built-in array generation functions

## arange ("array range"): start is inclusive, stop is exclusive, step is the spacing
np.arange(start=0, stop=11, step=2)

array([ 0,  2,  4,  6,  8, 10])

In [9]:
## NumPy can also generate arrays pre-filled with a value, such as zeros

np.zeros(shape=3)  ## shape=3 gives a 1-d vector with three elements

array([0., 0., 0.])

In [13]:
np.zeros(shape=(2, 3))  ## A tuple shape generates a matrix: (2, 3) is 2 rows by 3 columns - the number of nested brackets in the output equals the number of dimensions

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
## Arrays filled entirely with ones
np.ones(shape=3)

array([1., 1., 1.])

In [15]:
np.ones(shape=(3, 3))  ## Pass a tuple as the shape to get more than one dimension

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [16]:
## np.linspace returns evenly spaced numbers over a specified interval
## Do not confuse with arange(): arange takes a step size, linspace takes the number of points

np.linspace(start=0, stop=5, num=10)  # 1-d vector of num evenly spaced points from start = 0 to stop = 5 (both endpoints included)


array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

In [17]:
## Identity matrix: N rows and N columns, ones on the main diagonal and zeros elsewhere
np.eye(N=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [18]:
## Now let's create arrays of random numbers

np.random.rand(10) ## Returns samples drawn uniformly from [0, 1); each argument is the size of one dimension, so this is a vector of 10 values


array([0.23641342, 0.18427455, 0.82613554, 0.76836912, 0.6256977 ,
       0.99622691, 0.69003018, 0.64695449, 0.73500142, 0.82134486])

In [20]:
## A 5x5 matrix of uniform random numbers - dimensions are passed as separate arguments, not as a tuple
np.random.rand(5,5)

array([[0.37250314, 0.18364811, 0.18385558, 0.15753702, 0.56286406],
       [0.44575503, 0.68850104, 0.18779292, 0.02049141, 0.89985377],
       [0.43701725, 0.00686828, 0.91230851, 0.89225608, 0.47823681],
       [0.43109857, 0.36824063, 0.34115044, 0.7395725 , 0.86057376],
       [0.24723119, 0.44862803, 0.35158821, 0.89062455, 0.90405047]])

In [21]:
## Generating a sample from the standard normal distribution (mean = 0, stdv = 1)
np.random.randn(3)

array([ 0.36820258, -0.16905085, -0.52476497])

In [22]:
np.random.randn(3,3) ## Each argument is the size of one dimension: a 3x3 matrix of standard normal samples

array([[-1.63540902, -0.6588382 ,  1.21963954],
       [-0.24284764, -0.7730825 , -2.02685948],
       [ 0.56132587, -0.6540138 , -1.09905025]])

In [24]:
## randint returns random integers drawn from a discrete uniform distribution between two bounds

np.random.randint(low=1, high=100, size=10)  ## low is inclusive, high is exclusive, size is the number of integers to draw

array([61, 62, 41, 42, 70, 78,  8, 71, 35, 73])

### Useful NumPy array attributes and methods

In [28]:
## Let's create some arrays to work with first: the 25 integers 0 through 24
arr = np.arange(0, 25)

In [29]:
arr ## Display the array: the integers 0 through 24

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [30]:
ranarr = np.random.randint(low=0, high=50, size=10)  ## 10 random integers in [0, 50)

In [31]:
ranarr ## Display the 10 random integers drawn above

array([19, 25, 42, 22, 27, 10, 35,  2, 28, 27])

In [33]:
## Method 1: reshape() - returns an array containing the same data arranged in a different shape

arr.reshape((5, 5))  ## The new shape must hold exactly as many elements as the original array (5 * 5 = 25)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [34]:
## Method 2: Finding the max() and min() values

arr.max() ## max() returns the largest value in the array

24

In [35]:
arr.min() ## min() returns the smallest value in the array

0

In [36]:
## Method 3: argmax() and argmin() - the index (position) of the max and min values, not the values themselves
ranarr.argmax()

2

In [37]:
ranarr.argmin() ## Index of the smallest value in ranarr

7

In [38]:
## Attribute 1: shape - a tuple with the size of the array along each dimension (an attribute, so no parentheses)
arr.shape

(25,)

In [40]:
## reshape() returns a new array and leaves arr itself unchanged, so reassign the result to keep it
arr = arr.reshape((5, 5))
arr.shape

(5, 5)

In [41]:
## Attribute 2: dtype - the data type of the array's elements
arr.dtype

dtype('int64')

In [42]:
## Using imports to simplify code: import a single function under a short alias

from numpy.random import randint as rdi

rdi(low=1, high=10, size=2)  ## Same as np.random.randint(1, 10, 2)

array([8, 2])