# Section 5: Python for Data Analysis - NumPy

## 01- NumPy arrays 

### Casting objects into numpy arrays

In [1]:
## NumPy arrays are the main way we will use the NumPy library.
## A 1-D array is usually called a vector and a 2-D array a matrix.

## Start from a plain Python object (a list); it is cast to a NumPy array in a later cell.

listarr = [1, 2, 3]

In [2]:
import numpy as np

In [3]:
arr = np.array(listarr) ## Cast the list to an array: np.array() returns a new array containing the list's elements

## Display the resulting 1-D array
arr

array([1, 2, 3])

In [4]:
## A 2-D array (matrix) is obtained by casting a list of lists:
## each inner list becomes one row of the result.

my_mat = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

## Cast the nested list to an array

twoDarr = np.array(my_mat)

## Display the 3x3 matrix
twoDarr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### NumPy built-in array generation

In [5]:
## Usually we will be using NumPy's built-in array generation functions

## Using arange ("array range"): values from start up to, but not including, stop, advancing by step
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [6]:
## We can also generate specific kinds of arrays, such as arrays of zeros

np.zeros(3) ## 3 is the length of the vector; the zeros are floating-point values

array([0., 0., 0.])

In [7]:
np.zeros((2,3)) ## Pass a (rows, columns) tuple to generate a matrix - n levels of brackets in the output denote n dimensions

array([[0., 0., 0.],
       [0., 0., 0.]])

In [8]:
## Arrays filled entirely with ones (floating-point values)
np.ones(3)

array([1., 1., 1.])

In [9]:
np.ones((3,3)) ## Pass a tuple to request more than one dimension (here 3 rows by 3 columns)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [10]:
## np.linspace returns an array of evenly spaced numbers over a specified interval
## Do not confuse with arange(): linspace takes the number of points, arange takes the step size

np.linspace(0,5, num = 10) # 1-D vector of num = 10 evenly spaced points from start = 0 to stop = 5 (stop is included)


array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

In [11]:
## Creating an identity matrix in NumPy: ones on the main diagonal, zeros everywhere else
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [12]:
## Now let's create arrays of random numbers, starting with a 1-D vector

np.random.rand(10) ## Samples drawn uniformly from [0, 1); the arguments are the dimensions of the output (here a vector of length 10)


array([0.04459827, 0.82178571, 0.65226554, 0.42065984, 0.04169048,
       0.44970523, 0.83147722, 0.43921933, 0.6829027 , 0.14224643])

In [13]:
## A 5x5 matrix of uniform random numbers: each argument is the size of one dimension (no tuple needed)
np.random.rand(5,5)

array([[0.60985469, 0.58127845, 0.79285208, 0.7329941 , 0.65929335],
       [0.96608849, 0.20716303, 0.77599964, 0.64592323, 0.8663622 ],
       [0.66980826, 0.2069001 , 0.29539995, 0.877632  , 0.89236615],
       [0.46332737, 0.24661009, 0.11212292, 0.41183214, 0.69894757],
       [0.85108414, 0.29019061, 0.20077139, 0.24704805, 0.75639647]])

In [14]:
## Generating a sample from the standard normal distribution (mean = 0, std dev = 1)
np.random.randn(3)

array([ 2.42457794, -0.0143759 , -0.86336673])

In [15]:
## A 3x3 matrix of samples from the standard normal distribution
np.random.randn(3,3)

array([[-0.14255602,  1.97323915, -1.56944685],
       [ 0.18460791,  2.17519791, -0.71685832],
       [-1.0498395 , -1.22047501,  0.67024948]])

In [32]:
## Using randint to return random integers between a low and a high integer (discrete uniform distribution)

np.random.randint(1,100,5) ## The low endpoint is inclusive, the high endpoint is exclusive, and the third argument is the array size

array([49, 94, 74, 43, 19])

### Useful np arrays attributes and methods

In [17]:
## Let's create some arrays first
## With a single argument, arange starts at 0: this gives the integers 0 through 24
arr = np.arange(25)

In [18]:
## Display the 25-element vector
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [19]:
## 10 random integers drawn from 0 (inclusive) up to 50 (exclusive)
ranarr = np.random.randint(0,50,10)

In [20]:
## Display the random vector
ranarr

array([19, 29, 49, 14,  6, 22, 23, 32, 32, 26])

In [21]:
## Method 1: reshape() - returns an array containing the same data in a different shape

arr.reshape(5,5) ## The new shape must hold exactly as many elements as the original array (5 * 5 = 25); arr itself keeps its shape unless the result is assigned back

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [22]:
## Method 2: max() and min() - finding the largest and smallest values

arr.max() ## Call the max() method to get the largest value

24

In [23]:
arr.min() ## Call the min() method to get the smallest value

0

In [24]:
## Method 3: argmax() and argmin() - index positions of the max and min values
ranarr.argmax()

2

In [25]:
## Index position of the smallest value
ranarr.argmin()

4

In [26]:
## Attribute 1: shape - the size of the array along each dimension (an attribute, so no parentheses)
arr.shape

(25,)

In [27]:
## reshape() returns a new array, so assign the result back to keep the new shape
arr = arr.reshape(5, 5)
arr.shape

(5, 5)

In [28]:
## Attribute 2: dtype - the data type of the array's elements
arr.dtype

dtype('int64')

In [29]:
## Using imports to simplify code: pull in a single function under a short alias

from numpy.random import randint as rdi

## Same call as np.random.randint(1, 10, 2), written with the alias
rdi(1, 10, 2)

array([8, 2])