## Why Numpy?

NumPy operations on arrays are much faster than the equivalent operations on Python lists, because the underlying routines are implemented in optimized C code.

## What is Numpy?

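NumPy is a library for working with numerical data as n-dimensional arrays. In a machine learning workflow, our data is first turned into a series of numbers (arrays); the ML algorithm then works out the patterns in those numbers as it is trained.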

In [6]:
import numpy as np

## Datatypes and attributes

In [7]:
# NumPy's main datatype is the ndarray, which stands for "n-dimensional array".

In [8]:
# One-dimensional array (a vector) built from a flat Python list.
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [9]:
type(a1)

numpy.ndarray

![](Anatomy.png)

In [14]:
# Two-dimensional array: two rows of three values each.
a2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [16]:
# Three-dimensional array: two 3x3 blocks stacked along the first axis.
a3 = np.array([
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    [
        [10, 11, 12],
        [13, 14, 15],
        [16, 17, 18],
    ],
])
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [18]:
# .shape is a tuple giving the length of the array along each axis.
a1.shape,a2.shape,a3.shape

((3,), (2, 3), (2, 3, 3))

In [19]:
# .ndim is the number of axes (dimensions), i.e. the length of .shape.
a1.ndim,a2.ndim,a3.ndim

(1, 2, 3)

In [20]:
# .dtype is the single element type shared by every item in the array.
# NOTE(review): the integer width shown (int32) is the default on the machine
# that produced this output; it may differ on other platforms.
a1.dtype,a2.dtype,a3.dtype

(dtype('int32'), dtype('int32'), dtype('int32'))

In [21]:
# .size is the total number of elements (the product of the entries in .shape).
a1.size,a2.size,a3.size

(3, 6, 18)

In [24]:
import pandas as pd

# Wrap the 2-D array in a DataFrame; rows and columns get default integer labels.
df = pd.DataFrame(data=a2)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## Creating numpy arrays

In [27]:
# 3x4 array in which every element is 1.0 (floating point by default).
ones = np.ones(shape=(3, 4))
ones

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [29]:
# 2x3 array in which every element is 0.0 (floating point by default).
zeros = np.zeros(shape=(2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [32]:
# Evenly spaced integers: start at 1, stop before 12, step by 2.
sample_arr1 = np.arange(start=1, stop=12, step=2)
sample_arr1

array([ 1,  3,  5,  7,  9, 11])

In [33]:
# 3x5 array of random integers drawn from 0 (inclusive) to 10 (exclusive).
sample_arr2 = np.random.randint(low=0, high=10, size=(3, 5))

In [34]:
sample_arr2

array([[3, 6, 8, 7, 7],
       [4, 7, 0, 2, 1],
       [2, 8, 4, 2, 2]])

In [37]:
# np.random.random_integers is deprecated and emits a DeprecationWarning;
# np.random.randint is its documented replacement.
# randint excludes its upper bound, so high=11 keeps 10 as a possible value,
# matching the inclusive range of random_integers(1, 10).
sample_arr3 = np.random.randint(low=1, high=11, size=(3, 10))
sample_arr3

  """Entry point for launching an IPython kernel.


array([[ 3, 10,  6,  8,  7,  7,  8,  5,  6,  6],
       [10,  8,  3,  6,  1,  9,  7,  9,  7,  9],
       [ 6, 10, 10, 10,  7,  4,  1,  3,  4,  5]])

In [40]:
# 9x3 array of random floats in the half-open interval [0.0, 1.0).
# NOTE: this rebinds sample_arr3, replacing the integer array created earlier.
sample_arr3 = np.random.random(size=(9, 3))
sample_arr3

array([[0.25296069, 0.93778381, 0.70726475],
       [0.04254126, 0.45763648, 0.35457563],
       [0.55825001, 0.70856648, 0.55155964],
       [0.12027218, 0.86641209, 0.88377186],
       [0.30353163, 0.61857742, 0.95080066],
       [0.2862792 , 0.21897918, 0.85563276],
       [0.08685005, 0.55917362, 0.57326326],
       [0.40012788, 0.68695456, 0.65878759],
       [0.70953494, 0.17527777, 0.5756031 ]])

In [42]:
# np.random.rand takes the dimensions as separate arguments (4, 3)
# rather than as a single shape tuple.
sample_arr4 = np.random.rand(4, 3)
sample_arr4

array([[0.73725599, 0.87336247, 0.88945467],
       [0.28099675, 0.67259712, 0.60493991],
       [0.46027217, 0.97688546, 0.98817325],
       [0.75199563, 0.8025442 , 0.71537308]])

In [43]:
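# Pseudo-random numbers:
# the "random" numbers NumPy generates are not truly random; they are produced
# by a deterministic algorithm starting from a seed value.
# np.random.seed(<integer>) sets that seed, making the sequence reproducible.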

In [51]:
# Seeding the global generator makes the "random" values reproducible:
# with the same seed, this cell produces the same array on every run.
np.random.seed(seed=10)
sample_arr5 = np.random.rand(5, 3)
sample_arr5

array([[0.77132064, 0.02075195, 0.63364823],
       [0.74880388, 0.49850701, 0.22479665],
       [0.19806286, 0.76053071, 0.16911084],
       [0.08833981, 0.68535982, 0.95339335],
       [0.00394827, 0.51219226, 0.81262096]])

## Viewing arrays

In [54]:
sample_arr2

array([[3, 6, 8, 7, 7],
       [4, 7, 0, 2, 1],
       [2, 8, 4, 2, 2]])

In [55]:
np.unique(sample_arr2)

array([0, 1, 2, 3, 4, 6, 7, 8])

In [57]:
a1

array([1, 2, 3])

In [58]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [59]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [60]:
a2[1]

array([4, 5, 6])

In [62]:
a3[1,0]

array([10, 11, 12])

In [66]:
a3[0,2,1]

8

In [69]:
# From each of the first two blocks along axis 0, take row 1, column 2.
a3[:2,1,2]

array([ 6, 15])

In [83]:
# Four-dimensional array of random integers from 1 (inclusive) to 10 (exclusive).
sample_arr7 = np.random.randint(low=1, high=10, size=(2, 3, 4, 5))

In [84]:
sample_arr7

array([[[[7, 5, 7, 2, 7],
         [8, 6, 3, 9, 1],
         [1, 4, 4, 2, 2],
         [9, 8, 5, 5, 7]],

        [[4, 1, 1, 7, 3],
         [5, 3, 4, 3, 2],
         [2, 3, 1, 8, 8],
         [4, 2, 4, 4, 5]],

        [[7, 4, 4, 4, 5],
         [7, 1, 9, 9, 9],
         [8, 8, 3, 6, 3],
         [9, 6, 5, 4, 4]]],


       [[[9, 8, 3, 9, 1],
         [6, 3, 6, 5, 8],
         [6, 7, 2, 4, 7],
         [5, 2, 3, 9, 3]],

        [[5, 8, 7, 8, 7],
         [4, 7, 2, 7, 9],
         [9, 9, 1, 2, 9],
         [5, 3, 3, 4, 9]],

        [[4, 2, 8, 4, 7],
         [2, 1, 3, 1, 8],
         [1, 1, 2, 7, 5],
         [6, 1, 8, 7, 3]]]])

In [85]:
# Fix the first three axes at indices 1, 1, 0, then take the first four
# elements along the last axis.
sample_arr7[1,1,0,:4]

array([5, 8, 7, 8])

## Manipulating and Comparing arrays

In [120]:
#arithmetic

In [92]:
# NOTE: rebinds a1 (previously [1, 2, 3]) for the arithmetic examples that follow.
a1 = np.array([2, 4, 3])

In [103]:
# NOTE: rebinds a2 (previously [[1, 2, 3], [4, 5, 6]]) for the arithmetic examples.
a2 = np.array([
    [3, 5, 1],
    [3, 8, 9],
])

In [104]:
# Element-wise addition: a1 (3 elements) is broadcast across each row of the 2x3 a2.
a1+a2

array([[ 5,  9,  4],
       [ 5, 12, 12]])

In [105]:
# Element-wise multiplication (not matrix multiplication); a1 is again
# broadcast across each row of a2.
a1*a2

array([[ 6, 20,  3],
       [ 6, 32, 27]])

In [106]:
# NOTE: rebinds a3 to a 2x3x4 array of random integers in [0, 10).
# With a single positional bound, randint treats it as the exclusive upper
# limit and starts from 0, so low=0, high=10 is the explicit equivalent.
a3 = np.random.randint(low=0, high=10, size=(2, 3, 4))

In [107]:
a3

array([[[7, 8, 4, 2],
        [5, 5, 4, 2],
        [4, 6, 3, 6]],

       [[1, 8, 6, 1],
        [5, 0, 9, 9],
        [2, 1, 3, 9]]])

In [108]:
# a2 has shape (2, 3) and a3 has shape (2, 3, 4). Broadcasting compares shapes
# from the trailing axis backwards; 3 and 4 do not match (and neither is 1),
# so the addition fails. The error is the point of this cell, so it is caught
# and printed rather than left to stop a "Restart & Run All".
try:
    a2 + a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,4) 

In [109]:
# Numpy allows us to perform various arithmetic operations.

In [112]:
a1

array([2, 4, 3])

In [110]:
a1//2

array([1, 2, 1], dtype=int32)

In [111]:
a1/2

array([1. , 2. , 1.5])

In [113]:
np.exp(a1)

array([ 7.3890561 , 54.59815003, 20.08553692])

In [114]:
np.log(a1)

array([0.69314718, 1.38629436, 1.09861229])

In [116]:
np.square(a1)

array([ 4, 16,  9], dtype=int32)

In [117]:
a1%3

array([2, 1, 0], dtype=int32)

In [119]:
#Aggregation

In [121]:
# A plain Python list, used to compare Python's built-in sum with np.sum.
a4 = [8, 9, 10]
type(a4)

list

In [123]:
# A NumPy array, used to compare Python's built-in sum with np.sum.
a5 = np.array([6, 9, 10])
type(a5)

numpy.ndarray

In [128]:
sum(a4)

27

In [125]:
np.sum(a4)

27

In [127]:
sum(a5)

25

In [129]:
np.sum(a5)

25

In [130]:
# Use NumPy's functions (e.g. np.sum) on NumPy arrays and Python's built-ins (e.g. sum) on Python lists.

In [131]:
# 1000 random floats in [0.0, 1.0), used for the timing comparison below.
a6 = np.random.random(size=1000)

In [133]:
a6[1:10]

array([0.01734313, 0.26538188, 0.63391112, 0.65400531, 0.66925783,
       0.1837397 , 0.8831859 , 0.51512807, 0.14180646])

In [134]:
# Time Python's built-in sum against np.sum on the same 1000-element NumPy array.
%timeit sum(a6) #Python's sum
%timeit np.sum(a6) #Numpy's sum

146 µs ± 3.94 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.26 µs ± 37.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
