# numpy
- Best array data manipulation, fast  
- numpy array allows only single data type, unlike list  
- Support matrix operation

## Environment Setup

In [135]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:75% !important; margin-left:350px; }</style>"))
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
pd.set_option( 'display.notebook_repr_html', False)  # render Series and DataFrame as text, not HTML
pd.set_option( 'display.max_column', 10)    # number of columns
pd.set_option( 'display.max_rows', 10)     # number of rows
pd.set_option( 'display.width', 90)        # number of characters per row

## Version

In [136]:
np.__version__

'1.13.3'

## numpy Objects

### numpy Array - numpy.ndarray

In [137]:
a = np.array( (1,2,3,4,5), dtype='float' )
type(a)

numpy.ndarray

### numpy Data Types - numpy.dtype

- **numpy array is homogeneous**. All elements are single data type  
- **numpy array** has a property called **.dtype**, it tells the data type of its element  


In [138]:
a.dtype

dtype('float64')

This property is of type **numpy.dtype**

In [139]:
type(a.dtype)

numpy.dtype

numpy uses **its own data types** instead of built-in python data types, this makes numpy **much more powerful**

#### Float

In [140]:
type(a[1])

numpy.float64

#### Integer

In [141]:
np.array((1,2,3)).dtype

dtype('int32')

In [142]:
type(np.array([1,2,3])[1])

numpy.int32

#### String

In [143]:
a = np.array(('a','bb','ccc','dddd','fffff'))
a.dtype

dtype('<U5')

In [144]:
type(a[1])

numpy.str_

#### Boolean

In [145]:
a = np.array((True,True,False))
a.dtype

dtype('bool')

In [146]:
type(a[1])

numpy.bool_

## Create numpy array

Return type: **numpy.ndarray**

### From Data
- Data type is auto-detected by default: numpy picks the common denominator, i.e. the most general type that can hold every element
```
numpy.array( list,                dtype= ) # dtype default is auto detect: 'int', 'float', anything else 'str'
numpy.array( (list1, list2, ...), dtype= )  # list of lists creates 2-Dim array
```

#### Single Dimension Array

In [147]:
x = np.array( [1,23,4,5] )
print (x)
print (x.dtype)

[ 1 23  4  5]
int32


In [148]:
x = np.array ( [1,2,3,4,5.1] )   # common denominator is float
print (x)
print (x.dtype)

[ 1.   2.   3.   4.   5.1]
float64


In [149]:
# Mixing integers and strings: every element ends up stored as a string.
x = np.array((100,200,300,400,500,'six','seven'))   # common denominator is string
print (x) 
print (x.dtype)

['100' '200' '300' '400' '500' 'six' 'seven']
<U11


#### Multi-Dimensional Array

In [150]:
# Nested tuples: each inner tuple becomes one row of the 2-D array.
d = (
    (1, 2, 3, 4, 5),
    (11, 12, 13, 14, 15),
    (21, 22, 23, 24, 25),
)
x = np.array(d)
# One value per line: the array, its element type, its number of dimensions.
print(x, x.dtype, x.ndim, sep='\n')

[[ 1  2  3  4  5]
 [11 12 13 14 15]
 [21 22 23 24 25]]
int32
2


### Auto Generate (numbers)

#### Using numpy.arange()

In [151]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Using range()

**range()** returns a lazy sequence object, which **np.array** expands into an array. The shortcut is to use **np.arange** directly

In [152]:
np.array(range(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

#### Two Dimensions

In [153]:
np.array([range(10),np.arange(10)])

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

#### Ones

In [154]:
np.ones(10)  # One dimension, default is float

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [155]:
np.ones((2,5),'int')  #Two dimensions

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### Zeroes

In [156]:
np.zeros( 10 )    # One dimension, default is float

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [157]:
np.zeros((2,5),'int')   # 2 rows, 5 columns of ZERO

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

#### Identity Matrix

In [158]:
np.identity(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

## Dimension and Reshaping

### Sample Data

In [159]:
a = np.array([range(5), range(10,15), range(20,25), range(30,35)])
a

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24],
       [30, 31, 32, 33, 34]])

### Differentiating Dimensions
1-D array is array of single list  
2-D array is array made of list containing lists (each row is a list)  
2-D single row array is array with list containing just one list

#### 1-D Array
Observe that the **shape of the array** is (5,). It seems like an array with 5 rows, **empty columns** !  
What it really means is 5 items **single dimension**.

In [160]:
# A flat sequence of scalars produces a 1-D array.
arr = np.array(range(5))
print(arr, arr.shape, arr.ndim, sep='\n')   # values, shape, number of dimensions - one per line

[0 1 2 3 4]
(5,)
1


#### 2-D Array

In [161]:
# Three equal-length ranges, one per row, give a 3 x 5 array.
arr = np.array([range(5), range(5, 10), range(10, 15)])
print(arr, arr.shape, arr.ndim, sep='\n')   # values, shape, number of dimensions - one per line

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
(3, 5)
2


#### 2-D Array - Single Row

In [162]:
# A list holding exactly one range is still 2-D: one row, five columns.
arr = np.array([range(5)])
print(arr, arr.shape, arr.ndim, sep='\n')   # values, shape, number of dimensions - one per line

[[0 1 2 3 4]]
(1, 5)
2


#### 2-D Array : Single Column
Using array slicing method with **newaxis** at **COLUMN**, will turn 1D array into 2D of **single column**

In [163]:
# newaxis in the column position turns the 1-D array into a single-column 2-D array.
arr = np.arange(5)[:, np.newaxis]
print(arr, arr.shape, arr.ndim, sep='\n')   # values, shape, number of dimensions - one per line

[[0]
 [1]
 [2]
 [3]
 [4]]
(5, 1)
2


Using array slicing method with **newaxis** at **ROW**, will turn 1D array into 2D of **single row**

In [164]:
# newaxis in the row position turns the 1-D array into a single-row 2-D array.
arr = np.arange(5)[np.newaxis, :]
print(arr, arr.shape, arr.ndim, sep='\n')   # values, shape, number of dimensions - one per line

[[0 1 2 3 4]]
(1, 5)
2


### Slicing 

In [165]:
a[1:3, 1:4] # row 1,2 column 1,2,3

array([[11, 12, 13],
       [21, 22, 23]])

### Get Dimension - .shape and .ndim

In [166]:
a.ndim   # number of dimensions

2

In [167]:
a.shape   # number of rows and columns

(4, 5)

### Change Dimension - .reshape()
```
reshape ( row numbers, col numbers )
```

#### Reshape 1-Dim to 2-Dim

In [168]:
np.arange(12) # 1-D Array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [169]:
np.arange(12).reshape(3,4)  # 2-D Array

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

#### Reshape 2-Dim to 2-Dim

In [170]:
np.array([range(5), range(10,15)])  # 2-D Array

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

In [171]:
np.array([range(5), range(10,15)]).reshape(5,2) # 2-D Array

array([[ 0,  1],
       [ 2,  3],
       [ 4, 10],
       [11, 12],
       [13, 14]])

#### Reshape 2-Dimension to 2-Dim (of single row)
- Change 2x5 to 1x10  
- Observe the [[ ]]: the number of dimensions is still 2, don't be fooled

In [172]:
np.array( [range(0,5), range(5,10)])  # 2-D Array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [173]:
np.array( [range(0,5), range(5,10)]).reshape(1,10) # 2-D Array

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

#### Reshape 1-Dim Array to 2-Dim Array (single column)

In [174]:
np.arange(8)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [175]:
np.arange(8).reshape(8,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

A more convenient method is **newaxis**: there is no need to pass the number of rows as a parameter

In [176]:
np.arange(8)[:,np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

#### Reshape 1-Dim Array to 2-Dim Array (single row)

In [177]:
np.arange(8)

array([0, 1, 2, 3, 4, 5, 6, 7])

In [178]:
np.arange(8)[np.newaxis,:]

array([[0, 1, 2, 3, 4, 5, 6, 7]])

## Accessing

### Single Dimension Array

In [179]:
a = np.arange(10)
a[2]     # item-3

2

In [180]:
a[2:5]   #item 3 to 5

array([2, 3, 4])

## Vectorized / Matrix Operation

### *

In [181]:
ar = np.arange(10)
print (ar)
print (ar*2)

[0 1 2 3 4 5 6 7 8 9]
[ 0  2  4  6  8 10 12 14 16 18]


### + and -

In [182]:
ar = np.arange(10)
print (ar+2)
print (ar-2)

[ 2  3  4  5  6  7  8  9 10 11]
[-2 -1  0  1  2  3  4  5  6  7]


#### on boolean --> number

In [183]:
np.array((True,True,False))-1   # booleans take part in arithmetic as integers: True -> 1, False -> 0

array([ 0,  0, -1])

### .transpose()

In [184]:
ar = np.array([np.arange(5),np.arange(5)])
print ('ar :\n',ar)
print ('ar transposed :\n',ar.transpose())

ar :
 [[0 1 2 3 4]
 [0 1 2 3 4]]
ar transposed :
 [[0 0]
 [1 1]
 [2 2]
 [3 3]
 [4 4]]


### Selecting Elements Based on Criteria
```
np.where(array condition, value if true, value if false)
```

In [185]:
ar = np.array([(1,2,3,4,5),(11,12,13,14,15),(21,22,23,24,25)])
print ('Data : \n', ar)
# Three-argument form: the result has the same shape as ar and holds the "true" value
# where the condition is met and the "false" value everywhere else.
np.where(ar%2==0,1,0) # flag even elements with 1, all other elements with 0

Data : 
 [[ 1  2  3  4  5]
 [11 12 13 14 15]
 [21 22 23 24 25]]


array([[0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0],
       [0, 1, 0, 1, 0]])

## Random Numbers

### Uniform Distribution

#### Random Integer (with Replacement)
**randint()** Return random integers from **low (inclusive) to high (exclusive)**
```
np.random.randint( low )                  # generate an integer, i, which         i < low
np.random.randint( low, high )            # generate an integer, i, which  low <= i < high
np.random.randint( low, high, size=1)     # generate an ndarray of integer, single dimension
np.random.randint( low, high, size=(r,c)) # generate an ndarray of integer, two dimensions 
```

In [186]:
np.random.randint( 10 )

4

In [187]:
np.random.randint( 10, 20 )

11

In [188]:
np.random.randint( 10, high=20, size=5)   # single dimension

array([16, 13, 11, 17, 14])

In [189]:
np.random.randint( 10, 20, (3,5) )        # two dimensions

array([[19, 16, 17, 16, 11],
       [17, 14, 14, 12, 17],
       [15, 16, 18, 10, 13]])

#### Random Integer (with or without replacement)
```
numpy.random.choice( a, size, replace=True)
 # sampling from a, 
 #   if a is integer, then it is assumed sampling from arange(a)
 #   if a is an 1-D array, then sampling from this array
```

In [190]:
np.random.choice(10,5, replace=False) # take 5 samples from 0..9 (i.e. np.arange(10)), without replacement

array([6, 7, 8, 5, 0])

In [191]:
np.random.choice( np.arange(10,20), 5, replace=False)

array([17, 15, 16, 11, 12])

#### Random Float
**ranf()**  Generate float numbers in the half-open interval **[0.0, 1.0)**
```
np.random.ranf(size=None)
```

In [192]:
np.random.ranf(4)

array([ 0.55169713,  0.19371057,  0.10250466,  0.08754451])

**uniform()** Return random float from **low (inclusive) to high (exclusive)**
```
np.random.uniform( low )                  # low is the first parameter and high defaults to 1.0, so uniform(2) falls between 1.0 and 2.0
np.random.uniform( low, high )            # generate a float, f, which  low <= f < high
np.random.uniform( low, high, size=1)     # generate an array of float, single dimension
np.random.uniform( low, high, size=(r,c)) # generate an array of float, two dimensions 
```

In [193]:
np.random.uniform( 2 )   # only low=2 is passed and high keeps its default of 1.0, so the value falls between 1.0 and 2.0; passing both bounds explicitly is clearer

1.5387359308665272

In [194]:
np.random.uniform( 2,5, size=(4,4) )

array([[ 2.69441533,  4.34918483,  3.20611778,  3.52037855],
       [ 3.51520275,  3.79189752,  4.0970882 ,  4.48708517],
       [ 2.58314791,  4.90327193,  3.54219271,  4.18653179],
       [ 4.31024428,  4.75501258,  3.61434279,  4.7567368 ]])

### Normal Distribution

```
numpy.random.randn( n_items )                       # standard normal (mean=0, stdev=1)
numpy.random.randn( nrows, ncols )                  # two dimensions
numpy.random.standard_normal( size=None )           # default to mean = 0, stdev = 1, non-configurable
numpy.random.normal( loc=0, scale=1, size=None )    # loc = mean, scale = stdev, size = dimension
```

#### Standard Normal Distribution
Generate random numbers from the Gaussian (normal) distribution with mean=0, stdev=1

**One Dimension**

In [195]:
np.random.seed(125)
np.random.standard_normal(3)

array([-0.69883694,  0.01062308, -0.94678644])

In [196]:
np.random.seed(125)
np.random.randn(3)

array([-0.69883694,  0.01062308, -0.94678644])

**Two Dimensions**

In [197]:
np.random.seed(125)
np.random.randn(2,4)

array([[-0.69883694,  0.01062308, -0.94678644,  0.32872998],
       [ 0.31506457, -0.40282244, -2.63330372,  0.2019705 ]])

In [198]:
np.random.seed(125)
np.random.standard_normal((2,4))

array([[-0.69883694,  0.01062308, -0.94678644,  0.32872998],
       [ 0.31506457, -0.40282244, -2.63330372,  0.2019705 ]])

**Observe:** randn(), standard_normal() and normal() are able to generate standard normal numbers

In [199]:
np.random.seed(15)
print (np.random.randn(5))
np.random.seed(15)
print (np.random.normal ( size = 5 )) # stdev and mean not specified, default to standard normal
np.random.seed(15)
print (np.random.standard_normal (size=5))

[-0.31232848  0.33928471 -0.15590853 -0.50178967  0.23556889]
[-0.31232848  0.33928471 -0.15590853 -0.50178967  0.23556889]
[-0.31232848  0.33928471 -0.15590853 -0.50178967  0.23556889]


#### Normal Distribution (Non-Standard)

In [200]:
np.random.seed(125)
np.random.normal( loc = 12, scale=1.25, size=(3,3))

array([[ 11.12645382,  12.01327885,  10.81651695],
       [ 12.41091248,  12.39383072,  11.49647195],
       [  8.70837035,  12.25246312,  11.49084235]])

## Sampling (Integer)
```
random.choice( a, size=None, replace=True, p=None)  # a=integer, return <size> integers < a
random.choice( a, size=None, replace=True, p=None)  # a=array-like, return <size> integers picked from list a
```

In [201]:
np.random.choice (100, size=10)

array([58,  0, 84, 50, 89, 32, 87, 30, 66, 92])

In [202]:
np.random.choice( [1,3,5,7,9,11,13,15,17,19,21,23], size=10, replace=False)

array([ 5,  1, 23, 17,  3, 13, 15,  9, 21,  7])

## NaN : Missing Numerical Data

- You should be aware that NaN is a bit like a data virus–it infects any other object it touches  


In [203]:
t = np.array([1, np.nan, 3, 4]) 
t.dtype

dtype('float64')

Regardless of the operation, the result of arithmetic with NaN will be another NaN

In [204]:
1 + np.nan

nan

In [205]:
t.sum(), t.mean(), t.max()

(nan, nan, nan)

In [206]:
np.nansum(t), np.nanmean(t), np.nanmax(t)

(8.0, 2.6666666666666665, 4.0)