# Numpy Intro 

In [71]:
from __future__ import print_function, division

In [72]:
# Standard imports for data analysis packages in Python
import pandas as pd
import numpy as np
#import seaborn as sns
import matplotlib.pyplot as plt


from IPython.display import Image

# This enables inline Plots
%matplotlib inline

# Limit rows displayed in notebook and show floats with 2 decimal places
pd.set_option('display.max_rows', 10)
pd.set_option('display.precision', 2)

# Seed NumPy's global random generator so the random-number cells
# produce the same values on every Restart & Run All
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [73]:
print('Pandas Version: ', pd.__version__)
print('Numpy Version: ', np.__version__)

Pandas Version:  0.22.0
Numpy Version:  1.14.1


## Numpy

The numpy package is used in almost all numerical computation using Python. It is a package that provides high-performance vector, matrix and higher-dimensional data structures for Python. It is implemented in C and Fortran and is therefore fast when calculations are vectorized and we avoid explicit loops in Python.

* Pandas is built on top of Numpy.  Each Column in a Pandas DataFrame is a Numpy Array
* Plotting functions (matplotlib) need Numpy Arrays as input
* Scikit-Learn needs Numpy Data Structures as input for Features and Labels and building Models

Let's do an overview of Numpy Data Structures and Functions.

### Data Structures

* Arrays
* Matrices

## Creating Numpy Arrays:

There are a number of ways to initialize new numpy arrays, for example from

* a Python list or tuples
* using functions that are dedicated to generating numpy arrays, such as `arange`, `linspace`, etc.
* reading data from files

### Arrays

In [74]:
# a vector: the argument to the array function is a Python list
mylist = [1, 2, 3, 4, 5, 6]

arr1 = np.array(mylist)

arr1

array([1, 2, 3, 4, 5, 6])

In [75]:
print('Shape of Array: ', arr1.shape)
print('Type of object: ', type(arr1))
print('Type of contents: ', arr1.dtype)
print('Size (elements): ', arr1.size)
print('ndim (number of dim): ', arr1.ndim)

Shape of Array:  (6,)
Type of object:  <class 'numpy.ndarray'>
Type of contents:  int64
Size (elements):  6
ndim (number of dim):  1


In [80]:
## 2D Array
arr2 = np.array([[1, 2, 3], 
                [4, 5, 6]])

arr2

array([[1, 2, 3],
       [4, 5, 6]])

In [81]:
print('Shape of Array: ', arr2.shape)
print('Type of object: ', type(arr2))
print('Type of contents: ', arr2.dtype)
print('Size (elements): ', arr2.size)
print('ndim (number of dim): ', arr2.ndim)

Shape of Array:  (2, 3)
Type of object:  <class 'numpy.ndarray'>
Type of contents:  int64
Size (elements):  6
ndim (number of dim):  2


## Selecting Elements in Numpy

In [8]:
# Selecting a single element by position (indexing is zero-based, so index 1 is the 2nd element)
arr1[1]


2

In [9]:
# Selecting a range (slicing): it starts at the lower limit and goes up to, but does not include, the upper limit.
# Same behavior as a Python list.
arr1[1:3]

array([2, 3])

## Assigning Values

In [10]:
# You can assign values individually
arr1[0] = 10
arr1

array([10,  2,  3,  4,  5,  6])

In [11]:
# Or assign a single value to a whole range (the scalar is written into every selected element)
arr1[1:] = 20  # from index 1 (the 2nd element) through the end
arr1

array([10, 20, 20, 20, 20, 20])

In [12]:
# Reshape the 1-D array into a 2-D array with 2 rows and 3 columns.
# reshape returns the reshaped array; arr1 itself keeps its original shape.
arr1.reshape(2, 3)

array([[10, 20, 20],
       [20, 20, 20]])

In [13]:
my_2d_array = arr1.reshape(3, 2)

In [14]:
arr1

array([10, 20, 20, 20, 20, 20])

In [15]:
arr1.reshape(1, 6)

array([[10, 20, 20, 20, 20, 20]])

In [16]:
arr1.shape

(6,)

In [17]:
# When you add a scalar to an array, it adds it to each element in the array.
# This is called broadcasting
arr1 + 5

array([15, 25, 25, 25, 25, 25])

In [18]:
# Same thing with other operators on Scalar.  All operations broadcast to each element
arr1 * 5

array([ 50, 100, 100, 100, 100, 100])

In [19]:
# It works on 2-D Arrays too
arr2 * 10

array([[10, 20, 30],
       [40, 50, 60]])

In [20]:
# Now, let's add two Numpy Arrays of the same shape.
# NOTE: this rebinds arr1 and arr2, so the 6-element arr1 and the 2-D arr2
# created earlier are replaced from this cell onward.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

arr1 + arr2

array([5, 7, 9])

In [21]:
# Multiply Two Numpy Arrays
arr1 * arr2

array([ 4, 10, 18])

### Main Idea: Numpy arithmetic operators (`+`, `*`, ...) are element-wise operations.

* Adding two Numpy Arrays results in element-wise addition
* Multiplying two Arrays results in element-wise multiplication

What if I want to do regular matrix multiplication?

In [22]:
matlist1 = [[1,4],[6,2],[-1,3]]
matlist2 = [[3,2],[1,5]]

mat1 = np.array(matlist1)
mat2 = np.array(matlist2)

In [23]:
mat1

array([[ 1,  4],
       [ 6,  2],
       [-1,  3]])

In [24]:
mat2

array([[3, 2],
       [1, 5]])

In [25]:
# For two 2-D arrays np.dot performs matrix multiplication:
# (3x2) times (2x2) gives a (3x2) result
np.dot(mat1, mat2)

array([[ 7, 22],
       [20, 22],
       [ 0, 13]])

In [26]:
# The same product written as a method call: the left operand is the array
# the method is called on, the right operand is passed as the argument

mat1.dot(mat2)

array([[ 7, 22],
       [20, 22],
       [ 0, 13]])

In [27]:
matlist3 = [[5,4],[6,2],[-1,3]]
matlist4 = [[3,1],[2,5]]

mat3 = np.array(matlist3)
mat4 = np.array(matlist4)

In [28]:
# Matrix product of mat3 (3x2) and mat4 (2x2), kept in mat5 and displayed
mat5 = np.dot(mat3, mat4)
mat5

array([[23, 25],
       [22, 16],
       [ 3, 14]])

In [29]:
# Chained .dot calls evaluate left to right: ((mat3 . mat4) . mat2) . mat2
mat3.dot(mat4).dot(mat2).dot(mat2)

array([[ 453, 1043],
       [ 370,  784],
       [ 145,  426]])

In [30]:
(np.dot(mat4, mat2)).dot(mat2)

array([[ 41,  75],
       [ 62, 167]])

### Numpy Functions

In [31]:
# Create the integers 1 through 100 (arange excludes its upper bound, hence 101)
# and arrange them as a 10x10 array
np.arange(1, 101).reshape(10,10)

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
       [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
       [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
       [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50],
       [ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
       [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
       [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
       [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
       [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]])

In [32]:
# Enough of similarities.  Let's show some awesomeness
# Create 10 linearly spaced elements from 1 down to 0 (both endpoints included)
np.linspace(1, 0, 10)

array([1.        , 0.88888889, 0.77777778, 0.66666667, 0.55555556,
       0.44444444, 0.33333333, 0.22222222, 0.11111111, 0.        ])

In [33]:
# 11 points from 0 to 1 inclusive, which gives a step of 0.1
np.linspace(0,1,11)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [34]:
# e raised to 6 evenly spaced exponents between 2 and 3
np.exp(np.linspace(2,3,6))

array([ 7.3890561 ,  9.0250135 , 11.02317638, 13.46373804, 16.44464677,
       20.08553692])

In [35]:
# logspace returns base**x for x evenly spaced from 2 to 3; with base e this
# matches np.exp(np.linspace(2, 3, 6))
np.logspace(2,3,6, base = np.exp(1))

array([ 7.3890561 ,  9.0250135 , 11.02317638, 13.46373804, 16.44464677,
       20.08553692])

In [36]:
# Now, let's look at some distributions

In [37]:
# Normal Dist
# Mean of 1, std of 0.1, 10 elements
np.random.normal(1, 0.1, 10)

array([0.99470044, 0.74343935, 1.09965318, 1.01093912, 0.99880515,
       0.93824255, 1.05421937, 0.99653057, 1.08662854, 1.0204081 ])

In [38]:
# Standard Normal Dist - params are dimensions of array
np.random.randn(2, 4)

array([[ 0.19605019, -1.09139879, -0.96535172, -0.04399857],
       [ 2.09896377, -0.28400621, -0.80688438,  0.06831521]])

In [39]:
# Uniform distribution over the half-open interval [0, 1); the argument is the shape of the array
np.random.random((3,3))

array([[0.40106914, 0.40229465, 0.66359126],
       [0.69268769, 0.08473302, 0.74458922],
       [0.91374655, 0.8034578 , 0.29414241]])

In [40]:
# Look at other distributions - gamma, binomial, etc.

# TODO (exercise): use help, dir, google, or other tricks and figure out how to
# create an array of 20 binomial variables with n = 7 and p = .3

# Useful for exploring what is available:
#dir(np.random)
#help(np.random.binomial)
np.random.binomial(7,.3,20)

array([3, 3, 2, 2, 1, 1, 1, 1, 2, 0, 1, 3, 2, 2, 1, 1, 2, 2, 3, 3])

Here are some simple ways to create simple numpy arrays

1.   Arrays filled with a constant value: `np.zeros`, `np.ones`, `np.full`
2.   The identity matrix: `np.eye`



In [41]:
dir(np.random)

['Lock',
 'RandomState',
 '__RandomState_ctor',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_numpy_tester',
 'absolute_import',
 'bench',
 'beta',
 'binomial',
 'bytes',
 'chisquare',
 'choice',
 'dirichlet',
 'division',
 'exponential',
 'f',
 'gamma',
 'geometric',
 'get_state',
 'gumbel',
 'hypergeometric',
 'info',
 'laplace',
 'logistic',
 'lognormal',
 'logseries',
 'mtrand',
 'multinomial',
 'multivariate_normal',
 'negative_binomial',
 'noncentral_chisquare',
 'noncentral_f',
 'normal',
 'np',
 'operator',
 'pareto',
 'permutation',
 'poisson',
 'power',
 'print_function',
 'rand',
 'randint',
 'randn',
 'random',
 'random_integers',
 'random_sample',
 'ranf',
 'rayleigh',
 'sample',
 'seed',
 'set_state',
 'shuffle',
 'standard_cauchy',
 'standard_exponential',
 'standard_gamma',
 'standard_normal',
 'standard_t',
 'test',
 'triangular',
 'uniform',
 'vonmises',
 'wald',
 'weibull'

In [42]:
help(np.random.poisson)

Help on built-in function poisson:

poisson(...) method of mtrand.RandomState instance
    poisson(lam=1.0, size=None)
    
    Draw samples from a Poisson distribution.
    
    The Poisson distribution is the limit of the binomial distribution
    for large N.
    
    Parameters
    ----------
    lam : float or array_like of floats
        Expectation of interval, should be >= 0. A sequence of expectation
        intervals must be broadcastable over the requested size.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``lam`` is a scalar. Otherwise,
        ``np.array(lam).size`` samples are drawn.
    
    Returns
    -------
    out : ndarray or scalar
        Drawn samples from the parameterized Poisson distribution.
    
    Notes
    -----
    The Poisson distribution
    
    .. math:: f(k; \lambda)=\fra

In [43]:
# 2x5 array of draws from a Poisson distribution with lam=7
np.random.poisson(7,(2,5))

array([[11, 15,  7,  5,  7],
       [ 5,  7,  7,  4,  6]])

In [44]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [45]:
np.ones((5,2))

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [46]:
np.full((7,7),3.5)

array([[3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5],
       [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5]])

In [47]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [48]:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [49]:
row_r1 = a[1, :]    # Rank 1 view of the second row of a  
row_r2 = a[1:2, :]  # Rank 2 view of the second row of a

print(row_r1, row_r1.shape)
print(row_r2, row_r2.shape)

[5 6 7 8] (4,)
[[5 6 7 8]] (1, 4)


In [50]:
# Comparing an array with a scalar is element-wise too: the result is a boolean array of the same shape
a>7

array([[False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True]])

In [51]:
# With no axis given, np.sum adds up every element of the array
np.sum(a)

78

In [52]:
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [53]:
# axis=0 sums down the rows, giving one total per column
np.sum(a, axis=0)

array([15, 18, 21, 24])

In [54]:
# axis=1 sums across the columns, giving one total per row
np.sum(a, axis=1)

array([10, 26, 42])

In [55]:
# Transpose of a matrix
a.T

array([[ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11],
       [ 4,  8, 12]])

In [56]:
x = a.T
x

array([[ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11],
       [ 4,  8, 12]])

In [57]:
v = np.array([1, -1, 5])
v

array([ 1, -1,  5])

In [58]:
# Broadcasting: v has shape (3,) and x has shape (4, 3), so v is added to each row of x
y = x + v
y

array([[ 2,  4, 14],
       [ 3,  5, 15],
       [ 4,  6, 16],
       [ 5,  7, 17]])

In [59]:
# 4x3 array of float ones (note: despite the name mat33, it is not 3x3)
mat33 = np.ones((4,3))
mat33

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [60]:
y+mat33

array([[ 3.,  5., 15.],
       [ 4.,  6., 16.],
       [ 5.,  7., 17.],
       [ 6.,  8., 18.]])

In [61]:
# Tile v into a grid of copies: 3 repetitions down the rows and 2 across the columns, giving shape (3, 6)
np.tile(v,(3,2))

array([[ 1, -1,  5,  1, -1,  5],
       [ 1, -1,  5,  1, -1,  5],
       [ 1, -1,  5,  1, -1,  5]])

In [62]:
dir(v)

['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_finalize__',
 '__array_interface__',
 '__array_prepare__',
 '__array_priority__',
 '__array_struct__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__ilshift__',
 '__imatmul__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_e

In [63]:
# Rebind v to a longer vector, then compute its running (cumulative) sum
v = np.array([1, -1, 5, 9, -4, 23, -17])
print(v)
np.cumsum(v)

[  1  -1   5   9  -4  23 -17]


array([ 1,  0,  5, 14, 10, 33, 16])

In [64]:
# Index of the largest element (23, at position 5)
np.argmax(v)

5

In [65]:
# np.all is True when every element is truthy (non-zero); v contains no zeros
np.all(v)

True

In [66]:
# The cumulative sum of v contains a 0, which is falsy, so np.all returns False
np.all(np.cumsum(v))

False

### One final note on numpy arrays

Note that (unlike lists in Python), numpy arrays must have all of the elements be the same type.

In [67]:
stringlist = ['this','list','contains','strings']
arrstring = np.array(stringlist)
arrstring

array(['this', 'list', 'contains', 'strings'], dtype='<U8')

In [68]:
print('Shape of Array: ', arrstring.shape)
print('Type of object: ', type(arrstring))
print('Type of contents: ', arrstring.dtype)
print('Size (elements): ', arrstring.size)
print('ndim (number of dim): ', arrstring.ndim)

Shape of Array:  (4,)
Type of object:  <class 'numpy.ndarray'>
Type of contents:  <U8
Size (elements):  4
ndim (number of dim):  1


Below, we can see what happens when we try to mix different kinds of elements

In [69]:
mixedlist = [123,'this','list','has',54321,'elements','pi','=',3.14]
arrmixed = np.array(mixedlist)
arrmixed

array(['123', 'this', 'list', 'has', '54321', 'elements', 'pi', '=',
       '3.14'], dtype='<U21')

In [70]:
# A list that mixes scalars with a nested list has no regular shape, and
# np.array raises ValueError for it here.  The failure is the point of this
# cell, so catch it and show the message instead of leaving a cell that
# aborts Restart & Run All.
crazylist = [123,'strings',['list','of','strings'],'more','text']
try:
    arrcrazy = np.array(crazylist)
except ValueError as err:
    print('ValueError:', err)


ValueError: setting an array element with a sequence

In [86]:
# Element-wise comparison on the original 6-element vector.  arr1 was rebound
# to a 3-element array earlier in the notebook, so rebuild the vector from
# mylist to get the same result on a fresh top-to-bottom run.
np.array(mylist) < 3

array([ True,  True, False, False, False, False])