## NumPy 

is pure gold. 

It is fast, easy to learn, feature-rich, and therefore at the core of almost all popular scientific packages in the 
Python universe (including SciPy and Pandas, two most widely used packages for data science and statistical modeling). 

In [3]:
import numpy as np
import pandas as pd

In [4]:
for i in range(10, 15, 2):
    print(i)

10
12
14


In [3]:
# LISTs are good, you can change values but you cannot apply mathematical 
# functions

In [4]:
distance = [10, 15, 17, 26]
time     = [1,   2,  3,  4]

In [5]:
speed = distance / time

TypeError: unsupported operand type(s) for /: 'list' and 'list'

In [6]:
np_distance = np.array(distance)
np_time     = np.array(time)

In [7]:
speed = np_distance/np_time
speed

array([10.        ,  7.5       ,  5.66666667,  6.5       ])

In [8]:
# why use numpy
# compare numpy array with standard python LIST type array

In [9]:
list(range(5, 10))

[5, 6, 7, 8, 9]

In [10]:
for i in range(5):
    print(i)

0
1
2
3
4


In [11]:
import sys

In [12]:
np.arange(5)


array([0, 1, 2, 3, 4])

In [13]:
# PYTHON LIST
data = range(1000)

print(sys.getsizeof(5)*len(data))

# NUMPY ARRAY
data_np = np.arange(1000)
print(data_np.size * data_np.itemsize)

28000
4000


In [14]:
# The above output shows that the memory allocated by list is 28000 whereas the 
# memory allocated by the numpy array is just 4000. 
# From this, you can conclude that there is a major difference between the two and 
# this makes python numpy array as the preferred choice over list.

In [15]:
# Example - getsizeof
print('Integer memory size : ', sys.getsizeof(5) )
print('Integer memory size : ', sys.getsizeof(56666666) )
print('Integer memory size : ', sys.getsizeof(56666669559956) )
print('Float   memory size : ', sys.getsizeof(5.05) )
print('Float   memory size : ', sys.getsizeof(5.058585885) )

print('\nString  memory size : ', sys.getsizeof('1') )
print('String  memory size : ', sys.getsizeof('A') )
print('String  memory size : ', sys.getsizeof('AA') )
print('String  memory size : ', sys.getsizeof('AAA') )
print('String  memory size : ', sys.getsizeof('AAAA') )
print('String  memory size : ', sys.getsizeof('AAAAA') )

Integer memory size :  28
Integer memory size :  28
Integer memory size :  32
Float   memory size :  24
Float   memory size :  24

String  memory size :  54
String  memory size :  54
String  memory size :  51
String  memory size :  52
String  memory size :  53
String  memory size :  54


In [16]:
import time
import sys
 
SIZE = 1000000
 
L1= range(SIZE)
L2= range(SIZE)
A1= np.arange(SIZE)
A2=np.arange(SIZE)
 
start= time.time()
result=[(x,y) for x,y in zip(L1,L2)]
print((time.time()-start)*1000)
 
start=time.time()
result= A1+A2
print((time.time()-start)*1000)

229.8567295074463
60.96243858337402


In [17]:
SIZE = 10
L1= range(SIZE)
L2= range(SIZE)
[(x,y) for x,y in zip(L1,L2)]

[(0, 0),
 (1, 1),
 (2, 2),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9)]

In [18]:
range(5)

range(0, 5)

In [19]:
list(range(5))

[0, 1, 2, 3, 4]

In [20]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [21]:
# numpy array
np_arr     =  np.arange(1000000)

# normal list
normal_arr = list(range(1000000))

In [22]:
len(np_arr)

1000000

In [23]:
%%time 

# loop using numpy array
for i in range(5):
    np_arr2 = np_arr * 2

Wall time: 12 ms


In [24]:
%%time 
# loop using normal python array, list

for i in range(5):
    normal_arr2 = [x * 2 for x in normal_arr]

Wall time: 674 ms


In [25]:
[every_val * 2 for every_val in normal_arr]
    

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98,
 100,
 102,
 104,
 106,
 108,
 110,
 112,
 114,
 116,
 118,
 120,
 122,
 124,
 126,
 128,
 130,
 132,
 134,
 136,
 138,
 140,
 142,
 144,
 146,
 148,
 150,
 152,
 154,
 156,
 158,
 160,
 162,
 164,
 166,
 168,
 170,
 172,
 174,
 176,
 178,
 180,
 182,
 184,
 186,
 188,
 190,
 192,
 194,
 196,
 198,
 200,
 202,
 204,
 206,
 208,
 210,
 212,
 214,
 216,
 218,
 220,
 222,
 224,
 226,
 228,
 230,
 232,
 234,
 236,
 238,
 240,
 242,
 244,
 246,
 248,
 250,
 252,
 254,
 256,
 258,
 260,
 262,
 264,
 266,
 268,
 270,
 272,
 274,
 276,
 278,
 280,
 282,
 284,
 286,
 288,
 290,
 292,
 294,
 296,
 298,
 300,
 302,
 304,
 306,
 308,
 310,
 312,
 314,
 316,
 318,
 320,
 322,
 324,
 326,
 328,
 330,
 332,
 334,
 336,
 338,
 340,
 342,
 344,
 346,
 348,
 350,

In [26]:
np.set_printoptions(edgeitems=5, linewidth=100)

In [27]:
print(np_arr2)

[      0       2       4       6       8 ... 1999990 1999992 1999994 1999996 1999998]


In [28]:
# NumPy-based algorithms are generally 10 to 100 times faster (or more) 
# than their pure Python counterparts and use significantly less memory.

In [29]:
# NDIM

In [30]:
a = np.array(
             [ 
               [1, 2, 3], 
               (4, 5, 6)
             ]
            )
a

array([[1, 2, 3],
       [4, 5, 6]])

In [31]:
print(a.ndim)     # number of dimensions in the array

#print(a.itemsize) # So every element occupies 4 byte in the above numpy array.
print(a.size)

print(a.shape)

2
6
(2, 3)


In [32]:
# linespace
# Equally-spaced values

In [33]:
np.linspace(10, 20, 3000)

array([10.        , 10.00333444, 10.00666889, 10.01000333, 10.01333778, ..., 19.98666222,
       19.98999667, 19.99333111, 19.99666556, 20.        ])

In [34]:
np.logspace(5, 7, 2)

array([  100000., 10000000.])

In [35]:
np.logspace(1, 2, 2)

array([ 10., 100.])

In [36]:
np.logspace(1, 2, 3)

array([ 10.       ,  31.6227766, 100.       ])

In [37]:
np.linspace(5, 10, 5, endpoint=False, dtype=int)

array([5, 6, 7, 8, 9])

## Random
The random module provides functions to generate random numbers 
(and also statistical distributions) of any given shape.

numpy.random.randn generates samples from the normal distribution, 
numpy.random.rand  generates samples from the unifrom (in range [0,1)).

#### numpy.random.rand(d0, d1, ..., dn)

Random values in a given shape.

In [38]:
np.random.rand(3,2)

array([[0.61972015, 0.74185294],
       [0.9625047 , 0.19868709],
       [0.70537955, 0.56737358]])

In [39]:
np.random.rand(3,2,4)

array([[[0.12776961, 0.12456099, 0.52578446, 0.5443928 ],
        [0.61658392, 0.98989537, 0.28225725, 0.35124483]],

       [[0.73124847, 0.63252721, 0.64413125, 0.80513861],
        [0.46771667, 0.93379754, 0.71950062, 0.66013093]],

       [[0.23905227, 0.36075675, 0.26944105, 0.56678577],
        [0.96890495, 0.7840926 , 0.50956973, 0.35756621]]])

In [40]:
np.random.rand(5)

array([0.05233828, 0.79160451, 0.00342553, 0.80362986, 0.18308496])

In [41]:
np.random.rand(5, )

array([0.54759087, 0.4443677 , 0.04838629, 0.5874012 , 0.57148893])

#### numpy.random.randn(d0, d1, ..., dn)
Return a sample (or samples) from the “standard normal” distribution.

random floats sampled from a univariate “normal” (Gaussian) distribution of mean 0 and variance 1 (if any of the d_i are floats, they are first converted to integers by truncation). 

A single float randomly sampled from the distribution is returned if no argument is provided.

In [42]:
np.random.randn()

0.5931876375301097

In [43]:
np.random.randn(2, 4)

array([[ 0.94673006, -1.22480507, -0.75592051, -1.43788331],
       [-2.15574122, -1.44890729, -0.33164569, -0.9969835 ]])

#### numpy.random.randint(low, high=None, size=None, dtype='l')
Return random integers from low (inclusive) to high (exclusive).

In [44]:
# Random integers between [0, 10) of shape 2,2
print(np.random.randint(5, 10, size=[5, 4]))

[[5 5 8 5]
 [9 5 6 8]
 [5 9 6 9]
 [9 8 7 7]
 [5 6 7 9]]


#### numpy.random.random(size=None)
Return random floats in the half-open interval [0.0, 1.0).

In [45]:
# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2, 20]))

[[0.85873279 0.56293149 0.02176054 0.48584158 0.51466679 0.93169966 0.51418347 0.26351418
  0.75150962 0.56932522 0.21897608 0.99739675 0.26416393 0.13178147 0.15321169 0.46113198
  0.80029243 0.35949812 0.67998795 0.21909993]
 [0.13491498 0.86918445 0.3648022  0.49675922 0.01158669 0.38378131 0.48973208 0.52692741
  0.47940161 0.89835856 0.19690661 0.85649863 0.88993677 0.14133638 0.1364203  0.7443663
  0.85893176 0.32367032 0.08215487 0.79134866]]


In [46]:
np.random.random()

0.668919692659592

#### numpy.random.choice(a, size=None, replace=True, p=None)
Generates a random sample from a given 1-D array

In [47]:
# Pick 10 items from a given list, with equal probability
print(np.random.choice(['deepak', 'raj', 'rakesh', 'pratap', 'kumar'], size=3, replace=True))  

['kumar' 'deepak' 'deepak']


In [48]:
print(np.random.choice([10, 20, 40, 600, 400], size=3, replace=False))  

[600  40  20]


In [49]:
# If you want to repeat the same set of random numbers every time, you need to set the seed 
# or the random state. The see can be any value. The only requirement is you must set the seed 
# to the same value every time you want to generate the same set of random numbers.

# Once np.random.RandomState is created, all the functions of the np.random module becomes 
# available to the created randomstate object.

In [54]:
# Create the random state/ SEEDING
rn = np.random.RandomState(1)

# Create random numbers between [0,1) of shape 2,2
print(rn.rand(1, 2))

[[0.417022   0.72032449]]


In [51]:
# [[0.91910903 0.6421956 ]]

In [42]:
rn = np.random.RandomState(40)
print(rn.rand(2,2))

[[0.40768703 0.05536604]
 [0.78853488 0.28730518]]


In [44]:
rn = np.random.RandomState(41)
print(rn.rand(2,2))

[[0.25092362 0.04609582]
 [0.67681624 0.04346949]]


In [29]:
# Set the random seed
np.random.seed(101)

# Create random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))

[[0.51639863 0.57066759]
 [0.02847423 0.17152166]]


In [46]:
# How to get the unique items and the counts?
# The np.unique method can be used to get the unique items. 
# If you want the repetition counts of each item, set the return_counts parameter to True.

# Create random integers of size 10 between [0,10)
np.random.seed(1090)
arr_rand = np.random.randint(0, 10, size=4)
print(arr_rand)

[6 3 2 2]


In [47]:
# Get the unique items and their counts
uniqs, counts = np.unique(arr_rand, return_counts=True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)

Unique items :  [2 3 6]
Counts       :  [2 1 1]


In [48]:
np.unique(arr_rand, return_counts=True)

(array([2, 3, 6]), array([2, 1, 1], dtype=int64))

In [38]:
# Normal distribution with mean=0 and variance=1
data = np.random.randn(3, 3)

In [29]:
data * 10

array([[ -5.07466627,  13.83221556,  -0.35655646],
       [ 10.63415345,  12.81528231, -14.59144362],
       [ -4.31064067,  -1.87615251,   3.77466369]])

In [39]:
data + data

array([[-3.0246213 ,  1.07555975, -3.50838351],
       [-2.63850978,  1.30903435,  1.43948332],
       [-0.55778044,  0.9565832 , -1.60488104]])

In [40]:
data.shape

(3, 3)

In [41]:
data.dtype

dtype('float64')

In [49]:
# creating ndarray
data1 = [6, 7.1, 85, 0, 1]

In [50]:
arr1 = np.array(data1)

In [51]:
arr1

array([ 6. ,  7.1, 85. ,  0. ,  1. ])

In [47]:
# nested array
data2 = [[1, 2.3, 3, 4], [5, 6, 7, 8]]

In [48]:
arr2 = np.array(data2)

In [40]:
arr2

array([[1. , 2.3, 3. , 4. ],
       [5. , 6. , 7. , 8. ]])

In [41]:
arr2.shape

(2, 4)

In [42]:
arr2.ndim

2

In [43]:
arr2.dtype

dtype('float64')

In [33]:
# other methods of creating arrays

In [52]:
# zeros
a = np.zeros(10)
a

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [53]:
b = np.zeros((3, 6))
b

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [54]:
c = np.ones_like(b)
c

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [52]:
# ones
a = np.ones(10, dtype=int)
a

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [56]:
b = np.ones((3, 6))
b

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [50]:
c = np.ones_like(b)
c

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [58]:
# arange
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [59]:
a.dtype

dtype('int32')

In [60]:
# convert the data type
b = a.astype(np.float64)
b

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14.])

In [74]:
# Arithmetic with NumPy Arrays
# Any arithmetic operations between equal-size arrays applies the 
# operation element-wise:

In [61]:
arr = np.array([[1., 2., 3.], [4., 5., 6.]])

In [57]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [62]:
arr + 10

array([[11., 12., 13.],
       [14., 15., 16.]])

In [63]:
arr + arr

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [60]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [65]:
# Comparisons between arrays of the same size yield boolean arrays:
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])

In [66]:
arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [67]:
# boolean indexes
arr2 > 5

array([[False, False, False],
       [ True, False,  True]])

In [68]:
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

# Basic Indexing and Slicing

In [53]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [54]:
arr[5]

5

In [55]:
arr[5:8]

array([5, 6, 7])

In [56]:
# broadcasting a value to multiple array positions
arr[5:8] = 12

In [57]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [60]:
arr_slice = arr[5: 8]
arr_slice

array([12, 12, 12])

In [61]:
# change values in arr_slice, the mutations are reflected in the original 
# array arr
arr_slice[0] = 12345

In [63]:
arr_slice

array([12345,    12,    12])

In [64]:
arr

array([    0,     1,     2,     3,     4, 12345,    12,    12,     8,     9])

In [36]:
# using : only means all values
arr_slice[1:2] = 9999

In [37]:
arr_slice

array([12345,  9999,    12])

In [60]:
arr

array([    0,     1,     2,     3,     4, 12345,  9999,    12,     8,
           9])

In [None]:
# if you just assign a portion of an array to another array, the new array you just 
# created actually refers to the parent array in memory.
# That means, if you make any changes to the new array, it will reflect in the parent 
# array as well.

# So to avoid disturbing the parent array, you need to make a copy of it using copy(). 
# All numpy arrays come with the copy() method.

# deep copy method

In [65]:
# If you want a copy of a slice of an ndarray instead of a view, you
# will need to explicitly copy the array — for example,
# arr[5:8].copy().
arr_slice = arr[5:8].copy()

In [66]:
arr_slice

array([12345,    12,    12])

In [83]:
arr_slice[:] = 888

In [84]:
arr_slice

array([888, 888, 888])

In [85]:
# the original array is not changed
arr

array([    0,     1,     2,     3,     4, 12345,    12,    12,     8,
           9])

In [43]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[1., 2., 3., 4.],
       [3., 4., 5., 6.],
       [5., 6., 7., 8.]])

In [75]:
arr2a = arr2[:3,:3]  
arr2a[:3, :3] = 100  # 100 will reflect in arr2
arr2a

array([[100., 100., 100.],
       [100., 100., 100.],
       [100., 100., 100.]])

In [106]:
# Copy portion of arr2 to arr2b
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 101  # 101 will not reflect in arr2
arr2

array([[ 100.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [239]:
# Indexing with slices
arr = np.arange(10)

In [240]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [241]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [244]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [245]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [246]:
arr2d[2]

array([7, 8, 9])

In [6]:
arr2d[0][2]

3

In [115]:
# expression arr2d[:2] as “select the first two rows of arr2d.”
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [116]:
# first 2 rows and all columns from 1st pos
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [117]:
# 1st row, all columns 0th and 1st
arr2d[1, :2]

array([4, 5])

In [118]:
arr2d[:2, 2]

array([3, 6])

In [120]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [119]:
arr2d[:, 1]

array([2, 5, 8])

In [121]:
arr2d[:2, 1:] = 0

In [122]:
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

# Boolean Indexing

In [60]:
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])

data  = np.random.randn(7, 4)

In [61]:
type(names)

numpy.ndarray

In [62]:
data

array([[-0.47516227, -1.63618564, -1.03423515,  1.3305419 ],
       [-0.35014555,  1.48826891,  3.11954352, -1.16948809],
       [-1.65268844,  1.66176101,  0.49309883, -0.67987526],
       [-0.49219277,  0.18097391, -0.28030137, -0.99839374],
       [ 1.19720744,  0.6774302 ,  0.44954604,  0.12556793],
       [ 3.26682321,  0.36006058,  0.64091742, -1.56398868],
       [-1.14786465, -2.13350459, -0.7108062 ,  0.68405774]])

In [63]:
# Suppose each name corresponds to a row in the data array and we wanted to 
# select all the rows with corresponding name 'Rajat'.
names == 'Rajat'

array([ True, False, False,  True, False, False, False])

In [64]:
data[names == 'Rajat']

array([[-0.47516227, -1.63618564, -1.03423515,  1.3305419 ],
       [-0.49219277,  0.18097391, -0.28030137, -0.99839374]])

In [40]:
# The boolean array must be of the same length as the array axis it’s 
# indexing.

In [65]:
# what happens if they are of different lengths
names = np.array(['Rajat', 'Maruthi', 'Rajat', 'Bhanu', 'Viswa' ])
data = np.random.randn(7, 4)

In [66]:
data[names == 'Rajat']

IndexError: boolean index did not match indexed array along dimension 0; dimension is 7 but corresponding boolean dimension is 5

In [71]:
names = np.array(['Rajat', 'Maruthi', 'Dinesh', 'Rajat', 'Bhanu', 'Viswa', 'Kamal'])
data = np.random.randn(7, 4)
data

array([[ 0.47705633, -0.30265041,  0.70221482,  0.60406511],
       [-1.01484429, -2.52341764, -0.74682102,  0.95519768],
       [-2.02476846, -1.23506726,  0.30324701, -1.16072896],
       [-0.53602955, -0.92317164,  1.4682271 , -0.74557656],
       [-0.48608859,  0.38062046, -0.40161537,  1.66452105],
       [-1.59894846,  0.67239015,  0.9409431 , -0.40073823],
       [ 1.15841076,  0.49946209, -2.12805837,  0.80098367]])

In [72]:
data[names == 'Rajat', :2]

array([[ 0.47705633, -0.30265041],
       [-0.53602955, -0.92317164]])

In [73]:
data[names == 'Rajat', 2]

array([0.70221482, 1.4682271 ])

In [74]:
# To select everything but 'Rajat', you can either use != or negate 
# the condition using ~

In [75]:
names != 'Rajat'

array([False,  True,  True, False,  True,  True,  True])

In [76]:
data[names != 'Rajat']

array([[-1.01484429, -2.52341764, -0.74682102,  0.95519768],
       [-2.02476846, -1.23506726,  0.30324701, -1.16072896],
       [-0.48608859,  0.38062046, -0.40161537,  1.66452105],
       [-1.59894846,  0.67239015,  0.9409431 , -0.40073823],
       [ 1.15841076,  0.49946209, -2.12805837,  0.80098367]])

In [84]:
data[~(names == 'Rajat')]

array([[-0.7257825 , -0.23304294,  0.79512783, -0.82779381],
       [-0.88198941, -0.59675786, -0.41573425,  0.33240765],
       [-0.29271833,  1.10976086, -0.13034251,  0.30730461],
       [ 0.02499974,  0.23292252,  0.20152328,  0.53563886],
       [-0.65478464,  0.54041339, -1.92335817, -0.02839689]])

In [55]:
cond = names == 'Rajat'

In [56]:
data[cond]

array([[ 0.71366234, -1.57264267, -1.51452019, -0.49902602],
       [-0.03489345,  0.1165809 ,  1.00369667, -1.07490809]])

In [57]:
data[~cond]

array([[ 0.19355872,  0.2924369 ,  0.55851144, -0.35602768],
       [-0.82169447, -0.50230961,  1.95569125,  0.11675839],
       [-0.96035662,  0.55041302,  0.54828687,  0.52172206],
       [ 0.36385055,  1.39868101, -0.65673978, -0.91079013],
       [ 0.02872506, -0.67861069, -0.35461271,  0.04509057]])

In [140]:
# Selecting two of the three names to combine multiple boolean conditions, 
# use boolean arithmetic operators like & (and) and | (or):

In [57]:
# The Python keywords and and or do not work with boolean arrays.
# Use & (and) and | (or) instead.
cond = (names == 'Rajat') | (names == 'Bhanu')

In [58]:
data[cond]

array([[ 1.33308089, -0.50616188,  1.51230802, -0.86483924],
       [-0.26482534,  1.11836962, -0.06863932, -1.74777717],
       [-1.162932  , -0.20284073, -0.9386163 , -0.52949487]])

In [59]:
cond = data > 0

In [60]:
cond

array([[ True, False,  True, False],
       [False,  True,  True, False],
       [ True, False, False,  True],
       [False,  True, False, False],
       [False, False, False, False],
       [False, False, False,  True],
       [ True,  True, False, False]])

In [99]:
data[cond]

array([0.40309523, 0.26205673, 1.80258176, 0.72525459, 1.28856092,
       0.86735102, 2.19629278, 1.67470646, 0.62474701, 2.29348766,
       0.96389112, 0.22721597, 1.74057542, 0.56788169, 0.04247589,
       0.89354072])

In [100]:
data[cond] = 0

In [146]:
data

array([[ 0.        , -2.6197451 ,  0.        ,  0.        ],
       [-0.29900735,  0.        , -1.98756891, -0.21967189],
       [ 0.        ,  0.        , -0.51827022, -0.8084936 ],
       [-0.50175704,  0.        ,  0.        , -0.5297602 ],
       [ 0.        ,  0.        ,  0.        , -0.70205309],
       [-0.32766215, -0.39210815, -1.46351495,  0.        ],
       [ 0.        ,  0.        , -0.23458713, -1.41537074]])

In [76]:
# change data for Rajat = 7
data[names != 'Rajat'] = 7

In [77]:
data

array([[-0.72285635,  0.07651531,  0.14400945,  0.54824053],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.40287592,  0.33038539, -1.36634681,  0.23088903],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ]])

In [149]:
# Transposing Arrays and Swapping Axes
# Transposing is a special form of reshaping that similarly returns a view on the underlying
# data without copying anything

In [104]:
a = np.array([(8,9,10),(11,12,13)])
print(a)

a=a.reshape(6,1)
print(a)

[[ 8  9 10]
 [11 12 13]]
[[ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]]


In [150]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [105]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [106]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [153]:
# Universal Functions: Fast Element-Wise Array Functions


In [107]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [108]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [156]:
np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

In [157]:
np.floor(arr)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [158]:
# useful in data anlytics
# is not a number
np.isnan(arr)

array([False, False, False, False, False, False, False, False, False, False], dtype=bool)

In [159]:
# binary functions
x = np.random.randn(8)
y = np.random.randn(8)

In [160]:
np.maximum(x, y)

array([-0.07444592, -0.34271452, -0.02651388,  0.06023021,  2.46324211,
        1.8861859 ,  0.30154734,  0.25755039])

In [161]:
np.minimum(x, y)

array([-0.42064532, -1.91877122, -0.80227727, -0.16128571,  0.40405086,
       -0.19236096,  0.17457781, -0.03471177])

In [None]:
# Array-Oriented Programming with Arrays
# NumPy arrays enables you to express many kinds of data processing tasks 
# as concise array expressions that might otherwise require writing loops

In [None]:
# This practice of replacing explicit loops with array expressions 
# is commonly referred to as vectorization.

In [None]:
# vectorized array operations will often be one or two (or more) orders
# of magnitude faster than their pure Python equivalents

In [26]:
# to evaluate the function sqrt(x^2 + y^2) 
# across a regular grid of values.

In [162]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points

In [163]:
x = np.arange(10)
y = np.arange(10)

In [164]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [165]:
y

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [166]:
np.sqrt(np.add(x ** 2 , y ** 2))

array([  0.        ,   1.41421356,   2.82842712,   4.24264069,
         5.65685425,   7.07106781,   8.48528137,   9.89949494,
        11.3137085 ,  12.72792206])

In [None]:
# random number **********************************************

In [89]:
# mean at 0 and standard deviation of 1
# This will return random floats in the half-open interval [0.0, 1.0) 
# following the “continuous uniform” distribution.
np.random.randn(5)

array([ 0.46130127, -0.08454584, -2.13429696,  0.4659703 , -0.66906212])

In [90]:
np.random.random((2,2))

array([[0.01172748, 0.87810564],
       [0.92686513, 0.44576359]])

In [91]:
# Generate A Random Number From The Normal Distribution
np.random.normal()

-0.1449835093614585

In [92]:
# Generate Four Random Numbers From The Normal Distribution
np.random.normal(size=4)

array([-0.58808676,  0.65506892,  1.41204149, -0.22734397])

In [93]:
# Random number from distribution with mean 15, variance 2
np.random.normal(15, 2)

17.913825709813352

In [257]:
# an array of 10 points randomly sampled from a normal distribution
# loc=mean, scale=std deviation
np.random.normal(loc=0.0, scale=1.0, size=10)

array([ 0.09707755,  0.96864499, -0.70205309, -0.32766215, -0.39210815,
       -1.46351495,  0.29612028,  0.26105527,  0.00511346, -0.23458713])

In [94]:
# To return a 8 by 5 array of random numbers from the same distribution: 
np.random.normal(0, 3, size=(3, 4))

array([[-6.60766688, -0.9249152 , -1.60743084,  1.66362669],
       [ 0.45449135, -0.95370715,  0.55474357, -0.19222015],
       [ 0.478884  ,  1.5192083 , -6.02105364,  0.73427912]])

In [176]:
# A 5 by 3 array of random numbers from the standard normal distribution 
# with mean 1 and variance 1:

np.random.normal(size=(2, 4))

array([[ 1.15859558, -0.82068232,  0.96337613,  0.41278093],
       [ 0.82206016,  1.89679298, -0.24538812, -0.75373616]])

In [177]:
# Generate Four Random Integers Between 1 and 100
np.random.randint(low=1, high=100, size=4)

array([22, 30, 38, 51])

In [178]:
# Randomly constructing 1D array
array = np.random.rand(5)
print("1D Array filled with random values : \n", array)

1D Array filled with random values : 
 [ 0.89204656  0.63113863  0.7948113   0.50263709  0.57690388]


In [182]:
# Randomly constructing 1D array following Gaussian Distribution

# 1D Array
array = np.random.normal(0.0, 1.0, 5)
print("1D Array filled with random values "
      "as per gaussian distribution : \n", array)

# 3D array
array = np.random.normal(0.0, 1.0, (2, 3))
print("\n\n2D Array filled with random values "
      "as per gaussian distribution : \n", array)

1D Array filled with random values as per gaussian distribution : 
 [-0.60390437  0.30444912  0.25720749  0.02393181  0.87191399]


3D Array filled with random values as per gaussian distribution : 
 [[ 1.43735633  0.00730637  1.33088133]
 [ 0.98820261  0.23229616  0.17618092]]


In [None]:
# Making random numbers predictable *******************

In [95]:
# Set the state of the random number generator
np.random.seed(42)

In [97]:
# One set of random numbers
first_random_arr = np.random.normal(size=(2, 2))
print(first_random_arr)

# Another set
second_random_arr = np.random.normal(size=(2, 2))
print(second_random_arr)

# Reset the state of the random number generator
np.random.seed(45)

# The same as "first_random_arr" above.
print(np.random.normal(size=(2, 2)))

# The same as "second_random_arr" above.
print(np.random.normal(size=(2, 2)))

[[-0.46947439  0.54256004]
 [-0.46341769 -0.46572975]]
[[ 0.24196227 -1.91328024]
 [-1.72491783 -0.56228753]]
[[ 0.02637477  0.2603217 ]
 [-0.39514554 -0.20430091]]
[[-1.27163265 -2.59687863]
 [ 0.28968091 -0.87330464]]


In [89]:
# Create an object array to hold numbers as well as strings
arr1d_obj = np.array([1, 'a'], dtype='object')
arr1d_obj

array([1, 'a'], dtype=object)

In [90]:
# Convert an array back to a list
arr1d_obj.tolist()

[1, 'a']

In [91]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 4],[3, 4, 5, 6], [5, 6, 7, 8]]

arr2 = np.array(list2, dtype='float')
arr2

array([[ 1.,  2.,  3.,  4.],
       [ 3.,  4.,  5.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [92]:
# Reverse only the row positions
arr2[::-1, ]

array([[ 5.,  6.,  7.,  8.],
       [ 3.,  4.,  5.,  6.],
       [ 1.,  2.,  3.,  4.]])

In [93]:
# Reverse the row and column positions
arr2[::-1, ::-1]

array([[ 8.,  7.,  6.,  5.],
       [ 6.,  5.,  4.,  3.],
       [ 4.,  3.,  2.,  1.]])

In [98]:
# represent missing values and infinite?
# Missing values can be represented using np.nan object, while np.inf represents infinite. 
# Let’s place some in arr2d.

# Insert a nan and an inf
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

array([[  1.,   2.,   3.,   4.],
       [  3.,  nan,  inf,   6.],
       [  5.,   6.,   7.,   8.]])

In [99]:
# Replace nan and inf with -1. 
missing_bool = np.isnan(arr2) | np.isinf(arr2)
print(missing_bool)

arr2[missing_bool] = -1  
arr2

[[False False False False]
 [False  True  True False]
 [False False False False]]


array([[ 1.,  2.,  3.,  4.],
       [ 3., -1., -1.,  6.],
       [ 5.,  6.,  7.,  8.]])

In [100]:
# compute mean, min, max on the ndarray
# mean, max and min
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

Mean value is:  3.58333333333
Max value is:  8.0
Min value is:  -1.0


In [102]:
# compute the minimum values row wise or column wise, use the np.amin 

# Row wise and column wise min
print("Column wise minimum : ", np.amin(arr2, axis=0))
print("Row wise minimum    : ", np.amin(arr2, axis=1))

Column wise minimum :  [ 1. -1. -1.  4.]
Row wise minimum    :  [ 1. -1.  5.]


In [None]:
# Reshaping and Flattening Multidimensional arrays
# Reshaping is changing the arrangement of items so that shape of the array changes 
# while maintaining the same number of dimensions.

# Flattening, however, will convert a multi-dimensional array to a flat 1d array. 
# And not any other shape.

# First, let’s reshape the arr2 array from 3×4 to 4×3 shape.

In [107]:
# Reshape a 3x4 array to 4x3 array
arr2.reshape(4, 3)

array([[ 100.,    2.,    3.],
       [   4.,    3.,    4.],
       [   5.,    6.,    5.],
       [   6.,    7.,    8.]])

In [None]:
# What is the difference between flatten() and ravel()?
# There are 2 popular ways to implement flattening. That is using the flatten() method 
# and the other using the ravel() method.

# The difference between ravel and flatten is, the new array created using ravel is 
# actually a reference to the parent array. 
# So, any changes to the new array will affect the parent as well. 
# But is memory efficient since it does not create a copy.

In [108]:
# Flatten it to a 1d array
arr2.flatten()

array([ 100.,    2.,    3.,    4.,    3.,    4.,    5.,    6.,    5.,
          6.,    7.,    8.])

In [111]:
# Changing the flattened array does not change parent
b1 = arr2.flatten()  
b1[0] = 105  # changing b1 does not affect arr2
arr2

array([[ 100.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [112]:
b1

array([ 105.,    2.,    3.,    4.,    3.,    4.,    5.,    6.,    5.,
          6.,    7.,    8.])

In [113]:
# Changing the raveled array changes the parent also.
b2 = arr2.ravel()  
b2[0] = 101  # changing b2 changes arr2 also
arr2


array([[ 101.,    2.,    3.,    4.],
       [   3.,    4.,    5.,    6.],
       [   5.,    6.,    7.,    8.]])

In [114]:
# create sequences, repetitions 
# Lower limit is 0 be default
print(np.arange(5))  

[0 1 2 3 4]


In [115]:
# 0 to 9
print(np.arange(0, 10))  

[0 1 2 3 4 5 6 7 8 9]


In [116]:
# 0 to 9 with step of 2
print(np.arange(0, 10, 2))  

[0 2 4 6 8]


In [117]:
# 10 to 1, decreasing order
print(np.arange(10, 0, -1))

[10  9  8  7  6  5  4  3  2  1]


In [119]:
# create repeating sequences?
# np.tile will repeat a whole list or array n times. 
# Whereas, np.repeat repeats each item n times.

a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 2))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 2))

Tile:    [1 2 3 1 2 3]
Repeat:  [1 1 2 2 3 3]


## Matrix and Arrays

The matrix objects are a subclass of the numpy arrays (ndarray). 

- The matrix objects inherit all the attributes and methods of ndarry. 
- difference is that numpy matrices are strictly 2-dimensional, while numpy arrays can be of any dimension, i.e. they are n-dimensional.

The most important advantage of matrices is that the provide convenient notations for the matrix mulitplication. 

If X and Y are two Matrices than X * Y defines the matrix multiplication. While on the other hand, if X and Y are ndarrays, X * Y define an element by element multiplication.

In [29]:
x = np.array( ((2,3), (3, 5))  )
y = np.array( ((1,2), (5, -1)) )

In [30]:
x, y

(array([[2, 3],
        [3, 5]]),
 array([[ 1,  2],
        [ 5, -1]]))

In [31]:
xm = np.matrix( x )
ym = np.matrix( y )

In [32]:
xm, ym

(matrix([[2, 3],
         [3, 5]]),
 matrix([[ 1,  2],
         [ 5, -1]]))

In [33]:
xm * ym

matrix([[17,  1],
        [28,  1]])

it is is matrix dot product

In [34]:
x * y

array([[ 2,  6],
       [15, -5]])

element wise multiplication

In [35]:
np.dot(x,y)

array([[17,  1],
       [28,  1]])

#### np.multiply
element wise multiplication

In [38]:
arr1 = np.array([[1, 2],
                 [3, 4]])
arr2 = np.array([[5, 6],
                 [7, 8]])

arr_result = np.multiply(arr1, arr2)

print(arr_result)

[[ 5 12]
 [21 32]]


In [37]:
arr1 * arr2

array([[ 5, 12],
       [21, 32]])