<a href="https://colab.research.google.com/github/vvtrip/ml_manifestations/blob/master/7_tools/7_numpy_misc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intro

In [1]:
import numpy as np

In [2]:
# An N-dimensional array (ndarray) is a fixed-size, multi-dimensional container whose elements all share one data type.
x = np.array([5, 8, 9, 10, 11])
type(x)

numpy.ndarray

In [3]:
# Build a 2-D array from nested lists: each inner list becomes one row.
y = np.array([
    [6, 9, 5],
    [10, 82, 34],
])
print(y)

[[ 6  9  5]
 [10 82 34]]


In [4]:
# Commonly used ndarray attributes:
# ndim     : number of dimensions.
# shape    : size along each dimension, as a tuple.
# size     : total number of elements.
# dtype    : data type shared by every element.
# itemsize : size of one element, in bytes.
# nbytes   : total bytes consumed by all elements (size * itemsize).

print(y.ndim, y.shape, y.size, y.dtype, y.itemsize, y.nbytes)

2 (2, 3) 6 int64 8 48


In [5]:
# Complex literals make NumPy choose a complex dtype for the array.
y = np.array([3+4j, 0.4+7.8j])
print(y.dtype)

complex128


In [6]:
# flags reports memory-layout information for the array (contiguity, data ownership, writeability, alignment).
y.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [7]:
# An explicit dtype overrides the type NumPy would infer from the values.
# NOTE: the float inputs are cast to int64 here, so their fractional parts are dropped.
x = np.array([[3.2, 7.8, 9.2],
             [4.5, 9.1, 1.2]], dtype='int64')
print(x.itemsize)  # bytes per element; an int64 element occupies 8 bytes


8


# Creation

In [8]:
# An ndarray can be created in several ways:
#   - from Python built-in containers (lists or tuples),
#   - with array-creation helpers such as ones, ones_like, zeros, zeros_like,
#   - with numeric sequence generators,
#   - with the numpy random module,
#   - by reading data from a file.

# 1. ndarrays from lists
# np.array converts data held in (nested) lists or tuples into an ndarray.
# Two outer entries, each holding three pairs, give the shape (2, 3, 2).
a = [
    [[4.1, 2.5], [1.1, 2.3], [9.1, 2.5]],
    [[8.6, 9.9], [3.6, 4.3], [6.6, 0.3]],
]

x = np.array(a, dtype=np.float64)

type(x), x.ndim, x.shape


(numpy.ndarray, 3, (2, 3, 2))

In [9]:
# NumPy can create arrays pre-filled with a default value such as 0, 1, or any chosen constant.
x = np.zeros((2, 4))  # 2x4 array of zeros
print(x)

y = np.full((2, 3), 10.5)  # 2x3 array with every element set to 10.5
print(y)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[10.5 10.5 10.5]
 [10.5 10.5 10.5]]


In [10]:
# Numeric Sequence Generators
# Two major methods used in numpy for generating number sequences are,

# arange : Numbers are generated from a step value; the stop value is excluded.
# Syntax - numpy.arange([start, ]stop, [step, ]dtype=None)

# linspace : Numbers are generated from the number of samples wanted.
# Syntax - numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)

x = np.arange(3, 15, 2.5) # 2.5 is the step; 15 itself is not reached
print(x)
y = np.linspace(3, 15, 5) # 5 evenly spaced values, both endpoints included
print(y)

[ 3.   5.5  8.  10.5 13. ]
[ 3.  6.  9. 12. 15.]


In [20]:
# Random Number Generation
# The random module of numpy is used to generate various random sequences.

np.random.seed(100) # fix the seed so the sequence is reproducible
x = np.random.rand(2) # 2 random floats drawn uniformly from [0, 1)
print(x)

x = np.random.rand(2,2,2) # 8 random floats from [0, 1), arranged as a 3-D array of shape 2*2*2
print(x)

y = np.random.randint(10, 50, 3) # 3 random integers from 10 (inclusive) up to 50 (exclusive)
print(y)

[0.54340494 0.27836939]
[[[0.42451759 0.84477613]
  [0.00471886 0.12156912]]

 [[0.67074908 0.82585276]
  [0.13670659 0.57509333]]]
[46 26 19]


In [21]:
# Simulating a normal distribution
# randn draws samples from the standard normal distribution (mean 0, sd 1).
np.random.seed(100)
x = np.random.randn(3)
print(x)

# Scale by the sd (2) and shift by the mean (10) to get samples with mean 10 and sd 2.
x = np.random.randn(3) * 2 + 10
print(x)

[-1.74976547  0.3426804   1.1530358 ]
[ 9.49512793 11.96264157 11.02843768]


In [22]:
# Reading Data from a file
# loadtxt reads numeric data from a text file or any file-like input stream.

from io import StringIO

# StringIO wraps the text in a file-like object, standing in for a real file.
x = StringIO('''88.25 93.45 72.60 90.90
72.3 78.85 92.15 65.75
90.5 92.45 89.25 94.50
''')

# Each text line becomes one row; values on a line are separated by a single space.
d = np.loadtxt(x,delimiter=' ')

print(d)

print(d.ndim, d.shape)

[[88.25 93.45 72.6  90.9 ]
 [72.3  78.85 92.15 65.75]
 [90.5  92.45 89.25 94.5 ]]
2 (3, 4)


In [18]:
# Trick question: the input mixes a list and a tuple, yet np.array treats both as rows, giving a 2x2 array.
print(np.array(([1, 2], (3,4))).shape)

(2, 2)


In [17]:
# Identity matrix of order 2
z = np.eye(2)
print(z)

[[1. 0.]
 [0. 1.]]


# Array shape manipulation

In [32]:
# Reshaping ndarrays
# reshape returns an array holding the same elements arranged in a new shape;
# the new shape must account for exactly the same number of elements (8 here).

x = np.random.randint(10, 100, 8)  # 8 random integers from 10 up to (not including) 100
print(x, end='\n\n')
y = x.reshape(2, 4)
print(y, end='\n\n')
z = x.reshape(2, 2, 2)
# Use end= like the prints above; passing '\n\n' positionally printed a stray
# space and an extra newline after the array.
print(z, end='\n\n')

[63 76 24 44 34 25 70 68]

[[63 76 24 44]
 [34 25 70 68]]

[[[63 76]
  [24 44]]

 [[34 25]
  [70 68]]] 




In [26]:
# Stacking arrays vertically
# vstack joins two or more arrays row-wise, placing one on top of the other.
x = np.array([[-1, 1],
              [-3, 3]])
y = np.array([[-2, 2],
              [-4, 4]])
np.vstack([x, y])

array([[-1,  1],
       [-3,  3],
       [-2,  2],
       [-4,  4]])

In [27]:
# Stacking arrays horizontally
# hstack joins two or more arrays column-wise, placing them side by side.

x = np.array([[-1, 1],
              [-3, 3]])
y = np.array([[-2, 2],
              [-4, 4]])
z = np.array([[-5, 5],
              [-6, 6]])
np.hstack([x, y, z])


array([[-1,  1, -2,  2, -5,  5],
       [-3,  3, -4,  4, -6,  6]])

In [29]:
# vsplit cuts an array along its rows; an integer argument asks for that many equal parts.
x = np.arange(30).reshape((6, 5))
res = np.vsplit(x, 2)
# Print both halves separated by a blank line.
print(res[0], res[1], sep='\n\n')

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]

[[15 16 17 18 19]
 [20 21 22 23 24]
 [25 26 27 28 29]]


In [30]:
# vsplit also accepts the row indices at which to cut: here before row 2 and before row 5,
# which yields three pieces.

res = np.vsplit(x, (2, 5))
# Print the three pieces separated by blank lines.
print(*res, sep='\n\n')

[[0 1 2 3 4]
 [5 6 7 8 9]]

[[10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]

[[25 26 27 28 29]]


In [31]:
# hsplit cuts an array along its columns: here before column 2 and before column 4,
# which yields three pieces.

res = np.hsplit(x, (2, 4))
# Print the three pieces separated by blank lines.
print(*res, sep='\n\n')

[[ 0  1]
 [ 5  6]
 [10 11]
 [15 16]
 [20 21]
 [25 26]]

[[ 2  3]
 [ 7  8]
 [12 13]
 [17 18]
 [22 23]
 [27 28]]

[[ 4]
 [ 9]
 [14]
 [19]
 [24]
 [29]]


In [35]:
# 90 consecutive integers arranged as 3 blocks of 15 rows and 2 columns.
x = np.arange(90).reshape(3, 15, 2)
x

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19],
        [20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]],

       [[30, 31],
        [32, 33],
        [34, 35],
        [36, 37],
        [38, 39],
        [40, 41],
        [42, 43],
        [44, 45],
        [46, 47],
        [48, 49],
        [50, 51],
        [52, 53],
        [54, 55],
        [56, 57],
        [58, 59]],

       [[60, 61],
        [62, 63],
        [64, 65],
        [66, 67],
        [68, 69],
        [70, 71],
        [72, 73],
        [74, 75],
        [76, 77],
        [78, 79],
        [80, 81],
        [82, 83],
        [84, 85],
        [86, 87],
        [88, 89]]])

# Basic operation on np arrays

In [36]:
# NumPy applies arithmetic element-wise.

# The expression x + 10 therefore adds 10 to every element of x;
# multiplication and modulo by a scalar behave the same way.

x = np.arange(6).reshape((2, 3))
print(x + 10, x * 3, x % 2, sep='\n\n')

[[10 11 12]
 [13 14 15]]

[[ 0  3  6]
 [ 9 12 15]]

[[0 1 0]
 [1 0 1]]


In [37]:
# Arithmetic between two arrays of the same shape is also element-wise:
# each element is combined with the element at the same position in the other array.

x = np.array([[-1, 1],
              [-2, 2]])
y = np.array([[4, -4],
              [5, -5]])
print(x + y, x * y, sep='\n\n')

[[ 3 -3]
 [ 3 -3]]

[[ -4  -4]
 [-10 -10]]


In [38]:
# Operations are also possible between arrays of different sizes and shapes, thanks to NumPy's broadcasting feature.

x = np.array([[-1, 1], [-2, 2]])
y = np.array([-10, 10])
print(x * y)  # the 1-D array y is multiplied with every row of x

[[10 10]
 [20 20]]


**Broadcasting in NumPy**

Element-wise operations between arrays are possible only when the arrays have the same shape or their shapes are compatible for broadcasting.


Steps followed to verify the feasibility of Broadcasting between arrays are:


1. Initially, compare the dimensions of all arrays.
2. If the numbers of dimensions do not match, prepend 1's to the shape of the array with fewer dimensions until both shapes have the same length.
3. Start comparing array shapes from the last dimension and move backward.
4. If the sizes of both arrays in that dimension are equal, or either of them is 1, continue the comparison.
5. Otherwise, if step 4 fails at any dimension, broadcasting between the arrays is not feasible.


Finally, the shape of the resulting broadcast array is the maximum of the two compared sizes in each dimension.

Below examples show feasibility of broadcasting between two arrays, having shape s1 and s2 respectively.

Given: s1 = (4, 3); s2 = (3,)

Step 1 and 2: s1 = (4, 3); s2 = (1, 3)

Step 3 and 4: pass in 2 dimensions

Result : Broadcasting feasible;

  resulted array shape - (4,3) 



Given: s1 = (5,); s2 = (5,4,3)

Step 1 and 2: s1 = (1, 1, 5); s2 = (5, 4, 3)

Step 3 and 4: fail in last dimension. ( 5 != 3)

Result : Broadcasting not feasible. 

In [39]:
# NumPy provides many mathematical functions in the form of universal functions (ufuncs),
# which are applied to every element of the array.

x = np.array([[0, 1],
              [2, 3]])
print(np.square(x), np.sin(x), sep='\n\n')


[[0 1]
 [4 9]]

[[0.         0.84147098]
 [0.90929743 0.14112001]]


In [40]:
# Many of the universal functions are also available as methods of the ndarray class.
# By default the sum method adds up all array elements.
# The axis argument restricts the sum to one dimension.

x = np.array([[0,1], [2, 3]])
print(x.sum(), end='\n\n')  # every element: 0 + 1 + 2 + 3
print(x.sum(axis=0), end='\n\n')  # down the rows: one total per column
print(x.sum(axis=1))  # across the columns: one total per row

6

[2 4]

[1 5]


In [41]:

# argmax(axis=1) gives, for each row, the column index of that row's largest value.
x = np.arange(30).reshape(5,6)
print(x.argmax(axis=1))

[5 5 5 5 5]


In [42]:
# mean(axis=1) averages across the columns, giving one mean per row.
x =  np.arange(20).reshape(4,5)
print(x.mean(axis=1))

[ 2.  7. 12. 17.]


In [45]:
# Cumulative sum: each output element is the running total of the elements seen so far.

x = np.array([[1,2,3], [4,5,6]])
print("Original array: ")
print(x)
print("Cumulative sum of the elements along a given axis:")
# NOTE: no axis is passed here, so the running total is taken over the flattened array,
# despite what the label above says.
r = np.cumsum(x)
print(r)
print("\nSum over rows for each of the 3 columns:")
r = np.cumsum(x,axis=0)  # running total down each column
print(r)
print("\nSum over columns for each of the 2 rows:")
r = np.cumsum(x,axis=1)  # running total along each row
print(r)

Original array: 
[[1 2 3]
 [4 5 6]]
Cumulative sum of the elements along a given axis:
[ 1  3  6 10 15 21]

Sum over rows for each of the 3 columns:
[[1 2 3]
 [5 7 9]]

Sum over columns for each of the 2 rows:
[[ 1  3  6]
 [ 4  9 15]]


# Indexing, slicing and Iterating

In [47]:
# Slicing refers to extracting a portion of an existing array.
# This can be achieved with a slice object.
# A slice object is of the form start:end:step. All three are optional; end is exclusive.
# A single number inside square brackets is an index and returns that one element.

x  = np.array([5, 10, 15, 20, 25, 30, 35])
print(x[1])  # Indexing: the element at index 1
print(x[1:6]) # Slicing: indices 1 through 5
print(x[1:6:3]) # Slicing with a step of 3: indices 1 and 4

10
[10 15 20 25 30]
[10 25]


In [48]:
# Two slice objects, one for each dimension, are required to slice a 2-D array.
# They are separated by a comma (,); a single slice object or index inside square brackets applies to the first dimension.
# All elements of a single dimension can be referred to with a colon (:).

y = np.array([[0, 1, 2],
              [3, 4, 5]])
print(y[1:2, 1:3])  # row 1 (kept as a 2-D result), columns 1 and 2
print(y[1])  # the whole of row 1
print(y[:, 1])  # column 1 from every row


[[4 5]]
[3 4 5]
[1 4]


In [49]:
# For slicing an n-dimensional ndarray, n slice objects are required.
# A single slice object or index applies to the first dimension.

z = np.array([[[-1, 1], [-2, 2]],
              [[-4, 4], [-5, 5]],
              [[-7, 7], [-9, 9]]])
print(z[1,:,1]) # block 1, every row, column 1
print(z[1:,1,:]) # every block except the first: row 1 of each, all columns
print(z[2]) # a single index selects along the first dimension: the whole of block 2

[4 5]
[[-5  5]
 [-9  9]]
[[-7  7]
 [-9  9]]


In [50]:
# A for loop iterates over the first dimension of an array, so each item of a 2-D array is one row.

x = np.array([[-1, 1], [-2, 2]])
for row in x:
    print('Row :',row)

Row : [-1  1]
Row : [-2  2]


In [51]:
# np.nditer creates an iterator that enables visiting every element of the array one after the other.

x = np.array([[0,1], [2, 3]])
for a in np.nditer(x):
    print(a)

0
1
2
3


In [52]:
# Boolean indexing
# Testing a condition against an array yields a Boolean array of the same shape.
# Indexing with that Boolean array keeps only the elements where it is True,
# returned as a 1-D array.

x = np.arange(10).reshape((2, 5))
condition = (x % 2) == 0  # True where the element is even
print(condition)
print(x[condition])

[[ True False  True False  True]
 [False  True False  True False]]
[0 2 4 6 8]
