In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every bare expression in a cell, not only the last one
# (this is why the cells below produce several outputs each).
InteractiveShell.ast_node_interactivity = "all"

import sys
from math import log

import numpy as np
# NumPy arrays are up to 50x faster than traditional Python lists.

# Print arrays in full: with the threshold at sys.maxsize, long arrays are never summarised with "...".
np.set_printoptions(threshold=sys.maxsize)

In [46]:
# 1-D constructors: a range, float zeros, integer zeros and a constant fill.
np.arange(0, 3)
np.zeros(shape=3)
np.zeros(shape=3, dtype=int)
np.full(shape=3, fill_value=7)

# A 2-D array from nested lists, followed by its number of dimensions and its contents.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr.ndim
arr

array([0, 1, 2])

array([0., 0., 0.])

array([0, 0, 0])

array([7, 7, 7])

2

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
# Random numbers (legacy global np.random API; no seed is set, so values differ on every run).
np.random.rand()        # one float drawn uniformly from [0, 1)
np.random.rand(3, 5)    # 3x5 array of such floats

# randint(low, high) draws integers from [low, high); a single bound means low = 0.
x = np.random.randint(0, 100)                     # one integer in [0, 99]
x = np.random.randint(low=100, high=105)          # one integer in [100, 104]
x = np.random.randint(0, 100, size=5)             # 1-D array of 5 integers in [0, 99]
x = np.random.randint(0, 100, size=(3, 5))        # 3x5 array of integers in [0, 99]

# choice() samples from the values it is given.
x = np.random.choice([3, 5, 7, 9])                # a single value
x = np.random.choice([3, 5, 7, 9], size=(3, 5))   # 3x5 array drawn from the values
# p gives one probability per value; the probabilities should sum to 1.
np.random.choice([3, 5, 7, 9], size=(3, 5), p=[0.1, 0.3, 0.6, 0.0])


### Random permutations of elements
arr = np.array([1, 2, 3, 4, 5])
np.random.shuffle(arr)               # reorders the array in place
print(arr)

print(np.random.permutation(arr))    # returns a shuffled copy; arr itself is not changed

0.6042451410988791

array([[0.56772487, 0.53875804, 0.14811348, 0.32351438, 0.17426057],
       [0.00238619, 0.30618561, 0.50727345, 0.63897852, 0.90861252],
       [0.26356865, 0.13409532, 0.53623655, 0.06877663, 0.98732565]])

array([[5, 7, 5, 3, 7],
       [5, 7, 3, 3, 3],
       [7, 7, 7, 3, 5]])

[1 3 5 2 4]
[5 2 3 1 4]


In [None]:
# astype() returns a converted copy of the array.
newarr = arr.astype(np.intc)     # 'i' is the type code of a C int (int32 on common platforms)
newarr = arr.astype(dtype=int)   # Python int maps to NumPy's default integer (int64 on most platforms)

In [6]:
# ndmin=5 pads the shape with leading length-1 axes until the array has 5 dimensions.
np.array([1, 2, 3, 4], ndmin=5)

array([[[[[1, 2, 3, 4]]]]])

In [24]:
# Reshaping arrays: the same 8 values viewed first as a vector, then as a 2x4 matrix.
arr = np.arange(1, 9)            # 1, 2, ..., 8
arr.shape
arr_new = arr.reshape((2, 4))    # 2 rows of 4
arr_new.shape
arr_new

(8,)

(2, 4)

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [56]:
# Swap the two axes: once with the module-level function, once with the array method.
np.transpose(arr_new, axes=(1, 0))
arr_new.transpose(1, 0)

array([[0, 5],
       [2, 6],
       [3, 7],
       [4, 0]])

array([[0, 5],
       [2, 6],
       [3, 7],
       [4, 0]])

In [57]:
# reshape() returns a view here (a reference to the original array's data: changing either will change both).
# .base is the array whose data the view shares.
arr_new.base

# Write through the original array, then display the reshaped one to see the same change.
arr[0]=0
arr_new

# copy() owns its own data; view() shares data with arr.
x = arr.copy()		# a new array
y = arr.view()		# a view
# Modify arr after taking both, then display the copy and the view to compare them.
arr[-1]=0
x
y

array([0, 2, 3, 4, 5, 6, 7, 0])

array([[0, 2, 3, 4],
       [5, 6, 7, 0]])

array([0, 3, 5, 2, 4])

array([0, 3, 5, 2, 0])

In [28]:
# One dimension may be left "unknown" by passing -1; NumPy works out its size from the array length.
arr.reshape((2, 2, -1))
arr.reshape((-1,))      # flattening (converting into a 1-D array)

array([[[0, 2],
        [3, 4]],

       [[5, 6],
        [7, 0]]])

array([0, 2, 3, 4, 5, 6, 7, 0])

In [35]:
a = np.arange(3)
b = a.reshape(3, 1)    # column vector of shape (3, 1)
b
c = b.squeeze()        # length-1 axes removed: c.shape == (3,)

array([[0],
       [1],
       [2]])

NumPy also has many other functions for changing the shape of an array (e.g. `flatten`, `ravel`) and for rearranging its elements (e.g. `rot90`, `flip`, `fliplr`, `flipud`).

In [None]:
# nditer visits every scalar of an array, whatever its number of dimensions, without nested loops.
arr = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])
arr
for x in np.nditer(arr):
    print(x)    # 1 2 3 4 5 6 7 8

# op_dtypes asks nditer to hand out the elements converted to another dtype.
# The conversion cannot happen inside the array itself, so nditer needs scratch space
# (a buffer), which is enabled with flags=['buffered'].
arr = np.array([1, 2, 3])
for x in np.nditer(arr, flags=['buffered'], op_dtypes=['S']):
    print(x)    # b'1' b'2' b'3'

# Iterating over a slice lets us use a different step size (here: every second column).
arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])
for x in np.nditer(arr[:, 0::2]):
    print(x)    # 1 3 5 7

In [39]:
# ndenumerate yields (index tuple, value) pairs for every element.
arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])
for idx, x in np.ndenumerate(arr):
    print(idx, x)

(0, 0) 1
(0, 1) 2
(0, 2) 3
(0, 3) 4
(1, 0) 5
(1, 1) 6
(1, 2) 7
(1, 3) 8


In [None]:
# Joining NumPy arrays
# SQL joins tables on a key; NumPy joins arrays along an axis.
# concatenate() takes the sequence of arrays to join plus the axis (axis=0 when omitted).
arr1 = np.array([[1, 2],
                 [3, 4]])
arr2 = np.array([[5, 6],
                 [7, 8]])
arr = np.concatenate([arr1, arr2], axis=1)
# [[1 2 5 6]
#  [3 4 7 8]]

# Stack functions
# stack() is like concatenate(), except that the arrays are joined along a NEW axis.
# It takes the sequence of arrays plus the position of the new axis (axis=0 when omitted).
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

arr = np.stack([arr1, arr2], axis=1)
# [[1 4]
#  [2 5]
#  [3 6]]

# hstack: join horizontally (for 1-D inputs, end to end)
arr = np.hstack([arr1, arr2])
# [1 2 3 4 5 6]

# vstack: join vertically (each input becomes a row)
arr = np.vstack([arr1, arr2])
# [[1 2 3]
#  [4 5 6]]

# dstack: join along a third (depth) axis
arr = np.dstack([arr1, arr2])
# [[[1 4]
#   [2 5]
#   [3 6]]]
    
# Splitting NumPy arrays
# split() requires an even division; array_split() also copes with a length that
# does not divide evenly, so it is used here.
arr = np.array([1, 2, 3, 4, 5, 6])
newarr = np.array_split(arr, indices_or_sections=3)
# [array([1, 2]), array([3, 4]), array([5, 6])]
print(newarr[0])    # [1 2]

# 6 elements into 4 parts: the leading parts get the extra elements.
arr = np.array([1, 2, 3, 4, 5, 6])
newarr = np.array_split(arr, indices_or_sections=4)
# [array([1, 2]), array([3, 4]), array([5]), array([6])]

# 2-D input: the split happens along the first axis (rows) by default.
arr = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
newarr = np.array_split(arr, indices_or_sections=3)
# [array([[1, 2], [3, 4]]),
#  array([[5, 6], [7, 8]]),
#  array([[ 9, 10], [11, 12]])]

# As with the stack functions, an axis parameter can be passed, or use vsplit() and dsplit().



In [None]:

# Searching arrays
# where(condition) returns the indexes at which the condition is true.
arr = np.array([1, 2, 3, 4, 5, 4, 4])
x = np.where(arr == 4)         # a tuple of index arrays: (array([3, 5, 6]),)
x = np.where(arr % 2 == 0)     # indexes of the even values

# searchsorted() does a binary search and returns the index at which the value
# would have to be inserted to keep the order. The array is assumed to be sorted!
arr = np.array([6, 7, 8, 9])
x = np.searchsorted(arr, v=7)                  # x = 1 (leftmost suitable index)

# side='right' returns the rightmost suitable index instead:
arr = np.array([6, 7, 8, 9])
x = np.searchsorted(arr, v=7, side="right")    # x = 2

# Several values can be looked up in one call:
arr = np.array([1, 3, 5, 7])
x = np.searchsorted(arr, v=[2, 4, 6])          # x = [1 2 3]
# Sorting arrays
# np.sort() returns a sorted copy and leaves its input unchanged. It also works on
# strings, booleans and other data types.
arr = np.array([3, 2, 0, 1])
print(np.sort(arr, axis=-1))

# On a 2-D array every row is sorted on its own (the default axis is the last one).
arr = np.array([[3, 2, 4],
                [5, 0, 1]])
print(np.sort(arr, axis=-1))
# [[2 3 4]
#  [0 1 5]]
# Filtering arrays
# Filtering means building a new array from selected elements of an existing one.
# NumPy does this with a boolean index: one True/False per element of the array.
arr = np.array([41, 42, 43, 44])
x = [True, False, True, False]
newarr = arr[x]    # [41 43]

# The mask is usually computed from a condition rather than written out by hand.
# Long form, one element at a time:
arr = np.array([41, 42, 43, 44])
filter_arr = []
for element in arr:
    filter_arr.append(True if element > 42 else False)
newarr = arr[filter_arr]
print(filter_arr)    # [False, False, True, True]
print(newarr)        # [43 44]

# Short form: comparing the whole array yields the mask directly.
arr = np.array([41, 42, 43, 44])
filter_arr = arr > 42
newarr = arr[filter_arr]

In [None]:
# NumPy ufuncs
# "ufunc" stands for "universal function": a NumPy function that operates on ndarray objects.
# ufuncs implement vectorization, which is much faster than iterating over elements in Python.
# They also support broadcasting and offer extra methods such as reduce and accumulate.
#
# Vectorization
# CPUs and GPUs both have parallel (SIMD - single instruction, multiple data) instructions.
# NumPy functions such as .dot() (matrix multiplication, unlike the element-wise *) make use of them.
# GPUs are especially good at this, but CPUs are not bad either.

# Adding the elements of two lists
# Without a ufunc: pair the elements up with Python's built-in zip().
x = [1, 2, 3, 4]
y = [4, 5, 6, 7]
z = [i + j for i, j in zip(x, y)]
print(z)

# With a ufunc: np.add() does the same in a single call.
x = [1, 2, 3, 4]
y = [4, 5, 6, 7]
z = np.add(x, y)
print(z)

# Create your own ufunc
# Write an ordinary Python function, then wrap it with
# np.frompyfunc(function, number_of_inputs, number_of_outputs).
def myadd(x, y):
    """Return the sum of the two arguments."""
    return x + y


# From here on the name myadd refers to the ufunc built from the function above.
myadd = np.frompyfunc(myadd, 2, 1)
print(myadd([1, 2, 3, 4], [5, 6, 7, 8]))    # [6 8 10 12]

# Check whether a function is a ufunc
print(type(np.add))    # a ufunc reports <class 'numpy.ufunc'>

# A name that does not exist at all raises an error instead.
# In a condition, compare against numpy.ufunc (np.ufunc when numpy is imported as np):
if isinstance(np.add, np.ufunc):
    print('add is ufunc')
else:
    print('add is not ufunc')



In [None]:
# Arithmetic
# The operators + - * / work directly between NumPy arrays. The function forms also accept
# any array-like object (lists, tuples, ...) and can work conditionally: they take a `where`
# parameter for the condition and return the results in a new array.
arr1 = np.array([1, 2, 3])
arr2 = np.array([1, 2, 3])

newarr = np.add(arr1, arr2)                # element-wise sum: [2 4 6]
newarr = np.sum([arr1, arr2])              # sum of everything: 12
newarr = np.sum([arr1, arr2], axis=1)      # one sum per input array: [6 6]
newarr = np.divmod(arr1, arr2)             # a pair of arrays: (quotient, remainder)

# Rounding decimals
arr = np.trunc([-3.1666, 3.6667])          # drop the fractional part: [-3.  3.]
arr = np.fix([-3.1666, 3.6667])            # round towards zero:       [-3.  3.]
arr = np.around(3.1666, decimals=2)        # round to 2 decimals:      3.17


# Logs
arr = np.arange(1, 10)    # integers from 1 (included) to 10 (not included)

print(np.log2(arr))       # log at base 2
print(np.log10(arr))      # log at base 10
print(np.log(arr))        # log at base e

# Note: the log functions place -inf or inf in the elements where the log cannot be computed.

# Log at any base
# NumPy has no dedicated "log at base b" function. One option is to wrap math.log (imported
# with the other imports in the first cell) with frompyfunc(): two inputs (value, base), one output.
# The vectorised alternative is the change-of-base formula: np.log(x) / np.log(base).
nplog = np.frompyfunc(log, 2, 1)
print(nplog(100, 15))


# Products
arr = np.array([1, 2, 3, 4])
x = np.prod(arr)                          # 1*2*3*4 = 24

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([5, 6, 7, 8])
x = np.prod([arr1, arr2])                 # product of all 8 values = 40320
newarr = np.prod([arr1, arr2], axis=1)    # one product per input array: [24 1680]

# Cumulative product: the running product after each element.
arr = np.array([5, 6, 7, 8])
newarr = np.cumprod(arr)                  # [5 30 210 1680]
# Differences
# Discrete difference: each element minus the one before it.
arr = np.array([10, 15, 25, 5])
newarr = np.diff(arr, n=1)    # [5 10 -20]  (15-10, 25-15, 5-25)

# The n parameter repeats the operation on its own result.
arr = np.array([10, 15, 25, 5])
newarr = np.diff(arr, n=2)    # [5 -30]  (10-5, -20-10)
# LCM (Lowest Common Multiple)
x = np.lcm(4, 6)            # 12

# reduce() applies the ufunc (here lcm) across the elements and
# reduces the array by one dimension.
arr = np.array([3, 6, 9])
x = np.lcm.reduce(arr)      # 18

# GCD (Greatest Common Divisor)
x = np.gcd(6, 9)            # 3

# The same reduce() pattern, this time with gcd.
arr = np.array([20, 8, 32, 36, 16])
x = np.gcd.reduce(arr)      # 4


In [None]:
# Trigonometric functions
# sin(), cos() and tan() take angles in radians.
# arcsin(), arccos() and arctan() go the other way and return angles in radians.

x = np.sin(np.pi / 2)                                # 1.0
arr = np.array([np.pi / k for k in (2, 3, 4, 5)])
x = np.sin(arr)                                      # [1.  0.8660254  0.70710678  0.58778525]
x = np.arcsin(1.0)                                   # 1.5707963267948966
arr = np.array([1, -1, 0.1])
x = np.arcsin(arr)                                   # [ 1.57079633 -1.57079633 0.10016742]

# Degrees to radians (radians = pi/180 * degrees)
arr = np.array([90, 180, 270, 360])
x = np.deg2rad(arr)                                  # [1.57079633 3.14159265 4.71238898 6.28318531]

# Radians to degrees
arr = np.array([np.pi / 2, np.pi, 1.5 * np.pi, 2 * np.pi])
x = np.rad2deg(arr)                                  # [ 90. 180. 270. 360.]

# Hypotenuse of a right triangle, via the Pythagorean theorem
base = 3
perp = 4
x = np.hypot(base, perp)                             # 5.0

# Hyperbolic functions
# sinh(), cosh() and tanh() return the hyperbolic sine, cosine and tangent of their argument.
# arcsinh(), arccosh() and arctanh() are the corresponding inverse functions.

x = np.sinh(np.pi / 2)                               # 2.3012989023072947
arr = np.array([np.pi / k for k in (2, 3, 4, 5)])
x = np.cosh(arr)                                     # [2.50917848 1.60028686 1.32460909 1.20397209]
x = np.arcsinh(1.0)                                  # 0.881373587019543
arr = np.array([0.1, 0.2, 0.5])
x = np.arctanh(arr)                                  # [0.10033535 0.20273255 0.54930614]


In [None]:
# NumPy set operations (the set arrays should only be 1-D arrays!)
arr = np.array([1, 1, 1, 2, 3, 4, 5, 5, 6, 7])
x = np.unique(arr)                                          # [1 2 3 4 5 6 7]

# Union, intersection, difference and symmetric difference
arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])

newarr = np.union1d(arr1, arr2)                             # [1 2 3 4 5 6]
newarr = np.intersect1d(arr1, arr2, assume_unique=True)     # [3 4]
# The next two calls used the undefined names set1/set2 and raised NameError;
# they operate on the same two arrays as the calls above.
newarr = np.setdiff1d(arr1, arr2, assume_unique=True)       # [1 2] are in the 1st set but NOT in the 2nd
newarr = np.setxor1d(arr1, arr2, assume_unique=True)        # [1 2 5 6] are in exactly one of the sets
# assume_unique is optional. Pass True only when both inputs contain no duplicates
# (as is the case for sets); it can then speed up the computation.