<a href="https://colab.research.google.com/github/vishkaush/misc-projects/blob/main/Python_Tidbits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Init

In [None]:
import numpy as np

# Rank 1 Arrays, Vectors, Matrices

## Python-Numpy Vectors vs Rank 1 Arrays

In [None]:
import numpy as np

a = np.random.randn(5) # draws 5 samples from the standard normal (Gaussian) distribution
print("a= ", a)
print("a's shape = ", a.shape) # (5,): a rank 1 array - neither a row vector nor a column vector
print("a transpose = ", a.T) # transposing a rank 1 array is a no-op, so this prints the same values
print("a dot a transpose = ", np.dot(a,a.T)) # you might expect a 5x5 matrix, but for two rank 1 arrays np.dot returns the inner product (a single real number)
# fix this by explicitly giving the array two dimensions: a row vector of shape (1, 5) as done here, or a column vector of shape (5, 1)
a = a.reshape((1,5))
print("reshaped a = ", a)
print("shape of reshaped a = ", a.shape)

a=  [-0.57355539  0.2504224  -0.91226591 -1.10557317  0.75912332]
a's shape =  (5,)
a transpose =  [-0.57355539  0.2504224  -0.91226591 -1.10557317  0.75912332]
a dot a transpose =  3.022466504603177
reshaped a =  [[-0.57355539  0.2504224  -0.91226591 -1.10557317  0.75912332]]
shape of reshaped a =  (1, 5)


In [None]:
# Prefer giving vectors an explicit 2-D shape, i.e. treat them as matrices.
# Here `a` is a 5x1 column vector, so a . a^T is a genuine 5x5 matrix.
a = np.random.randn(5, 1)
prod = np.dot(a, a.T)

# Good practice: sanity-check the dimensions of the result.
assert prod.shape == (5, 5)

print("a = ", a)
print("a's shape = ", a.shape)
print("a transpose = ", a.T)
print("a dot a transpose = ", prod)

a =  [[-0.3280932 ]
 [ 0.94028645]
 [-0.19110386]
 [ 0.09285372]
 [ 0.50264559]]
a's shape =  (5, 1)
a transpose =  [[-0.3280932   0.94028645 -0.19110386  0.09285372  0.50264559]]
a dot a transpose =  [[ 0.10764515 -0.30850159  0.06269988 -0.03046467 -0.1649146 ]
 [-0.30850159  0.8841386  -0.17969237  0.08730909  0.47263083]
 [ 0.06269988 -0.17969237  0.03652069 -0.0177447  -0.09605751]
 [-0.03046467  0.08730909 -0.0177447   0.00862181  0.04667251]
 [-0.1649146   0.47263083 -0.09605751  0.04667251  0.25265259]]


## Sum

In [None]:
# 3x3 matrix holding 1..9 in row-major order.
A = np.arange(1, 10).reshape(3, 3)

# axis=1 adds up the entries of each row; keepdims=True keeps the result
# two-dimensional, shape (3, 1), instead of collapsing to a rank 1 array.
sumHorizontally = np.sum(A, axis=1, keepdims=True)

# axis=0 adds up the entries of each column; without keepdims the result
# is a rank 1 array of shape (3,).
sumVertically = np.sum(A, axis=0)

print("A = ", A)
print("A's shape = ", A.shape)
print("After horizontal sum (preserving dimensions)= ", sumHorizontally)
print("Shape after horizontal sum = ", sumHorizontally.shape)
print("After vertical sum = ", sumVertically)
print("Shape after vertical sum = ", sumVertically.shape)

A =  [[1 2 3]
 [4 5 6]
 [7 8 9]]
A's shape =  (3, 3)
After horizontal sum (preserving dimensions)=  [[ 6]
 [15]
 [24]]
Shape after horizontal sum =  (3, 1)
After vertical sum =  [12 15 18]
Shape after vertical sum =  (3,)


## Squeeze

In [None]:
# np.squeeze removes the axes of an array that have length 1
import numpy as np
a = np.random.randn(2,3,3)
print(a)
print("a's shape before squeeze = ", a.shape)
a = np.squeeze(a) # no axis has length 1 here, so the shape stays (2, 3, 3)
print(a)
print("a's shape after squeeze = ", a.shape)
a = [[[17]]]
a = np.array(a)
print(a)
print("a's shape before squeeze = ", a.shape)
a = np.squeeze(a) # all three axes have length 1, so all are dropped, leaving a 0-d array of shape ()
print(a)
print("a's shape after squeeze = ", a.shape)

[[[-2.58416815  0.24120928  0.26002535]
  [ 0.0380178  -0.1040927  -0.13101809]
  [-0.18246522 -0.88276685  0.96643864]]

 [[-1.60314646  0.42656466 -0.33935789]
  [-0.24507877  0.63956228  1.33653208]
  [ 0.16451404 -0.54149442  0.21941277]]]
a's shape before squeeze =  (2, 3, 3)
[[[-2.58416815  0.24120928  0.26002535]
  [ 0.0380178  -0.1040927  -0.13101809]
  [-0.18246522 -0.88276685  0.96643864]]

 [[-1.60314646  0.42656466 -0.33935789]
  [-0.24507877  0.63956228  1.33653208]
  [ 0.16451404 -0.54149442  0.21941277]]]
a's shape after squeeze =  (2, 3, 3)
[[[17]]]
a's shape before squeeze =  (1, 1, 1)
17
a's shape after squeeze =  ()


# Benefits of Vectorization

## Dot (Inner) Product

In [None]:
import numpy as np
import math
# two rank 1 arrays of 100 uniform samples from [0, 1); the timing cells below reuse them
a = np.random.rand(100)
b = np.random.rand(100)
# vectorized inner product; %time is an IPython line magic that times this single statement
%time c=np.dot(a,b)
print("Dot product = ", c)

CPU times: user 22 µs, sys: 0 ns, total: 22 µs
Wall time: 26.5 µs
Dot product =  22.142277202972636


In [None]:
%%time
# the same inner product written as an explicit Python loop, to be timed against np.dot
c = 0
for i in range(len(a)):
  c += a[i]*b[i]

CPU times: user 66 µs, sys: 17 µs, total: 83 µs
Wall time: 84.9 µs


In [None]:
# agrees with the np.dot result up to floating-point rounding in the last digits
print("Dot product = ", c)

Dot product =  22.14227720297264


## Outer Product

In [None]:
%%time
# outer product with nested Python loops: outer[i, j] = a[i] * b[j]
outer = np.zeros((len(a),len(b)))
for i in range(len(a)):
    for j in range(len(b)):
        outer[i,j] = a[i]*b[j]

CPU times: user 10.3 ms, sys: 0 ns, total: 10.3 ms
Wall time: 10.8 ms


In [None]:
# vectorized outer product of a and b in a single NumPy call
%time outer = np.outer(a,b)

CPU times: user 152 µs, sys: 0 ns, total: 152 µs
Wall time: 157 µs


## Element-wise multiplication

In [None]:
%%time
# element-wise product with an explicit Python loop: mul[i] = a[i] * b[i]
mul = np.zeros(len(a))
for i in range(len(a)):
    mul[i] = a[i]*b[i]

CPU times: user 0 ns, sys: 133 µs, total: 133 µs
Wall time: 156 µs


In [None]:
# vectorized element-wise product of a and b in a single NumPy call
%time mul = np.multiply(a,b)

CPU times: user 36 µs, sys: 6 µs, total: 42 µs
Wall time: 55.6 µs


## Product of Matrix and Vector

In [None]:
A = np.random.rand(1000,1000) # 1000x1000 matrix of uniform samples from [0, 1)
v = np.random.rand(1000) # rank 1 array of length 1000
u = np.zeros((1000, 1)) # zero-initialised 1000x1 column that the loop version accumulates into

In [None]:
%%time
# Matrix-vector product of A and v with explicit Python loops, to be timed
# against np.dot.
# u is (re)initialised inside this cell so that the cell is idempotent: the
# loop accumulates with +=, so running it again on a non-zero u (for example
# after a previous run, or after u has been rebound elsewhere) would silently
# produce a wrong result.
u = np.zeros((A.shape[0], 1))
for i in range(A.shape[0]):
  for j in range(A.shape[1]):
    u[i] += A[i, j]*v[j]  # A[i, j] indexes the element directly instead of first building the row A[i]

CPU times: user 3.59 s, sys: 0 ns, total: 3.59 s
Wall time: 3.6 s


In [None]:
%%time
u = np.dot(A,v)  # np.dot is used for matrix-matrix and matrix-vector multiplication; v is rank 1 here, so the result is a rank 1 array of shape (1000,)

CPU times: user 5.32 ms, sys: 0 ns, total: 5.32 ms
Wall time: 4.73 ms


## Exponent

In [None]:
%%time
# element-wise exponential with an explicit Python loop; math.exp is applied to one scalar at a time
u = np.zeros((1000,1))
for i in range(1000):
  u[i]=math.exp(v[i])

CPU times: user 1.07 ms, sys: 0 ns, total: 1.07 ms
Wall time: 1.13 ms


In [None]:
%%time
# vectorized: np.exp applies the exponential to every element of v in one call
u = np.exp(v)

CPU times: user 70 µs, sys: 0 ns, total: 70 µs
Wall time: 74.1 µs


# Frequently Used Operations

## Generating a sequence of numbers

In [None]:
# range() only works with integer start/stop/step values; the stop value (10) is excluded
for i in range(1, 10, 2):
  print(i, end=", ")

1, 3, 5, 7, 9, 

In [None]:
# fractional steps built on top of range(); note that some products (e.g. 3 * 0.1) show binary floating-point error
[x * 0.1 for x in range(0, 10)]

[0.0,
 0.1,
 0.2,
 0.30000000000000004,
 0.4,
 0.5,
 0.6000000000000001,
 0.7000000000000001,
 0.8,
 0.9]

In [None]:
import numpy as np
np.arange(1, 2, 0.1) # like range() but accepts a non-integer step; the stop value (2) is excluded (older SciPy releases re-exported this as scipy.arange)

array([1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9])

In [None]:
np.arange(1, 1.3, 0.1) # should produce three numbers (1.0, 1.1, 1.2) since stop is excluded, but produces 4 due to floating point round off error when the length is computed from (stop - start) / step

array([1. , 1.1, 1.2, 1.3])

In [None]:
# preferred and more accurate approach: specify the number of samples instead of a step
# note that both endpoints are included, so 10 samples over [1, 2] are spaced 1/9 apart (not 0.1)
np.linspace(1, 2, 10)


array([1.        , 1.11111111, 1.22222222, 1.33333333, 1.44444444,
       1.55555556, 1.66666667, 1.77777778, 1.88888889, 2.        ])

In [None]:
# Reverse sequence: reversed() walks range(5) backwards, i.e. from 4 down to 0
for i in reversed(range(5)):
  print(i)

4
3
2
1
0


## Resetting all values based on a condition

In [None]:
A = np.random.randn(2,3) # standard normal samples, so entries may be negative
print(A)
B = np.random.rand(2,3) # uniform samples from [0, 1)
print(B)
# wherever A is negative, we want those corresponding values of B to be set to 0
B[A<0]=0 # A<0 is a boolean mask with the same shape as B; the assignment modifies B in place
print(B)

[[ 0.31733538  0.01038299  0.76026247]
 [-0.25217665  0.00297796 -0.20304938]]
[[0.52945674 0.17640043 0.52786446]
 [0.42747632 0.86671306 0.07339918]]
[[0.52945674 0.17640043 0.52786446]
 [0.         0.86671306 0.        ]]


In [None]:
probs = np.random.rand(1, 10)
print(probs)
# wherever probs > 0.5, set the prediction to 1; everywhere else set it to 0
preds = np.where(probs>0.5, 1, 0)
print(preds)

[[0.38615803 0.16894986 0.67880304 0.59000636 0.53934493 0.21668012
  0.78379128 0.82952814 0.54036008 0.49767768]]
[[0 0 1 1 1 0 1 1 1 0]]


## Comparing two binary vectors

In [None]:
# Two 1x10 binary row vectors to compare.
one = np.array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 1]])
two = np.ones_like(one)

# Approach 1: |two - one| is 1 exactly where the vectors differ, so its mean
# is the fraction of mismatches; subtract that percentage from 100.
accuracy1 = 100 - np.abs(two - one).mean() * 100
print(accuracy1)

# Approach 2: count the positions where both are 1 plus the positions where
# both are 0, divide by the number of elements, and drop the (1, 1) shape.
accuracy2 = np.squeeze((one @ two.T + (1 - one) @ (1 - two).T) / one.size * 100)
print(accuracy2)

# Approach 3: element-wise equality, each match contributing 1/size.
accuracy3 = np.sum((one == two) / one.size) * 100
print(accuracy3)

60.0
60.0
60.0


# Some Python Features

## Chaining operators

In [None]:
a=3
# explicit form: two comparisons joined with `and`
if a>2 and a<5:
  print('Eureka!')
# chained form: 2<a<5 is equivalent to (2<a) and (a<5)
if 2<a<5:
  print('Eureka!')

Eureka!
Eureka!


## From Python 3.7 onward, dictionaries are ordered

In [None]:
a = {'one':1, 'two':2, 'three':3, 'four':4, 'five':5}
print(a) # items are shown in the order they were inserted
for key in a: # iterating over a dict yields its keys, also in insertion order
  print(key)

{'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5}
one
two
three
four
five


The keys in a dictionary are much like a set, which is a collection of hashable and unique objects. Because the objects need to be hashable, mutable objects can’t be used as dictionary keys.

In Python 2.7, dictionaries are unordered structures: the order of a dictionary’s items is scrambled. However, that order is deterministic and repeatable. If you leave the interpreter and open a new interactive session later, you’ll get the same item order.

In Python 3.5, dictionaries are still unordered, but this time their order is randomized. This means that every time you build the same dictionary in a new interpreter session, you may get a different item order.

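In Python 3.6 and beyond, the keys and values of a dictionary are iterated over in the same order in which they were inserted. This behavior varies across Python versions: in CPython 3.6 it was an implementation detail, and from Python 3.7 onward it is guaranteed by the language. The order depends on the dictionary’s history of insertions and deletions: for example, deleting a key and inserting it again moves it to the end.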

# Random

In [None]:
# get a random integer N with x <= N <= y (both endpoints are included)
import random
a = random.randint(3,10)
print(a)

9
