# C1W1A1_Lite_Python_Basics_With_Numpy

In [2]:
import numpy
import math

## 1 - Building basic functions with numpy
<br> 
### 1.1 - Sigmoid function, np.exp()
$$sigmoid(x) = \frac{1}{1+e^{(-x)}}$$

In [6]:
# Graded Function : basic_sigmoid
def basic_sigmoid(x):
    s = 1 / (1 + math.exp(-x))
    return s

In [4]:
def sigmoid(x):
    s = 1 / (1 + np.exp(-x))
    return s

In [5]:
x = np.array([1, 2, 3])
sigmoid(x)

array([0.73105858, 0.88079708, 0.95257413])

### 1.2 - Sigmoid gradient
$$ sigmoid\_derivative(x) = \sigma'(x) = \sigma(x)(1-\sigma(x))$$

In [14]:
def sigmoid_derivative(x):
    s = sigmoid(x)
    ds = s * (1 - s)
    return ds

In [16]:
x = np.array([1, 2, 3])
print('sigmoid_derivative(x) = ' + str(sigmoid_derivative(x)))

sigmoid_derivative(x) = [0.19661193 0.10499359 0.04517666]


### 1.3 - Reshaping arrays
<img src="images/image2vector_kiank.png" style="width:500px;height:300;">'
- Implenment **image2vector()** that takes an input of shape(length, height, 3) and returns a vector of shape (length\*height\*3, 1).

In [17]:
def image2vector(image):
    v = image.reshape(-1, 1)
    return v

### 1.4 - Normalizing rows
if $$x = 
\begin{bmatrix}
    0 & 3 & 4 \\
    2 & 6 & 4 \\
\end{bmatrix}\tag{3}$$ then $$\| x\| = np.linalg.norm(x, axis = 1, keepdims = True) = \begin{bmatrix}
    5 \\
    \sqrt{56} \\
\end{bmatrix}\tag{4} $$and        $$ x\_normalized = \frac{x}{\| x\|} = \begin{bmatrix}
    0 & \frac{3}{5} & \frac{4}{5} \\
    \frac{2}{\sqrt{56}} & \frac{6}{\sqrt{56}} & \frac{4}{\sqrt{56}} \\
\end{bmatrix}\tag{5}$$

In [32]:
def normalizeRows(x):
    # axis=1 表示行,keepdims表示矩阵的二维特性,结果是[[a_11],[a_21]]而不是[a_11,a_12]
    x_norm = np.linalg.norm(x, axis=1, keepdims=True)
    print(x_norm)
    x = x / x_norm
    return x

In [33]:
x = np.array([[0, 3, 4], [2, 6, 4]])
print("normalizeRows(x): " + str(normalizeRows(x)))

[[5.        ]
 [7.48331477]]
normalizeRows(x): [[0.         0.6        0.8       ]
 [0.26726124 0.80178373 0.53452248]]


### 1.5 - Broadcasting and the softmax function
**Instructions**:
- $ \text{for } x \in \mathbb{R}^{1\times n} \text{,     } softmax(x) = softmax(\begin{bmatrix}
    x_1  &&
    x_2 &&
    ...  &&
    x_n  
\end{bmatrix}) = \begin{bmatrix}
     \frac{e^{x_1}}{\sum_{j}e^{x_j}}  &&
    \frac{e^{x_2}}{\sum_{j}e^{x_j}}  &&
    ...  &&
    \frac{e^{x_n}}{\sum_{j}e^{x_j}} 
\end{bmatrix} $ 

- $\text{for a matrix } x \in \mathbb{R}^{m \times n} \text{,  $x_{ij}$ maps to the element in the $i^{th}$ row and $j^{th}$ column of $x$, thus we have: }$  $$softmax(x) = softmax\begin{bmatrix}
    x_{11} & x_{12} & x_{13} & \dots  & x_{1n} \\
    x_{21} & x_{22} & x_{23} & \dots  & x_{2n} \\
    \vdots & \vdots & \vdots & \ddots & \vdots \\
    x_{m1} & x_{m2} & x_{m3} & \dots  & x_{mn}
\end{bmatrix} \\
= \begin{bmatrix}
    \frac{e^{x_{11}}}{\sum_{j}e^{x_{1j}}} & \frac{e^{x_{12}}}{\sum_{j}e^{x_{1j}}} & \frac{e^{x_{13}}}{\sum_{j}e^{x_{1j}}} & \dots  & \frac{e^{x_{1n}}}{\sum_{j}e^{x_{1j}}} \\
    \frac{e^{x_{21}}}{\sum_{j}e^{x_{2j}}} & \frac{e^{x_{22}}}{\sum_{j}e^{x_{2j}}} & \frac{e^{x_{23}}}{\sum_{j}e^{x_{2j}}} & \dots  & \frac{e^{x_{2n}}}{\sum_{j}e^{x_{2j}}} \\
    \vdots & \vdots & \vdots & \ddots & \vdots \\
    \frac{e^{x_{m1}}}{\sum_{j}e^{x_{mj}}} & \frac{e^{x_{m2}}}{\sum_{j}e^{x_{mj}}} & \frac{e^{x_{m3}}}{\sum_{j}e^{x_{mj}}} & \dots  & \frac{e^{x_{mn}}}{\sum_{j}e^{x_{mj}}}
\end{bmatrix} = \begin{pmatrix}
    softmax\text{(first row of x)}  \\
    softmax\text{(second row of x)} \\
    ...  \\
    softmax\text{(last row of x)} \\
\end{pmatrix} $$

In [35]:
def softmax(x):
    x_exp = np.exp(x)
    x_sum = np.sum(x_exp, axis=1, keepdims=True)
    s = x_exp / x_sum
    return s

## 2 - Vectorization

**Note:**
<br>
**np.dot()** performs a martrix-matrix or matrix-vector multiplication. This is different from $\color{red}{np.multiply()}$  and the  $\color{red}{*}$  operator (which is equivalent to $\color{red}{.*}$ in Matlab/Octave), which performs an element-wise multiplication.

In [44]:
import time

x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

print('CLASSIC GENERAL DOT PRODUCT IMPLEMENTATION:')
W = np.random.rand(3, len(x1))  # Random 3*len(x1) numpy array
tic = time.process_time()
gdot = np.zeros(W.shape[0])
for i in range(W.shape[0]):
    for j in range(len(x1)):
        gdot[i] += W[i, j] * x1[j]
toc = time.process_time()
print("gdot = " + str(gdot) + "\n ----- Computation time = " +
      str(1000 * (toc - tic)) + "ms")

print('VECTORIZED GENERAL DOT PRODUCT:')
tic = time.process_time()
dot = np.dot(W, x1)
toc = time.process_time()
print("gdot = " + str(dot) + "\n ----- Computation time = " +
      str(1000 * (toc - tic)) + "ms")

CLASSIC GENERAL DOT PRODUCT IMPLEMENTATION:
gdot = [22.19480675 16.45082372 24.31485385]
 ----- Computation time = 0.1710000000008094ms
VECTORIZED GENERAL DOT PRODUCT:
gdot = [22.19480675 16.45082372 24.31485385]
 ----- Computation time = 0.1120000000005561ms


### 2.1 Implement the L1 and L2 loss functions
- L1 loss is defined as:
$$L_1(\hat{y}, y) = \sum_{i=0}^m|y^{(i)} - \hat{y}^{(i)}| $$
- L2 loss is defined as $$ L_2(\hat{y},y) = \sum_{i=0}^m(y^{(i)} - \hat{y}^{(i)})^2 $$

In [45]:
def L1(y_hat, y):
    loss = np.sum(np.abs(y - y_hat))
    return loss

In [56]:
def L2(y_hat, y):
    loss = np.sum((y - y_hat)**2)
    # loss = np.sum(np.dot(y-y_hat,y-y_hat))
    return loss