# What you need to remember: 
* np.exp(x) works for any np.array x and applies the exponential function to every coordinate 
* the sigmoid function and its gradient 
* image2vector is commonly used in deep learning 
* np.reshape is widely used. In the future, you’ll see that keeping your matrix/vector dimensions straight will go toward eliminating a lot of bugs. 
* numpy has efficient built-in functions 
* broadcasting is extremely useful


In [7]:
#!/usr/bin/python
# GRADED FUNCTION: basic_sigmoid import math
def basic_sigmoid(x): 
    """ 
    Compute sigmoid of x.
    Arguments: x -- A scalar
    Return:
    s -- sigmoid(x) 
    """
    ### START CODE HERE ### (￿ 1 line of code) 
    s = 1/(1+math.exp(-x)) 
    ### END CODE HERE ###
    return s


In [8]:
basic_sigmoid(2)

0.8807970779778823

In [1]:
import numpy as np
# example of np.exp 
x = np.array([1, 2, 3]) 
print(np.exp(x)) 

[  2.71828183   7.3890561   20.08553692]


In [2]:
# example of vector operation 
x = np.array([1, 2, 3]) 
print (x + 3)

[4 5 6]


In [2]:
# GRADED FUNCTION: sigmoid 
import numpy as np 
# this means you can access numpy functions by writing np.function() instead of numpy.function(), 
def sigmoid(x):
    """ 
    Compute the sigmoid of x
    Arguments: 
    x -- A scalar or numpy array of any size
    Return: 
    s -- sigmoid(x) 
    """
    ### START CODE HERE ### (1 line of code) 
    s = 1/(1+np.exp(-x)) 
    ### END CODE HERE ###
    return s

In [3]:
x = np.array([1,2,3]) 
sigmoid(x)

array([ 0.73105858,  0.88079708,  0.95257413])

In [4]:
# GRADED FUNCTION: sigmoid_derivative 
def sigmoid_derivative(x): 
    """ 
    Compute the gradient (also called the slope or derivative) 
    of the sigmoid function with respect to its input x.
    You can store the output of the sigmoid function into 
    variables and then use it to calculate the gradient.
    Arguments: 
    x -- A scalar or numpy array
    Return: 
    ds -- Your computed gradient. 
    """
    ### START CODE HERE ### (￿ 2 lines of code) 
    s = 1/(1+np.exp(-x)) 
    ds = s*(1-s)
    ### END CODE HERE ###
    return ds

In [5]:
# GRADED FUNCTION: image2vector 
def image2vector(image): 
    """ 
    Argument: 
    image -- a numpy array of shape (length, height, depth)
    Returns: 
    v -- a vector of shape (length*height*depth, 1) 
    """
    ### START CODE HERE ### (1 line of code) 
    v = image.reshape((image.shape[0]*image.shape[1]*image.shape[2]),1) 
    ### END CODE HERE ###
    return v

In [6]:
# GRADED FUNCTION: normalizeRows 
def normalizeRows(x):
    """ 
    Implement a function that normalizes each row of the matrix x (to have unit length).
    Argument: 
    x -- A numpy matrix of shape (n, m)
    Returns:
    x -- The normalized (by row) numpy matrix. You are allowed to modify x.
    """
    ### START CODE HERE ### (2 lines of code) 
    # Compute x_norm as the norm 2 of x.
    # Use np.linalg.norm(..., ord = 2, axis = ..., keepdims = True)
    # linalg=linear（线性）+algebra（代数），norm则表示范数。
    # ord=2，代表求的范数是：平方和开根号
    # ord=1，代表求得范数是：取绝对值再求和
    x_norm = np.linalg.norm(x,axis=1,keepdims=True)
    # Divide x by its norm. 
    x = x/x_norm 
    ### END CODE HERE ###
    return x

In [7]:
x = np.array([[0, 3, 4], [1, 6, 4]]) 
print("normalizeRows(x) = " + str(normalizeRows(x)))

normalizeRows(x) = [[ 0.          0.6         0.8       ]
 [ 0.13736056  0.82416338  0.54944226]]


In [8]:
# GRADED FUNCTION: softmax 
def softmax(x): 
    """
    Calculates the softmax for each row of the input x.
    Your code should work for a row vector and also for matrices of shape (n, m).
    Argument: 
    x -- A numpy matrix of shape (n,m)
    Returns: 
    s -- A numpy matrix equal to the softmax of x, of shape (n,m) """
    # Apply exp() element-wise to x. Use np.exp(...). 
    x_exp = np.exp(x)
    # Create a vector x_sum that sums each row of x_exp. Use np.sum(..., axis = 1, keepdims = True).
    x_sum = np.sum(x_exp,axis = 1,keepdims = True)
    # Compute softmax(x) by dividing x_exp by x_sum. It should automatically use numpy broadcasting.
    s = x_exp/x_sum
    return s

In [9]:
x = np.array([[0, 3, 4], [1, 6, 4]]) 
print("softmax(x) = " + str(softmax(x)))

softmax(x) = [[ 0.01321289  0.26538793  0.72139918]
 [ 0.00589975  0.8756006   0.11849965]]


## Vectorization 

In [10]:
import time
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0] 
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]

### CLASSIC DOT PRODUCT OF VECTORS IMPLEMENTATION ### 
#dot矩阵乘法：每一行对应位置元素相乘再求和，x和y
tic = time.process_time() 
dot = 0 
for i in range(len(x1)): 
    dot+= x1[i]*x2[i] 
toc = time.process_time() 
print ("dot = " + str(dot) + "\n ----- Computation time = " + 
       str(1000*(toc - tic)) + "ms"),

### CLASSIC OUTER PRODUCT IMPLEMENTATION ### 
tic = time.process_time() 
#outer是a的第一个元素跟b的每一个元素相乘作为第一行，第二个元素跟b的每一个元素相乘作为第二个元素...
#输出就是x1的元素个数作为行数，x2的元素个数作为列数。即输出矩阵的维度：len(a),len(b)
outer = np.zeros((len(x1),len(x2))) 
    # we create a len(x1)*len(x2) matrix with only zeros, 
for i in range(len(x1)): 
        for j in range(len(x2)): 
            outer[i,j] = x1[i]*x2[j] 
toc = time.process_time() 
print ("outer = " + str(outer) + "\n ----- Computation time = " + 
                   str(1000*(toc - tic)) + "ms"), 

### CLASSIC ELEMENTWISE IMPLEMENTATION ### 
tic = time.process_time()
#multiply是对应位相乘(但不求和,sum(np.multiply(x1,x2))=np.dot(x1,x2))
mul = np.zeros(len(x1)) 
for i in range(len(x1)): 
    mul[i] = x1[i]*x2[i] 
toc = time.process_time() 
print(sum(mul))
print ("elementwise multiplication = " + str(mul) + "\n ----Computation time = " + 
       str(1000*(toc - tic)) + "ms"),
### CLASSIC GENERAL DOT PRODUCT IMPLEMENTATION ### 
W = np.random.rand(3,len(x1)) # Random 3*len(x1) numpy array 
tic = time.process_time() 
#shape函数是numpy.core.fromnumeric中的函数，它的功能是读取矩阵的长度
#比如shape[0]就是读取矩阵第一维度的长度。
gdot = np.zeros(W.shape[0])
print(gdot)
#W的第一行和x1做dot运算，即相乘再求和，作为第一个元素。每一行乘完求和为一个元素。
for i in range(W.shape[0]): 
    print(i)
    for j in range(len(x1)): 
        gdot[i] += W[i,j]*x1[j]
toc = time.process_time() 
print ("gdot = " + str(gdot) + "\n ----- Computation time = " + str(1000*(toc - tic)) + "ms")

dot = 278
 ----- Computation time = 0.0ms
outer = [[ 81.  18.  18.  81.   0.  81.  18.  45.   0.   0.  81.  18.  45.   0.
    0.]
 [ 18.   4.   4.  18.   0.  18.   4.  10.   0.   0.  18.   4.  10.   0.
    0.]
 [ 45.  10.  10.  45.   0.  45.  10.  25.   0.   0.  45.  10.  25.   0.
    0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]
 [ 63.  14.  14.  63.   0.  63.  14.  35.   0.   0.  63.  14.  35.   0.
    0.]
 [ 45.  10.  10.  45.   0.  45.  10.  25.   0.   0.  45.  10.  25.   0.
    0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]
 [ 81.  18.  18.  81.   0.  81.  18.  45.   0.   0.  81.  18.  45.   0.
    0.]
 [ 18.   4.   4.  18.   0.  18.   4.  10.   0.   0.  18.   4.  10.   0

In [25]:
a = [[1,2,3],[4,5,6]]
b = [[1,2],[4,5],[3,6]]
a=np.array(a)
b=np.array(b)
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0] 
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]
x1=np.array(x1)
x2=np.array(x2)
dot=np.dot(a,b)
dot1=np.dot(x1,x2.T)
dot2=np.dot(x1,x2)
print(dot)
print(dot1)
print(dot2)
dot3 = np.dot(x1.T,x2.T)
print(dot3)
w=np.array([[1],[2]])
Y=np.array([[1],[1]])
#print(np.dot(w,Y))
print(np.dot(w,Y.T))
print(np.dot(w.T,Y))

[[18 30]
 [42 69]]
278
278
278
[[1 1]
 [2 2]]
[[3]]


In [12]:
### VECTORIZED DOT PRODUCT OF VECTORS ### 
tic = time.process_time() 
#dot矩阵乘法：对应相乘再求和
dot = np.dot(x1,x2) 
toc = time.process_time() 
print ("dot = " + str(dot) + "\n ----- Computation time = " + 
       str(1000*(toc - tic)) + "ms"),
### VECTORIZED OUTER PRODUCT ### 
tic = time.process_time() 
#outer是a的第一个元素跟b的每一个元素相乘作为第一行，第二个元素跟b的每一个元素相乘作为第二个元素...
outer = np.outer(x1,x2) 
toc = time.process_time() 
print ("outer = " + str(outer) + "\n ----- Computation time = " + 
       str(1000*(toc - tic)) + "ms"),
### VECTORIZED ELEMENTWISE MULTIPLICATION ### 
tic = time.process_time() 
#multiply是对应位相乘(但不求和,sum(np.multiply(x1,x2))=np.dot(x1,x2))
mul = np.multiply(x1,x2) 
toc = time.process_time() 
print ("elementwise multiplication = " + str(mul) + "\n ----Computation time = " + 
       str(1000*(toc - tic)) + "ms"),
### VECTORIZED GENERAL DOT PRODUCT ### 
tic = time.process_time() 
dot = np.dot(W,x1)
toc = time.process_time() 
print ("gdot = " + str(dot) + "\n ----- Computation time = " + 
       str(1000*(toc - tic)) + "ms"), 
#print( np.sum(np.multiply(W,x1),axis = 1,keepdims = True))
#np.max(dot)

dot = 278
 ----- Computation time = 0.0ms
outer = [[81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [63 14 14 63  0 63 14 35  0  0 63 14 35  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [81 18 18 81  0 81 18 45  0  0 81 18 45  0  0]
 [18  4  4 18  0 18  4 10  0  0 18  4 10  0  0]
 [45 10 10 45  0 45 10 25  0  0 45 10 25  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]
 ----- Computation time = 0.0ms
elementwise multiplication = [81  4 10  0  0 63 10  0  0  0 81  4 25  0  0]
 ----Computation time = 0.0ms
gdot = [ 21.10006234  17.25118221  25.76369377]
 ----- Computation time = 15.625ms


(None,)

## L1 loss is defined as:
* L1(ˆy,y) =∑ |y(i) − ˆ y(i)| 

In [13]:
# GRADED FUNCTION: L1 
def L1(yhat, y): 
    """ 
    Arguments: 
    yhat -- vector of size m (predicted labels) 
    y -- vector of size m (true labels)
    Returns: 
    loss -- the value of the L1 loss function defined above 
    """
    loss = sum(abs(y-yhat))
    return loss

In [14]:
yhat = np.array([.9, 0.2, 0.1, .4, .9]) 
y = np.array([1, 0, 0, 1, 1]) 
print("L1 = " + str(L1(yhat,y))) 

L1 = 1.1


# L2 loss is defined as 
* L2(ˆy,y) = ∑(y(i)-^y(i))^2
* then “np.dot(x,x)” =∑x(i)^2

In [33]:
# GRADED FUNCTION: L2 
def L2(yhat, y): 
    """ 
    Arguments: 
    yhat -- vector of size m (predicted labels) 
    y -- vector of size m (true labels)
    Returns: 
    loss -- the value of the L2 loss function defined above 
    """
    loss = np.dot(y-yhat,y-yhat)
    return loss

In [34]:
yhat = np.array([.9, 0.2, 0.1, .4, .9]) 
y = np.array([1, 0, 0, 1, 1]) 
print("L2 = " + str(L2(yhat,y))) 

L2 = 0.43
