<font color=#FF0000 size=4 face="黑体">Package import</font>

In [None]:
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
import Ipynb_importer
from tensorflow.python.framework import ops

<font color=#FF0000 size=4 face="黑体">Random minibatches</font>

In [None]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = None):
    """
    Shuffle the examples (columns) of X and Y in unison and partition them into mini-batches.

    Arguments:
    X -- input data, indexed as X[:, i] for example i
    Y -- labels, indexed as Y[:, i] for example i; must have as many columns as X
    mini_batch_size -- number of examples per mini-batch (positive integer)
    seed -- optional integer; when given, the shuffle is reproducible and the global
            NumPy random state is left untouched. When None, the global state is used.

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples; the last one
                    is smaller when the number of examples is not a multiple of
                    mini_batch_size

    Raises:
    ValueError -- if mini_batch_size < 1 or X and Y disagree on the number of examples
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer")

    m = X.shape[1]
    if Y.shape[1] != m:
        raise ValueError("X and Y must contain the same number of examples (columns)")

    # A private generator keeps a seeded shuffle from disturbing other users of np.random.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Shuffle: one permutation applied to both arrays keeps examples and labels aligned.
    permutation = rng.permutation(m)
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    # Partition: slicing clamps at m, so the trailing partial batch needs no special case.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size
        mini_batches.append((shuffled_X[:, start:end], shuffled_Y[:, start:end]))

    return mini_batches

<font color=#FF0000 size=4 face="黑体">Create placeholders</font>

In [None]:
def create_placeholders(n_x, n_y):
    """
    Build the float32 graph inputs for the features and the labels.

    Arguments:
    n_x -- scalar, number of rows of the input placeholder (size of one image vector)
    n_y -- scalar, number of rows of the label placeholder (number of classes)

    Returns:
    X -- placeholder named "X" with shape [n_x, None]
    Y -- placeholder named "Y" with shape [n_y, None]
    """
    # The second dimension is left as None so any number of examples can be fed.
    specs = ((n_x, "X"), (n_y, "Y"))
    return tuple(
        tf.placeholder(tf.float32, shape = [rows, None], name = label)
        for rows, label in specs
    )

<font color=#FF0000 size=4 face="黑体">Initializing parameters</font>

In [None]:
def initialize_parameters(layer_dims):
    """
    Create the weight and bias variables for every layer of the network.

    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our
                  network, input layer first

    Returns:
    parameters -- python dictionary mapping "W1", "b1", ..., "W<L-1>", "b<L-1>"
                  (L = len(layer_dims)) to tf variables:
                  Wl -- shape (layer_dims[l], layer_dims[l - 1]), Xavier-initialized
                  bl -- shape (layer_dims[l], 1), zero-initialized
    """
    parameters = {}
    num_entries = len(layer_dims)

    for layer in range(1, num_entries):
        w_key = "W" + str(layer)
        b_key = "b" + str(layer)
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]

        parameters[w_key] = tf.get_variable(w_key, [fan_out, fan_in], initializer = tf.contrib.layers.xavier_initializer())
        # The bias is a column vector so it broadcasts over the examples axis in W @ A + b.
        parameters[b_key] = tf.get_variable(b_key, [fan_out, 1], initializer = tf.zeros_initializer())

        assert(parameters[w_key].shape == (fan_out, fan_in))
        assert(parameters[b_key].shape == (fan_out, 1))

    return parameters

<font color=#FF0000 size=4 face="黑体">Forward propagation in tf</font>

In [None]:
def forward_propagation(X, parameters):
    """
    Linear -> Relu -> Linear -> Relu -> ... -> Linear (softmax is applied in the cost)

    Arguments:
    X -- input tensor, multiplied on the left by W1
    parameters -- python dictionary containing "W1", "b1", ..., "WL", "bL"

    Returns:
    ZL -- output of the last linear unit (the logits, before any softmax)
    """
    # Every layer contributes one W and one b entry, so the layer count is half the dict size.
    L = len(parameters) // 2

    A = X
    # Hidden layers 1 .. L-1: linear step followed by ReLU.
    for l in range(1, L):
        Z = tf.add(tf.matmul(parameters["W" + str(l)], A), parameters["b" + str(l)])
        A = tf.nn.relu(Z)

    # Output layer: linear only, using the bias bL (not the weight matrix).
    ZL = tf.add(tf.matmul(parameters["W" + str(L)], A), parameters["b" + str(L)])

    return ZL

<font color=#FF0000 size=4 face="黑体">Compute cost</font>

In [None]:
def compute_cost(ZL, Y):
    """
    Mean softmax cross-entropy between the network output and the labels.

    Arguments:
    ZL -- output of the last linear unit, as returned by forward_propagation
    Y -- labels tensor with the same shape as ZL

    Returns:
    cost -- scalar tensor, the cross-entropy averaged over all examples
    """
    # Both tensors are transposed before being handed to the TF op
    # (presumably so the class axis comes last, as the op expects by default).
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits = tf.transpose(ZL),
        labels = tf.transpose(Y),
    )
    return tf.reduce_mean(per_example_loss)

<font color=#FF0000 size=4 face="黑体">Model</font>

In [None]:
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001, num_epochs = 1500, minibatch_size = 32, print_cost = True, layer_dims = None):
    """
    Build the network graph, train it with Adam on mini-batches, plot the cost curve and
    report accuracy on the training and test sets.

    Arguments:
    X_train -- training inputs of shape (n_x, m)
    Y_train -- training labels with n_y rows and one column per example
    X_test -- test inputs, fed to the same placeholder as X_train
    Y_test -- test labels, fed to the same placeholder as Y_train
    learning_rate -- learning rate passed to the Adam optimizer
    num_epochs -- number of passes over the training set
    minibatch_size -- number of examples per mini-batch
    print_cost -- if True, print the cost every 100 epochs and record it every 5 epochs
    layer_dims -- list of layer sizes, input layer first. When None, a module-level
                  `layer_dims` is used if one exists (the behaviour older callers relied on).

    Returns:
    parameters -- parameters learnt by the model. They can be used to predict

    Raises:
    ValueError -- if layer_dims is neither passed nor defined at module level
    """
    if layer_dims is None:
        # Backward compatibility: earlier versions read `layer_dims` from the enclosing module.
        layer_dims = globals().get("layer_dims")
        if layer_dims is None:
            raise ValueError("layer_dims must be provided, e.g. layer_dims = [n_x, 25, 12, n_y]")

    ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]
    costs = []

    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters(layer_dims)

    ZL = forward_propagation(X, parameters)
    cost = compute_cost(ZL, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(num_epochs):
            epoch_cost = 0
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size)
            # Average over the batches actually produced: this counts the trailing partial
            # batch and cannot be zero when there is at least one batch to iterate over.
            num_minibatches = len(minibatches)

            for (minibatch_X, minibatch_Y) in minibatches:
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict = {X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches

            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot (one point per 5 epochs, matching the sampling above)
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per fives)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # A prediction is correct when the arg-max along axis 0 (the rows of ZL and Y)
        # agrees between the network output and the labels.
        correct_prediction = tf.equal(tf.argmax(ZL, axis = 0), tf.argmax(Y, axis = 0))

        # fraction of correct predictions
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Training accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters