# Build model layer by layer here

In [1]:
import tensorflow as tf

## First, define the layer modules that we will use.

Here we define a max-pooling layer and a convolutional layer.

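The kernel size, strides and padding of the max-pooling layer are fixed. For the conv. layer, the kernel size and the input/output channel counts are parameters, while its strides (1) and padding ('SAME') are fixed as well.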

In [2]:
def max_pool_2x2(inputs):
    '''Max-pool `inputs` with a 2x2 window moved in steps of 2, using VALID padding.

    Args:
        inputs: tensor handed unchanged to tf.nn.max_pool.
    Returns:
        the pooled tensor produced by tf.nn.max_pool.
    '''
    # Window and step only span the two middle axes; the outer two stay at 1.
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(inputs, ksize=window, strides=step, padding='VALID')

def add_conv_layer(inputs, in_size, out_size, k_size, n_layer, activation_function=None, bn=None):
    '''Add a stride-1, SAME-padded 2D convolution with bias and an optional activation.

    Args:
        inputs: input tensor passed to tf.nn.conv2d.
        in_size: number of input channels (third dimension of the kernel).
        out_size: number of output channels (fourth dimension of the kernel,
            and the length of the bias vector).
        k_size: side length of the square convolution kernel.
        n_layer: layer index; it forms the name scope 'conv<n_layer>' and the
            variable names 'weights_<n_layer>' / 'biases_<n_layer>'.
        activation_function: callable applied to the biased convolution output;
            if None, the pre-activation is returned unchanged.
        bn: accepted only so that callers passing ``bn=None`` keep working.
            Batch normalization is not implemented in this layer, so any
            value other than None is rejected.
    Returns:
        the layer output tensor.
    Raises:
        NotImplementedError: if `bn` is not None.
    '''
    if bn is not None:
        # Fail loudly instead of silently building a layer without batch norm.
        raise NotImplementedError(
            'add_conv_layer does not implement batch normalization; pass bn=None')

    layer_name = 'conv%s' % n_layer
    with tf.name_scope(layer_name):
        # tf.name_scope does not prefix names created by tf.get_variable, so the
        # layer index is embedded in the variable names to keep them unique.
        Weights = tf.get_variable('weights_%s' % n_layer,
                                  shape=[k_size, k_size, in_size, out_size],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(
                                      stddev=0.1, dtype=tf.float32))

        biases = tf.get_variable('biases_%s' % n_layer,
                                 shape=[out_size],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))

        pre_act = tf.nn.bias_add(tf.nn.conv2d(inputs, Weights, strides=[1, 1, 1, 1],
                                              padding='SAME'), biases)

        if activation_function is None:
            conv_layer = pre_act
        else:
            conv_layer = activation_function(pre_act)

    return conv_layer

## Second, build the net!

I didn't define the fc layer as a module,
because fc layers are not used as often as conv. and max-pooling layers.

Below is a net with 8 conv. layers, with max-pooling after every 2 conv. layers, followed by 3 fc layers.

Note that the idea for this structure comes from VGG.

In [3]:
def inference(images, batch_size, n_classes, on_train):
    '''Build the VGG-style network and return the class logits.

    The net has four stages of two 3x3 conv + ReLU layers, each stage followed
    by 2x2 max-pooling, then three fully connected layers
    (flattened features -> 4096 -> 1024 -> n_classes).

    Args:
        images: image batch, 4D tensor, tf.float32, with 3 channels. The conv
            layers use tf.nn.conv2d's default NHWC layout, i.e.
            [batch_size, height, width, 3]. The spatial size must be statically
            known, because the first fc weight matrix is sized from it.
        batch_size: number of images per batch; used to flatten the last
            pooling output to [batch_size, -1].
        n_classes: number of output classes.
        on_train: whether the graph is built for training. Dropout
            (keep_prob 0.5) is applied after fc9 and fc10 only when this is
            true. May be a Python bool or a boolean tensor/placeholder.
    Returns:
        output tensor with the computed logits (softmax is NOT applied),
        float, [batch_size, n_classes]
    '''
    # Dropout must only be active during training; at evaluation time every
    # unit is kept.
    if isinstance(on_train, bool):
        keep_prob = 0.5 if on_train else 1.0
    else:
        # Graph-time switch, e.g. a tf.bool placeholder.
        keep_prob = tf.cond(tf.cast(on_train, tf.bool),
                            lambda: tf.constant(0.5),
                            lambda: tf.constant(1.0))

    # (in_channels, out_channels) of conv1 .. conv8; every kernel is 3x3.
    conv_plan = [(3, 32), (32, 32),
                 (32, 64), (64, 64),
                 (64, 128), (128, 128),
                 (128, 256), (256, 256)]
    net = images
    for n_layer, (in_size, out_size) in enumerate(conv_plan, start=1):
        net = add_conv_layer(net, in_size, out_size, 3, n_layer=n_layer,
                             activation_function=tf.nn.relu)
        if n_layer % 2 == 0:
            # Halve the spatial resolution after every second conv layer.
            net = max_pool_2x2(net)

    # fc9: flattened features -> 4096 (e.g. 14*14*256 for 224x224 inputs)
    with tf.variable_scope('fc9') as scope:
        reshape = tf.reshape(net, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value

        weights = tf.get_variable('weights',
                                  shape=[dim, 4096],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[4096],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        act_fc9 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
        fc9 = tf.nn.dropout(act_fc9, keep_prob=keep_prob)

    # fc10: 4096 -> 1024
    with tf.variable_scope('fc10') as scope:
        weights = tf.get_variable('weights',
                                  shape=[4096, 1024],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[1024],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        act_fc10 = tf.nn.relu(tf.matmul(fc9, weights) + biases, name='fc10')
        fc10 = tf.nn.dropout(act_fc10, keep_prob=keep_prob)

    # fc11: 1024 -> n_classes, linear; the softmax itself is left to the caller.
    with tf.variable_scope('fc11_softmax') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[1024, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(fc10, weights), biases, name='softmax_linear')

    return softmax_linear