In [1]:
import tensorflow as tf

#######################
# Reference site: https://towardsdatascience.com/beginners-guide-to-building-neural-networks-in-tensorflow-dab7a09b941d
#######################

# Use the CIFAR-100 dataset (32x32 images, 3 channels) bundled with Keras as a quick test set.
# Note: MobileNet is not designed for crops this small, so this setup is for testing only
# and would not be appropriate in a real application.
cifar100 = tf.keras.datasets.cifar100
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Scale pixel values to [0, 1]. Cast to float32 before dividing: dividing the loaded
# arrays by 255.0 directly yields float64 copies (twice the memory of float32), and
# Keras layers compute in float32 by default anyway.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Keras layers expect channels-last input by default: (batch, height, width, channels).
# For channels-first data, pass data_format="channels_first" to the layers instead.

In [2]:
# Sanity check: dimensions of the training image array.
x_train.shape

(50000, 32, 32, 3)

In [3]:
# Sanity check: dimensions of the training label array.
y_train.shape

(50000, 1)

In [4]:
# tf.get_variable, name scopes, etc. get confusing quickly, so the Keras layer API is used.
# Switch to tf.nn.conv2d / tf.nn.depthwise_conv2d, etc. if lower-level control is needed.

# Set hyperparameters
alpha = 1  # multiplies the number of filters of every convolution below
rho = 1    # not referenced by the model below; presumably the paper's resolution multiplier (TODO confirm)


def _scaled_filters(base_filters):
    """Return ``base_filters`` scaled by ``alpha`` as an integer filter count.

    Conv2D needs an integer number of filters; a fractional ``alpha`` would
    otherwise produce a float. The result is never smaller than 1.
    """
    return max(1, int(base_filters * alpha))


def _depthwise_separable_block(base_filters, strides):
    """Build the layers of one depthwise-separable convolution block.

    The block is: 3x3 depthwise conv -> BN -> ReLU -> 1x1 pointwise conv -> BN -> ReLU.

    Args:
        base_filters: number of pointwise output filters before scaling by ``alpha``.
        strides: stride of the depthwise convolution; the pointwise conv always uses (1, 1).

    Returns:
        A list of six freshly created Keras layers, in execution order.
    """
    return [
        tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=strides, padding="same", depth_multiplier=1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Conv2D(filters=_scaled_filters(base_filters), kernel_size=1, strides=(1, 1), padding="same"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
    ]


# (pointwise filters before alpha scaling, depthwise strides) for each separable block, in order.
_SEPARABLE_BLOCKS = (
    [
        (64, (1, 1)),    # 1
        (128, (2, 2)),   # 2
        (128, (1, 1)),   # 3
        (256, (2, 2)),   # 4
        (256, (1, 1)),   # 5
        (512, (2, 2)),   # 6
    ]
    + [(512, (1, 1))] * 5  # 7 (repeated 5 times)
    + [
        (1024, (2, 2)),  # 8
        # 9 -- the article lists stride 2 here although the output size does not change,
        # which is only consistent with stride 1, so stride 1 is used.
        # NOTE(review): the Keras reference MobileNet is believed to use stride 1 for this
        # block as well -- confirm against its source.
        # For 32x32 input the feature map is already 1x1 at this point, so the result is
        # the same either way.
        (1024, (1, 1)),
    ]
)

# build the MobileNet architecture
mobilenet_layers = [
    # input conv
    # input shape needs to be declared (for test, 32*32; for ImageNet, 224*224)
    tf.keras.layers.Conv2D(input_shape=(32, 32, 3), filters=_scaled_filters(32), kernel_size=3, strides=(2, 2), padding="same"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.ReLU(),
]

for base_filters, strides in _SEPARABLE_BLOCKS:
    mobilenet_layers.extend(_depthwise_separable_block(base_filters, strides))

# pooling: average over whatever spatial size is left after block 9 and drop the spatial
# axes, giving a flat (batch, channels) tensor. This replaces a fixed pool_size that would
# have to be 7x7 for 224x224 input and 1x1 for 32x32 input, and it ensures the classifier
# below outputs (batch, classes) instead of a rank-4 tensor.
mobilenet_layers.append(tf.keras.layers.GlobalAveragePooling2D())

# FC layer, channels -> number of classes
# In the article, the ImageNet data set has 1000 classes
# For test, 100 classes
mobilenet_layers.append(tf.keras.layers.Dense(100, activation='softmax'))

model = tf.keras.models.Sequential(mobilenet_layers)

# show the net structure and parameters
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 16, 16, 32)        896       
_________________________________________________________________
batch_normalization (BatchNo (None, 16, 16, 32)        128       
_________________________________________________________________
re_lu (ReLU)                 (None, 16, 16, 32)        0         
_________________________________________________________________
depthwise_conv2d (DepthwiseC (None, 16, 16, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 16, 32)        128       
_________________________________________________________________
re_lu_1 (ReLU)               (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 64)        2

In [5]:
# Optimization step (Keras calls this "compiling" the model).

# The string 'rmsprop' selects RMSprop with its default parameters. To tune them,
# build the optimizer explicitly and pass it in place of the string, e.g.
#   rmsprop = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

compile_options = {
    'optimizer': 'rmsprop',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [6]:
# Training
# Only a single epoch was run as a smoke test; the loss went from about 5.3 to 4.6.

model.fit(x=x_train, y=y_train, epochs=1)

Train on 50000 samples


<tensorflow.python.keras.callbacks.History at 0x1506dd49240>