In [46]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, BatchNormalization, Activation, ZeroPadding2D, Input
from tensorflow.keras.layers import GlobalAveragePooling2D, Add, Dense 
from tensorflow.keras import backend
from tensorflow.keras.models import Model 

In [47]:
# 使结果可以被8整除
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

In [48]:
def relu6(x):
    """Apply ReLU capped at 6 by delegating to ``backend.relu``.

    Args:
        x: The value forwarded unchanged to ``backend.relu``.

    Returns:
        Whatever ``backend.relu(x, max_value=6)`` returns.
    """
    upper_bound = 6
    return backend.relu(x, max_value=upper_bound)

In [49]:
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

In [50]:
def correct_pad(inputs, kernel_size):
    """Compute the explicit zero padding used before a strided 'valid' conv.

    The spatial size is read from axes 1 and 2 of ``inputs`` (the notebook
    builds its tensors as (batch, rows, cols, channels)).

    Args:
        inputs: Tensor whose static shape is queried via ``backend.int_shape``.
        kernel_size: An int (square kernel) or a pair ``(k_rows, k_cols)``.

    Returns:
        ``((pad_top, pad_bottom), (pad_left, pad_right))``, suitable for the
        ``padding`` argument of ``ZeroPadding2D``.
    """
    img_dim = 1
    input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)]

    if isinstance(kernel_size, int):
        kernel_size = (kernel_size, kernel_size)

    if input_size[0] is None:
        adjust = (1, 1)
    else:
        # An even dimension needs one less pixel on the leading side. A
        # dimension that is unknown (None) is treated like the fully-unknown
        # case (adjust = 1) instead of crashing on ``None % 2``.
        adjust = tuple(1 if size is None else 1 - size % 2
                       for size in input_size)

    correct = (kernel_size[0] // 2, kernel_size[1] // 2)

    return ((correct[0] - adjust[0], correct[0]),
            (correct[1] - adjust[1], correct[1]))

In [51]:
def _invert_res_blocks(inputs, expansion, stride, alpha, filters, block_id):
    """Build one inverted residual block: expand -> depthwise -> project.

    Args:
        inputs: Input tensor; its last axis is read as the channel count.
        expansion: Multiplier applied to the input channels in the 1x1
            expansion conv.
        stride: Stride of the 3x3 depthwise conv; a stride of 2 gets explicit
            zero padding followed by 'valid' padding.
        alpha: Width multiplier applied to ``filters``.
        filters: Requested number of output channels before scaling.
        block_id: Index used in layer names; a falsy id (block 0) skips the
            expansion stage and uses the 'expanded_conv_' prefix.

    Returns:
        The block output tensor, summed with ``inputs`` when the stride is 1
        and the channel count is unchanged.
    """
    input_channels = backend.int_shape(inputs)[-1]
    # Different alpha values yield differently sized networks: scale the
    # requested width, then snap it to a multiple of 8.
    project_filters = _make_divisible(int(filters * alpha), 8)

    has_expansion = bool(block_id)
    name_prefix = 'block_{}_'.format(block_id) if has_expansion else 'expanded_conv_'
    bn_config = dict(epsilon=1e-3, momentum=0.999)

    net = inputs

    # Expand the channel dimension with a 1x1 convolution.
    if has_expansion:
        net = Conv2D(filters=expansion * input_channels,
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     activation=None,
                     name=name_prefix + 'expand')(net)
        net = BatchNormalization(name=name_prefix + 'expand_BN', **bn_config)(net)
        net = Activation(relu6, name=name_prefix + 'expand_relu')(net)

    downsampling = stride == 2
    if downsampling:
        pad_amounts = correct_pad(net, 3)
        net = ZeroPadding2D(padding=pad_amounts, name=name_prefix + 'pad')(net)

    # Depthwise 3x3 convolution.
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = DepthwiseConv2D(kernel_size=3,
                          strides=stride,
                          activation=None,
                          use_bias=False,
                          padding=depthwise_padding,
                          name=name_prefix + 'depthwise')(net)
    net = BatchNormalization(name=name_prefix + 'depthwise_BN', **bn_config)(net)
    net = Activation(relu6, name=name_prefix + 'depthwise_relu')(net)

    # Project back down with a 1x1 convolution; no ReLU follows so the
    # low-dimensional features are not destroyed.
    net = Conv2D(project_filters,
                 kernel_size=1,
                 padding='same',
                 use_bias=False,
                 activation=None,
                 name=name_prefix + 'project')(net)
    net = BatchNormalization(name=name_prefix + 'project_BN', **bn_config)(net)

    # The residual shortcut is only valid when the tensor shape is unchanged.
    if stride == 1 and input_channels == project_filters:
        return Add(name=name_prefix + 'add')([inputs, net])
    return net

In [52]:
def MobilenetV2(input_shape=(224, 224, 3),
                alpha=1.0,
                include_top=True,
                classes=1000):
    """Build a MobileNetV2 Keras model.

    Args:
        input_shape: ``(rows, cols, channels)`` of the input image.
        alpha: Width multiplier applied to the number of filters in every
            block (and to the last conv only when ``alpha > 1.0``).
        include_top: When True, append global average pooling and the softmax
            ``Dense`` classifier. When False, the model ends at the final
            1x1 conv feature map ('out_relu').
        classes: Number of classifier outputs; only used when
            ``include_top`` is True.

    Returns:
        A ``Model`` named ``'mobilenetv2_<alpha>_<rows>'``.
    """
    rows = input_shape[0]

    img_input = Input(shape=input_shape)

    # Stem (shape comments assume a 224x224 input and alpha=1.0):
    # 224,224,3 -> 112,112,32
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = ZeroPadding2D(padding=correct_pad(img_input, 3),
                      name='Conv1_pad')(img_input)
    x = Conv2D(first_block_filters,
               kernel_size=3,
               strides=(2, 2),
               padding='valid',
               use_bias=False,
               name='Conv1')(x)
    x = BatchNormalization(epsilon=1e-3,
                           momentum=0.999,
                           name='bn_Conv1')(x)
    x = Activation(relu6, name='Conv1_relu')(x)

    # (filters, stride of the first block, expansion, number of blocks)
    stage_specs = (
        (16, 1, 1, 1),    # 112,112,32 -> 112,112,16
        (24, 2, 6, 2),    # 112,112,16 -> 56,56,24
        (32, 2, 6, 3),    # 56,56,24   -> 28,28,32
        (64, 2, 6, 4),    # 28,28,32   -> 14,14,64
        (96, 1, 6, 3),    # 14,14,64   -> 14,14,96
        (160, 2, 6, 3),   # 14,14,96   -> 7,7,160
        (320, 1, 6, 1),   # 7,7,160    -> 7,7,320
    )
    block_id = 0
    for stage_filters, first_stride, expansion, repeats in stage_specs:
        for repeat_index in range(repeats):
            # Only the first block of a stage may downsample.
            stride = first_stride if repeat_index == 0 else 1
            x = _invert_res_blocks(x,
                                   filters=stage_filters,
                                   alpha=alpha,
                                   stride=stride,
                                   expansion=expansion,
                                   block_id=block_id)
            block_id += 1

    # The last conv is only widened for alpha > 1; it is never made narrower.
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    # 7,7,320 -> 7,7,1280
    x = Conv2D(last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(x)
    x = BatchNormalization(epsilon=1e-3,
                           momentum=0.999,
                           name='Conv_1_bn')(x)
    x = Activation(relu6, name='out_relu')(x)

    if include_top:
        # 7,7,1280 -> 1280 -> classes
        x = GlobalAveragePooling2D()(x)
        x = Dense(classes, activation='softmax',
                  use_bias=True, name='Logits')(x)

    model = Model(img_input, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    return model

In [53]:
# Build the network for a 224x224x3 input with the default arguments and
# print its layer-by-layer summary.
model = MobilenetV2(input_shape=(224, 224, 3))
model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_7[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________