参考链接：
- [5-4，模型层layers](https://www.kesci.com/home/project/5ea7d3b2564b12002c09dc57)

## 自定义模型层

如果自定义模型层没有需要被训练的参数，一般推荐使用Lamda层实现。

如果自定义模型层有需要被训练的参数，则可以通过对Layer基类子类化实现。

Lamda层由于没有需要被训练的参数，只需要定义正向传播逻辑即可，使用比Layer基类子类化更加简单。

Lamda层的正向逻辑可以使用Python的lambda函数来表达，也可以用def关键字定义函数来表达。

In [1]:
import tensorflow as tf
from tensorflow.keras import layers,models,regularizers

tf.__version__

'2.1.0'

In [2]:
mypower = layers.Lambda(lambda x:tf.math.pow(x,2))
mypower(tf.range(5))

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 0,  1,  4,  9, 16])>

Layer的子类化一般需要重新实现初始化方法，Build方法和Call方法。下面是一个简化的线性层的范例，类似Dense.

In [3]:
class Linear(layers.Layer):
    def __init__(self,units=32,**kwargs):
        super(Linear,self).__init__(**kwargs)
        self.units=units
    def build(self,input_shape):
        self.w=self.add_weight(shape=(input_shape[-1],self.units),
                               initializer='random_normal',
                               trainable=True)
        self.b=self.add_weight(shape=(self.units,),
                              initializer='random_normal',
                              trainable=True)
        super(Linear,self).build(input_shape)
    def call(self,inputs):
        """
        call方法一般定义定义正向1传播运算逻辑，__call__方法调用此方法
        """
        return tf.matmul(inputs,self.w)+self.b
    
    def get_config(self):
        config=super(Linear,self).get_config()
        config.update({'units':self.units})
        return config

In [4]:
linear = Linear(units = 8)
print(linear.built)
#指定input_shape，显式调用build方法，第0维代表样本数量，用None填充
linear.build(input_shape = (None,16)) 
print(linear.built)

False
True


In [5]:
linear = Linear(units = 8)
print(linear.built)
linear.build(input_shape = (None,16)) 
print(linear.compute_output_shape(input_shape = (None,16)))

False
(None, 8)


In [6]:
linear = Linear(units = 16)
print(linear.built)
#如果built = False，调用__call__时会先调用build方法, 再调用call方法。
linear(tf.random.uniform((100,64))) 
print(linear.built)
config = linear.get_config()
print(config)

False
True
{'name': 'linear_2', 'trainable': True, 'dtype': 'float32', 'units': 16}


In [7]:
tf.keras.backend.clear_session()

model = models.Sequential()
#注意该处的input_shape会被模型加工，无需使用None代表样本数量维
model.add(Linear(units = 16,input_shape = (64,)))  
print("model.input_shape: ",model.input_shape)
print("model.output_shape: ",model.output_shape)
model.summary()

model.input_shape:  (None, 64)
model.output_shape:  (None, 16)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
linear (Linear)              (None, 16)                1040      
Total params: 1,040
Trainable params: 1,040
Non-trainable params: 0
_________________________________________________________________


In [9]:
## 查看参数名称
linear = Linear(units = 16)


<__main__.Linear at 0xdadb208>

In [11]:
linear(tf.ones([6, 5]))

<tf.Tensor: shape=(6, 16), dtype=float32, numpy=
array([[-0.04333059, -0.08049949, -0.03249026,  0.05443351, -0.07544892,
        -0.08854784, -0.19290403, -0.01550727,  0.02320928,  0.2550611 ,
        -0.15202959, -0.04553121, -0.07346585,  0.23583043,  0.03007973,
        -0.06857169],
       [-0.04333059, -0.08049949, -0.03249026,  0.05443351, -0.07544892,
        -0.08854784, -0.19290403, -0.01550727,  0.02320928,  0.2550611 ,
        -0.15202959, -0.04553121, -0.07346585,  0.23583043,  0.03007973,
        -0.06857169],
       [-0.04333059, -0.08049949, -0.03249026,  0.05443351, -0.07544892,
        -0.08854784, -0.19290403, -0.01550727,  0.02320928,  0.2550611 ,
        -0.15202959, -0.04553121, -0.07346585,  0.23583043,  0.03007973,
        -0.06857169],
       [-0.04333059, -0.08049949, -0.03249026,  0.05443351, -0.07544892,
        -0.08854784, -0.19290403, -0.01550727,  0.02320928,  0.2550611 ,
        -0.15202959, -0.04553121, -0.07346585,  0.23583043,  0.03007973,
        -

In [14]:
linear.trainable_variables

[<tf.Variable 'linear_2/Variable:0' shape=(5, 16) dtype=float32, numpy=
 array([[ 2.59076003e-02, -5.27851991e-02, -7.70981088e-02,
          6.03635125e-02, -8.42469931e-02, -3.68040651e-02,
         -3.97913111e-03, -9.20438543e-02, -7.15096444e-02,
          3.81210484e-02,  1.47387898e-02, -2.99345609e-02,
         -4.81296033e-02, -2.19797827e-02, -8.68842155e-02,
         -4.15231250e-02],
        [-4.94861379e-02, -4.48313765e-02,  5.66767342e-02,
         -2.03403868e-02,  6.25559092e-02, -5.30785806e-02,
         -4.56362963e-02,  3.68531793e-02,  4.89867106e-02,
          1.23078756e-01,  2.56472686e-03,  3.13064791e-02,
          5.30909076e-02,  9.74147115e-03, -1.73773952e-02,
         -9.05219018e-02],
        [ 5.86021207e-02,  2.76081054e-03,  5.44458888e-02,
         -4.49273176e-02, -7.94086512e-03,  1.87071841e-02,
         -6.47474825e-02,  3.93426083e-02, -1.52370557e-02,
          5.12947813e-02, -3.37163918e-02, -4.54051122e-02,
         -3.03274523e-02,  1.17994

##  自定义模型

机器学习模型中有很多是通过叠加不同的结构层组合而成的，如resnet的每个残差块就是“卷积+批标准化+残差连接”的组合。

在tensorflow2中要创建一个包含多个网络层的的结构，一般继承与tf.keras.Model类。

In [18]:
class ResnetBlock(tf.keras.Model):
    def __init__(self, kernel_size, filters):
            super(ResnetBlock, self).__init__(name='resnet_block')

            # 每个子层卷积核数
            filter1, filter2, filter3 = filters

            # 三个子层，每层1个卷积加一个批正则化
            # 第一个子层， 1*1的卷积
            self.conv1 = tf.keras.layers.Conv2D(filter1, (1,1))
            self.bn1 = tf.keras.layers.BatchNormalization()
            # 第二个子层， 使用特点的kernel_size
            self.conv2 = tf.keras.layers.Conv2D(filter2, kernel_size, padding='same')
            self.bn2 = tf.keras.layers.BatchNormalization()
            # 第三个子层，1*1卷积
            self.conv3 = tf.keras.layers.Conv2D(filter3, (1,1))
            self.bn3 = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=False):

        # 堆叠每个子层
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        x = self.conv3(x)
        x = self.bn3(x, training=training)

        # 残差连接
        x += inputs
        outputs = tf.nn.relu(x)
        return outputs
resnetBlock = ResnetBlock(2, [6,4,9])
# 数据测试
print(resnetBlock(tf.ones([1,3,9,9])))
# 查看网络中的变量名
print([x.name for x in resnetBlock.trainable_variables])


tf.Tensor(
[[[[2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [2.1565793  1.1436929  2.7448354  2.9193456  2.0856676  1.0356885
    1.9981244  3.1337562  2.5819607 ]
   [1.653277   0.96290684 1.9400918  1.8662715  1.7643625  1.282055
    1.5997902  2.1563993  2.1351726 ]]

  [[2.1565793  1.1436929 