# Custom layers

We recommend using `tf.keras` as a high-level API for building neural networks. That said, most TensorFlow APIs are usable with eager execution.


In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

# Turn on eager execution so the layer calls below return concrete values
# that can be displayed directly as cell outputs.
tf.enable_eager_execution()

## Layers: common sets of useful operations

In [2]:
# Most layers take as a first argument the number of output dimensions / channels.
layer = tf.keras.layers.Dense(100)

# The number of input dimensions can also be given explicitly through
# `input_shape`; here the last input dimension is declared to be 5.
# NOTE(review): presumably optional, since the shape can be inferred the first
# time the layer is called -- confirm against the Keras docs.
# This rebinds `layer`, so the Dense(100) instance above is discarded.
layer = tf.keras.layers.Dense(10, input_shape=(None, 5))

The full list of pre-existing layers can be seen in [the documentation](https://www.tensorflow.org/api_docs/python/tf/keras/layers). It includes Dense (a fully-connected layer),
Conv2D, LSTM, BatchNormalization, Dropout, and many others.

In [3]:
# To use a layer, simply call it on an input tensor (here a [10, 5] tensor of zeros).
layer(tf.zeros([10, 5]))

<tf.Tensor: id=29, shape=(10, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

You can inspect all variables in a layer using `layer.variables` and trainable variables using `layer.trainable_variables`.
In this case a fully-connected layer will have variables for weights and biases.

In [4]:
# List every variable owned by the layer (a kernel and a bias for this Dense layer).
layer.variables

[<tf.Variable 'dense_1/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.59892625, -0.12420213, -0.37856987, -0.4220448 ,  0.2620601 ,
          0.24742913,  0.5062179 ,  0.18849277, -0.60822386,  0.18964994],
        [-0.57287693,  0.5830808 , -0.31818697,  0.42750412,  0.29321223,
          0.36999363,  0.09884048, -0.13540187,  0.60259765, -0.15833962],
        [-0.39063483,  0.03117055, -0.59842604,  0.5551414 , -0.6094775 ,
          0.15491366,  0.12911665, -0.01340908,  0.5157196 , -0.62702394],
        [-0.4998048 , -0.12701422, -0.3110964 ,  0.28108442,  0.29559332,
         -0.18692365,  0.01729536, -0.11286175,  0.18293965, -0.01327622],
        [ 0.3332253 , -0.5219369 ,  0.42286414,  0.46053463, -0.10966367,
         -0.14447728, -0.29939833, -0.13547423,  0.36642414, -0.0299027 ]],
       dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>]

In [5]:
# The variables are also accessible individually through named attributes
# (`kernel` and `bias` on this Dense layer).
layer.kernel, layer.bias

(<tf.Variable 'dense_1/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.59892625, -0.12420213, -0.37856987, -0.4220448 ,  0.2620601 ,
          0.24742913,  0.5062179 ,  0.18849277, -0.60822386,  0.18964994],
        [-0.57287693,  0.5830808 , -0.31818697,  0.42750412,  0.29321223,
          0.36999363,  0.09884048, -0.13540187,  0.60259765, -0.15833962],
        [-0.39063483,  0.03117055, -0.59842604,  0.5551414 , -0.6094775 ,
          0.15491366,  0.12911665, -0.01340908,  0.5157196 , -0.62702394],
        [-0.4998048 , -0.12701422, -0.3110964 ,  0.28108442,  0.29559332,
         -0.18692365,  0.01729536, -0.11286175,  0.18293965, -0.01327622],
        [ 0.3332253 , -0.5219369 ,  0.42286414,  0.46053463, -0.10966367,
         -0.14447728, -0.29939833, -0.13547423,  0.36642414, -0.0299027 ]],
       dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>)

## Implementing custom layers
The best way to implement your own layer is to extend the `tf.keras.layers.Layer` class and implement:
  *  `__init__` , where you can do all input-independent initialization
  * `build`, where you know the shapes of the input tensors and can do the rest of the initialization
  * `call`, where you do the forward computation

Note that you don't have to wait until `build` is called to create your variables; you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that the shapes required to create the variables need to be explicitly specified.

In [6]:
class MyDenseLayer(tf.keras.layers.Layer):
    """Minimal fully-connected layer without a bias: ``output = input @ kernel``.

    The kernel is created in `build`, once the size of the last input
    dimension is known, so callers only specify the number of outputs.

    Args:
        num_outputs: Size of the last dimension of the layer's output.
    """

    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        """Create the kernel with shape ``[input_shape[-1], num_outputs]``."""
        # `add_weight` is the supported API; `add_variable` is only a
        # deprecated alias that forwards to it.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[int(input_shape[-1]), self.num_outputs])

    def call(self, input):
        """Return the matrix product of `input` and the kernel."""
        return tf.matmul(input, self.kernel)

    
# Instantiate the custom layer with 10 outputs; no input size is given here.
layer = MyDenseLayer(10)

# Calling the layer on a [10, 5] input yields a (10, 10) result, and the
# kernel then shows up in `trainable_variables` with shape (5, 10).
print(layer(tf.zeros([10, 5])))
print(layer.trainable_variables)

tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], shape=(10, 10), dtype=float32)
[<tf.Variable 'my_dense_layer/kernel:0' shape=(5, 10) dtype=float32, numpy=
array([[ 0.09471142, -0.16021302,  0.30938947,  0.27562535, -0.41273797,
         0.36257374, -0.26864353, -0.43193567, -0.11063308, -0.35333395],
       [ 0.2686835 ,  0.10002691,  0.06491792, -0.42869312, -0.43786573,
         0.42726666, -0.56102765,  0.47913307, -0.21536988, -0.34784976],
       [-0.48999432, -0.40059173, -0.10962176, -0.30026975,  0.08471304,
         0.5718263 ,  0.56375986,  0.6077195 , -0.6059732 ,  0.55252343],
       [ 0.14620066, -0.35362753, -0.236161  ,  0.06647271,  0.53771263,
         0.23777008,  0.24534255, -0

## Models: composing layers

Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut.

The main class used when creating a layer-like thing which contains other layers is `tf.keras.Model`. Implementing one is done by inheriting from `tf.keras.Model`.

In [7]:
class ResnetIdentityBlock(tf.keras.Model):
    """Residual block: three conv + batch-norm stages and an identity shortcut.

    Args:
        kernel_size: Kernel size of the middle convolution; the first and last
            convolutions always use a (1, 1) kernel.
        filters: Sequence of exactly three filter counts, one per convolution.
    """

    def __init__(self, kernel_size, filters):
        super(ResnetIdentityBlock, self).__init__(name='')
        # Unpacking enforces that exactly three filter counts were supplied.
        n_first, n_middle, n_last = filters

        # Stage a: 1x1 convolution.
        self.conv2a = tf.keras.layers.Conv2D(n_first, (1, 1))
        self.bn2a = tf.keras.layers.BatchNormalization()

        # Stage b: `kernel_size` convolution with 'same' padding.
        self.conv2b = tf.keras.layers.Conv2D(n_middle, kernel_size, padding='same')
        self.bn2b = tf.keras.layers.BatchNormalization()

        # Stage c: 1x1 convolution.
        self.conv2c = tf.keras.layers.Conv2D(n_last, (1, 1))
        self.bn2c = tf.keras.layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        """Run the three stages and add `input_tensor` back before the final ReLU.

        Args:
            input_tensor: Input to the block. It is added to the output of the
                last stage, so the two must have shapes compatible for addition.
            training: Forwarded to every batch-normalization layer.

        Returns:
            The ReLU of the last stage's output plus `input_tensor`.
        """
        stage_a = tf.nn.relu(self.bn2a(self.conv2a(input_tensor), training=training))
        stage_b = tf.nn.relu(self.bn2b(self.conv2b(stage_a), training=training))
        # No ReLU here: the activation is applied after the shortcut is added.
        stage_c = self.bn2c(self.conv2c(stage_b), training=training)

        return tf.nn.relu(stage_c + input_tensor)


# Kernel size 1 for the middle convolution and filter counts 1, 2, 3. The last
# count (3) equals the final dimension of the input below, which the shortcut
# addition needs.
block = ResnetIdentityBlock(1, [1, 2, 3])

# Run the block once on a zeros input, then list the names of the trainable
# variables contributed by its sub-layers.
print(block(tf.zeros([1, 2, 3, 3])))
print([x.name for x in block.trainable_variables])

tf.Tensor(
[[[[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]

  [[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]]], shape=(1, 2, 3, 3), dtype=float32)
['resnet_identity_block/conv2d/kernel:0', 'resnet_identity_block/conv2d/bias:0', 'resnet_identity_block/batch_normalization/gamma:0', 'resnet_identity_block/batch_normalization/beta:0', 'resnet_identity_block/conv2d_1/kernel:0', 'resnet_identity_block/conv2d_1/bias:0', 'resnet_identity_block/batch_normalization_1/gamma:0', 'resnet_identity_block/batch_normalization_1/beta:0', 'resnet_identity_block/conv2d_2/kernel:0', 'resnet_identity_block/conv2d_2/bias:0', 'resnet_identity_block/batch_normalization_2/gamma:0', 'resnet_identity_block/batch_normalization_2/beta:0']


Much of the time, however, models which compose many layers simply call one layer after the other. This can be done in very little code using `tf.keras.Sequential`.

In [8]:
 my_seq = tf.keras.Sequential([tf.keras.layers.Conv2D(1, (1, 1)),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.Conv2D(2, 1, padding='same'),
                               tf.keras.layers.BatchNormalization(),
                               tf.keras.layers.Conv2D(3, (1, 1)),
                               tf.keras.layers.BatchNormalization()])
    
    
my_seq(tf.zeros([1, 2, 3, 3]))

<tf.Tensor: id=514, shape=(1, 2, 3, 3), dtype=float32, numpy=
array([[[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]]], dtype=float32)>