In [1]:
import tensorflow as tf
from typeguard import typechecked

# maxout network
在使用中需要注意输出结果归一化一类的操作，防止因为输出结果过高造成训练失败

In [2]:
class Maxoutlayer(tf.keras.layers.Layer):
    """Trainable maxout layer: ``m`` output units, each the max of ``k`` affine pieces.

    For an input whose last dimension is ``d`` the layer owns a weight of shape
    ``(d, m, k)`` and a bias of shape ``(m, k)``. It computes ``x . w + b`` and
    keeps, for every output unit, the largest of its ``k`` pieces.

    Args:
        k: Number of affine pieces per output unit (converted with ``int``).
        m: Number of output units (converted with ``int``).
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, k, m, **kwargs):
        super(Maxoutlayer, self).__init__(**kwargs)
        self.k = int(k)
        self.m = int(m)

    def build(self, input_shape, dtype=tf.float32):
        """Create the weight and bias once the input feature size is known.

        Args:
            input_shape: Shape of the layer input; only its last entry is used.
            dtype: dtype of the created variables (defaults to ``tf.float32``).
        """
        self.d = input_shape[-1]
        # Trace of the inferred feature size and the input shape, printed
        # whenever build() runs.
        print(self.d,input_shape)
        self.w = self.add_weight(name='w',
                                 shape=(self.d, self.m, self.k),
                                 initializer='uniform',
                                 dtype=dtype,
                                 trainable=True)
        self.b = self.add_weight(name='b',
                                 shape=(self.m, self.k),
                                 initializer='zero',
                                 dtype=dtype,
                                 trainable=True)
        super(Maxoutlayer, self).build(input_shape)

    def call(self, x):
        """Return the maxout activation of ``x``.

        Args:
            x: Tensor whose last dimension equals the ``d`` seen in ``build``.

        Returns:
            Tensor with the last dimension of ``x`` replaced by ``m``.
        """
        # Contract the last axis of x with the first axis of w:
        # (..., d) x (d, m, k) -> (..., m, k); the bias broadcasts over the
        # leading axes.
        outputs = tf.tensordot(x, self.w, axes=1) + self.b
        # Reduce over the trailing k axis. A hard-coded axis=2 is the k axis
        # only for rank-2 inputs; axis=-1 is correct for any input rank.
        outputs = tf.reduce_max(outputs, axis=-1)
        return outputs

In [3]:
# Build a float32 test matrix: the values 90..1 laid out in rows of 10
a = tf.cast(tf.reshape(tf.range(90, 0, -1), [-1, 10]), dtype=tf.float32)
a

<tf.Tensor: id=6, shape=(9, 10), dtype=float32, numpy=
array([[90., 89., 88., 87., 86., 85., 84., 83., 82., 81.],
       [80., 79., 78., 77., 76., 75., 74., 73., 72., 71.],
       [70., 69., 68., 67., 66., 65., 64., 63., 62., 61.],
       [60., 59., 58., 57., 56., 55., 54., 53., 52., 51.],
       [50., 49., 48., 47., 46., 45., 44., 43., 42., 41.],
       [40., 39., 38., 37., 36., 35., 34., 33., 32., 31.],
       [30., 29., 28., 27., 26., 25., 24., 23., 22., 21.],
       [20., 19., 18., 17., 16., 15., 14., 13., 12., 11.],
       [10.,  9.,  8.,  7.,  6.,  5.,  4.,  3.,  2.,  1.]], dtype=float32)>

In [4]:
# Maxout layer with k=5 pieces per unit and m=4 output units, applied to `a`
t = Maxoutlayer(k=5, m=4)
g = t(a)
g

10 (9, 10)


<tf.Tensor: id=45, shape=(9, 4), dtype=float32, numpy=
array([[14.594133  ,  1.2647249 , 12.365188  , 13.857061  ],
       [12.842349  ,  1.0717227 , 10.939661  , 12.202095  ],
       [11.090565  ,  0.87872046,  9.5141325 , 10.547126  ],
       [ 9.338782  ,  0.6857188 ,  8.088605  ,  8.8921585 ],
       [ 7.586999  ,  0.4927164 ,  6.6630764 ,  7.237191  ],
       [ 5.8352156 ,  0.29971468,  5.237549  ,  5.5822234 ],
       [ 4.0834317 ,  0.1067127 ,  3.8120208 ,  3.927256  ],
       [ 2.3316486 , -0.08628947,  2.3864932 ,  2.272288  ],
       [ 0.82125646, -0.27929142,  0.9609653 ,  0.6173205 ]],
      dtype=float32)>

In [5]:
# Mean and variance of the layer output along its last axis (one value per row)
mean, variance = tf.nn.moments(g, axes=-1)
(mean, variance)

(<tf.Tensor: id=52, shape=(9,), dtype=float32, numpy=
 array([10.520277 ,  9.263957 ,  8.007636 ,  6.751316 ,  5.494996 ,
         4.2386756,  2.982355 ,  1.7260351,  0.5300627], dtype=float32)>,
 <tf.Tensor: id=53, shape=(9,), dtype=float32, numpy=
 array([29.199844 , 22.839556 , 17.261106 , 12.4645   ,  8.449734 ,
         5.2168055,  2.7657177,  1.0964712,  0.2332847], dtype=float32)>)

In [6]:
# Standardize every row of `g` with its own mean and standard deviation.
# expand_dims adds a trailing axis of size 1 so the statistics broadcast across
# each row, without hard-coding the number of rows.
# NOTE: this rebinds `t`, which previously held the Maxoutlayer instance.
t = (g - tf.expand_dims(mean, -1)) / tf.expand_dims(variance**0.5, -1)
# Check: the per-row moments of the standardized output
tf.nn.moments(t,-1)

(<tf.Tensor: id=68, shape=(9,), dtype=float32, numpy=
 array([-2.9802322e-08,  0.0000000e+00, -5.9604645e-08, -1.4901161e-08,
        -1.1920929e-07,  1.4901161e-08,  1.4901161e-07, -1.4901161e-08,
         8.9406967e-08], dtype=float32)>,
 <tf.Tensor: id=69, shape=(9,), dtype=float32, numpy=
 array([1.0000001 , 1.0000001 , 0.99999994, 1.        , 1.        ,
        0.99999994, 1.        , 1.        , 1.        ], dtype=float32)>)

# maxout activation function
maxout作为激活函数时，只是筛选出在units和axis上最大的一组，不具备参数

## TensorFlow Addons=0.8.0内置函数
将 `axis` 维度拆分为 `(k, num_units)`（其中 `k = 通道数 / num_units`），并在 `k` 这一维上取最大值；当 `num_units = 1` 时，即为该维度上全部元素的最大值
- `num_units`为单元数，应为对应shape值的因数
- `axis`为变化维度，关注shape变换


In [7]:
class Maxout(tf.keras.layers.Layer):
    """Parameter-free maxout over one axis.

    The chosen axis of size ``num_channels`` is reshaped into
    ``(num_channels // num_units, num_units)`` and the maximum is taken over
    the first of those two axes, leaving ``num_units`` entries on that axis.

    Args:
        num_units: Size of the chosen axis in the output.
        axis: Axis to reduce (negative values count from the end).
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    @typechecked
    def __init__(self, num_units: int, axis: int = -1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis
        self.num_units = num_units

    def call(self, inputs):
        """Apply maxout to ``inputs``.

        Raises:
            ValueError: If the statically known size of the chosen axis is not
                a multiple of ``num_units``.
        """
        inputs = tf.convert_to_tensor(inputs)
        static_dims = inputs.get_shape().as_list()
        # Dimensions that are statically unknown (e.g. an arbitrary batch size)
        # are replaced by the matching entry of the runtime shape.
        shape = [
            tf.shape(inputs)[idx] if dim is None else dim
            for idx, dim in enumerate(static_dims)
        ]

        num_channels = shape[self.axis]
        # The divisibility check is only possible for a static channel count.
        is_static = not isinstance(num_channels, tf.Tensor)
        if is_static and num_channels % self.num_units:
            message = "number of features({}) is not a multiple of num_units({})"
            raise ValueError(message.format(num_channels, self.num_units))

        # Normalise a negative axis to its non-negative position.
        axis = self.axis + len(shape) if self.axis < 0 else self.axis
        assert axis >= 0, "Find invalid axis: {}".format(self.axis)

        # Split the chosen axis into (k, num_units) and reduce over k.
        k = num_channels // self.num_units
        expand_shape = shape[:axis] + [k, self.num_units] + shape[axis + 1:]

        grouped = tf.reshape(inputs, expand_shape)
        return tf.math.reduce_max(grouped, axis, keepdims=False)

## tensorflow=1.13.1 内置的 maxout 函数
将 `axis` 维度设为 `num_units`，并在末尾追加一个大小为 `通道数 / num_units` 的维度，再对这一末尾维度取最大值（即按展平后的顺序，每组连续元素取最大值）

In [8]:
# # Reference only: the TensorFlow 1.x maxout implementation (for use when running under TF 1)
# def maxout(inputs, num_units, axis=-1, scope=None):
#     with variable_scope.variable_scope(scope, 'MaxOut', [inputs]):
#         inputs = ops.convert_to_tensor(inputs)
#         shape = inputs.get_shape().as_list()
#         num_channels = shape[axis]
#         if num_channels % num_units:
#             raise ValueError('number of features({}) is not '
#                            'a multiple of num_units({})'.format(
#                                num_channels, num_units))
#         shape[axis] = num_units
#         shape += [num_channels // num_units]

#         # Dealing with batches with arbitrary sizes
#         for i in range(len(shape)):
#             if shape[i] is None:
#                 shape[i] = array_ops.shape(inputs)[i]
#         outputs = math_ops.reduce_max(
#             array_ops.reshape(inputs, shape), -1, keepdims=False)
#         return outputs

## 简化
与 TF 1 版本相同：将 `axis` 维度设为 `-1`（由 reshape 推断），并在末尾追加一个大小为 `通道数 / num_units` 的维度，再对这一末尾维度取最大值（即按展平后的顺序，每组连续元素取最大值）

In [9]:
class Maxout_af(tf.keras.layers.Layer):
    """Parameter-free maxout that reduces over an appended trailing axis.

    The input is reshaped so that the chosen axis is inferred (``-1``) and a
    new last axis of size ``num_channels // num_units`` is appended; the
    maximum is then taken over that new last axis.

    Args:
        num_units: Divisor of the size of the chosen axis.
        axis: Axis whose size is split (negative values count from the end).
            Its size must be statically known.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_units: int, axis: int = -1, **kwargs):
        super().__init__(**kwargs)
        self.num_units = num_units
        self.axis = axis

    def call(self, inputs):
        """Apply maxout to ``inputs``.

        Raises:
            ValueError: If the size of the chosen axis is not a multiple of
                ``num_units``.
        """
        inputs = tf.convert_to_tensor(inputs)
        shape = inputs.get_shape().as_list()
        # Use the layer's own settings. Reading module-level `num_units` /
        # `axis` would ignore the constructor arguments and fail with
        # NameError when those globals are not defined.
        num_channels = shape[self.axis]
        if num_channels % self.num_units:
            raise ValueError('number of features({}) is not a multiple of num_units({})'.format(num_channels, self.num_units))
        shape[self.axis] = -1
        # Statically unknown dimensions (e.g. an arbitrary batch size) are
        # taken from the runtime shape so that tf.reshape receives no None.
        for i, dim in enumerate(shape):
            if dim is None:
                shape[i] = tf.shape(inputs)[i]
        shape += [num_channels // self.num_units]
        outputs = tf.math.reduce_max(tf.reshape(inputs, shape), -1, keepdims=False)
        return outputs

## 例子

In [10]:
# Build a rank-3 test tensor: the values 90..1 arranged as blocks of 6 rows x 3 columns
a = tf.reshape(tf.range(90, 0, -1), [-1, 6, 3])
a

<tf.Tensor: id=75, shape=(5, 6, 3), dtype=int32, numpy=
array([[[90, 89, 88],
        [87, 86, 85],
        [84, 83, 82],
        [81, 80, 79],
        [78, 77, 76],
        [75, 74, 73]],

       [[72, 71, 70],
        [69, 68, 67],
        [66, 65, 64],
        [63, 62, 61],
        [60, 59, 58],
        [57, 56, 55]],

       [[54, 53, 52],
        [51, 50, 49],
        [48, 47, 46],
        [45, 44, 43],
        [42, 41, 40],
        [39, 38, 37]],

       [[36, 35, 34],
        [33, 32, 31],
        [30, 29, 28],
        [27, 26, 25],
        [24, 23, 22],
        [21, 20, 19]],

       [[18, 17, 16],
        [15, 14, 13],
        [12, 11, 10],
        [ 9,  8,  7],
        [ 6,  5,  4],
        [ 3,  2,  1]]])>

In [11]:
# Set num_units and axis for the example calls
num_units, axis = 1, 1

In [12]:
# The TensorFlow 1 version and the simplified version take the maximum of every few values.
# Pay particular attention to axis = 0: the maximum is taken over every few numbers.
m_af = Maxout_af(num_units=num_units, axis=axis)
m_af(a)

<tf.Tensor: id=79, shape=(5, 1, 3), dtype=int32, numpy=
array([[[90, 84, 78]],

       [[72, 66, 60]],

       [[54, 48, 42]],

       [[36, 30, 24]],

       [[18, 12,  6]]])>

In [13]:
# The TensorFlow Addons version takes the maximum over all values.
# Pay particular attention to axis = 0: the maximum is taken over everything.
m = Maxout(num_units=num_units, axis=axis)
m(a)

<tf.Tensor: id=83, shape=(5, 1, 3), dtype=int32, numpy=
array([[[90, 89, 88]],

       [[72, 71, 70]],

       [[54, 53, 52]],

       [[36, 35, 34]],

       [[18, 17, 16]]])>