Gluon PReLU, ELU, SELU, Swish (apache#9662)
* prelu, elu, selu, swish

* update

* fix infer shape

* update infer shape

* update
szha authored and zheng-da committed Jun 28, 2018
1 parent 559ccff commit 547c1d0
Showing 8 changed files with 285 additions and 84 deletions.
3 changes: 2 additions & 1 deletion cpp-package/example/mlp.cpp
@@ -61,14 +61,15 @@ void MLP() {
vector<Symbol> biases(nLayers);
vector<Symbol> outputs(nLayers);

Symbol null_sym;
for (int i = 0; i < nLayers; i++) {
string istr = to_string(i);
weights[i] = Symbol::Variable(string("w") + istr);
biases[i] = Symbol::Variable(string("b") + istr);
Symbol fc = FullyConnected(string("fc") + istr,
i == 0? sym_x : outputs[i-1],
weights[i], biases[i], layerSizes[i]);
outputs[i] = LeakyReLU(string("act") + istr, fc, LeakyReLUActType::kLeaky);
outputs[i] = LeakyReLU(string("act") + istr, fc, null_sym, LeakyReLUActType::kLeaky);
}
auto sym_out = SoftmaxOutput("softmax", outputs[nLayers - 1], sym_label);

2 changes: 2 additions & 0 deletions python/mxnet/gluon/nn/__init__.py
@@ -24,3 +24,5 @@
from .basic_layers import *

from .conv_layers import *

from .activations import *
209 changes: 209 additions & 0 deletions python/mxnet/gluon/nn/activations.py
@@ -0,0 +1,209 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# coding: utf-8
# pylint: disable= arguments-differ
"""Basic neural network layers."""
__all__ = ['Activation', 'LeakyReLU', 'PReLU', 'ELU', 'SELU', 'Swish']

from ... import initializer
from ..block import HybridBlock


class Activation(HybridBlock):
r"""Applies an activation function to input.
Parameters
----------
activation : str
Name of activation function to use.
See :func:`~mxnet.ndarray.Activation` for available choices.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, activation, **kwargs):
self._act_type = activation
super(Activation, self).__init__(**kwargs)

def _alias(self):
return self._act_type

def hybrid_forward(self, F, x):
return F.Activation(x, act_type=self._act_type, name='fwd')

def __repr__(self):
s = '{name}({_act_type})'
return s.format(name=self.__class__.__name__,
**self.__dict__)


class LeakyReLU(HybridBlock):
r"""Leaky version of a Rectified Linear Unit.
It allows a small gradient when the unit is not active
.. math::
f\left(x\right) = \left\{
\begin{array}{lr}
\alpha x & : x \lt 0 \\
x & : x \geq 0 \\
\end{array}
\right.\\
Parameters
----------
alpha : float
slope coefficient for the negative half axis. Must be >= 0.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, alpha, **kwargs):
assert alpha >= 0, "Slope coefficient for LeakyReLU must be no less than 0."
super(LeakyReLU, self).__init__(**kwargs)
self._alpha = alpha

def hybrid_forward(self, F, x):
return F.LeakyReLU(x, act_type='leaky', slope=self._alpha, name='fwd')

def __repr__(self):
s = '{name}({alpha})'
return s.format(name=self.__class__.__name__,
alpha=self._alpha)


class PReLU(HybridBlock):
r"""Parametric leaky version of a Rectified Linear Unit.
From the `"Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification" <https://arxiv.org/abs/1502.01852>`_ paper.
It learns a slope coefficient for the negative half axis when the unit is not active
.. math::
f\left(x\right) = \left\{
\begin{array}{lr}
\alpha x & : x \lt 0 \\
x & : x \geq 0 \\
\end{array}
\right.\\
where alpha is a learned parameter.
Parameters
----------
alpha_initializer : Initializer
Initializer for the learnable `alpha` parameter.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, alpha_initializer=initializer.Constant(0.25), **kwargs):
super(PReLU, self).__init__(**kwargs)
with self.name_scope():
self.alpha = self.params.get('alpha', shape=(1,), init=alpha_initializer)

def hybrid_forward(self, F, x, alpha):
return F.LeakyReLU(x, gamma=alpha, act_type='prelu', name='fwd')


class ELU(HybridBlock):
r"""
Exponential Linear Unit (ELU)
"Fast and Accurate Deep Network Learning by Exponential Linear Units", Clevert et al, 2016
https://arxiv.org/abs/1511.07289
Published as a conference paper at ICLR 2016
Parameters
----------
alpha : float
The alpha parameter, as described by Clevert et al., 2016
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, alpha=1.0, **kwargs):
super(ELU, self).__init__(**kwargs)
self._alpha = alpha

def hybrid_forward(self, F, x):
return F.where(x > 0, x, self._alpha * (F.exp(x) - 1.0))


class SELU(HybridBlock):
r"""
Scaled Exponential Linear Unit (SELU)
"Self-Normalizing Neural Networks", Klambauer et al, 2017
https://arxiv.org/abs/1706.02515
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, **kwargs):
super(SELU, self).__init__(**kwargs)
self._scale = 1.0507009873554804934193349852946
self._alpha = 1.6732632423543772848170429916717

def hybrid_forward(self, F, x):
return self._scale * F.where(x > 0, x, self._alpha * (F.exp(x) - 1.0))


class Swish(HybridBlock):
r"""
Swish activation function (Ramachandran et al., 2017)
https://arxiv.org/pdf/1710.05941.pdf
Parameters
----------
beta : float
The beta parameter in swish(x) = x * sigmoid(beta*x).
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""

def __init__(self, beta=1.0, **kwargs):
super(Swish, self).__init__(**kwargs)
self._beta = beta

def hybrid_forward(self, F, x):
return x * F.sigmoid(self._beta * x, name='fwd')
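
For reference, here is a minimal usage sketch of the new blocks (not part of this commit; it assumes an MXNet build that already includes these changes):

import mxnet as mx
from mxnet.gluon import nn

net = nn.HybridSequential()
with net.name_scope():
    net.add(nn.Dense(16))
    net.add(nn.PReLU())          # learnable scalar alpha, initialized to 0.25
    net.add(nn.Dense(16))
    net.add(nn.ELU(alpha=1.0))   # alpha * (exp(x) - 1) for x < 0
    net.add(nn.Dense(16))
    net.add(nn.SELU())           # fixed scale and alpha from Klambauer et al.
    net.add(nn.Dense(1))
    net.add(nn.Swish(beta=1.0))  # x * sigmoid(beta * x)
net.initialize()
net.hybridize()                  # every block above is a HybridBlock
out = net(mx.nd.random.uniform(shape=(4, 8)))
print(out.shape)                 # (4, 1)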
78 changes: 3 additions & 75 deletions python/mxnet/gluon/nn/basic_layers.py
@@ -18,12 +18,12 @@
# coding: utf-8
# pylint: disable= arguments-differ
"""Basic neural network layers."""
__all__ = ['Sequential', 'HybridSequential', 'Dense', 'Activation',
'Dropout', 'BatchNorm', 'InstanceNorm', 'LeakyReLU', 'Embedding',
'Flatten', 'Lambda', 'HybridLambda']
__all__ = ['Sequential', 'HybridSequential', 'Dense', 'Dropout', 'Embedding',
'BatchNorm', 'InstanceNorm', 'Flatten', 'Lambda', 'HybridLambda']
import warnings
import numpy as np

from .activations import Activation
from ..block import Block, HybridBlock
from ..utils import _indent
from ... import nd, sym
@@ -216,38 +216,6 @@ def __repr__(self):
layout='{0} -> {1}'.format(shape[1] if shape[1] else None, shape[0]))


class Activation(HybridBlock):
r"""Applies an activation function to input.
Parameters
----------
activation : str
Name of activation function to use.
See :func:`~mxnet.ndarray.Activation` for available choices.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, activation, **kwargs):
self._act_type = activation
super(Activation, self).__init__(**kwargs)

def _alias(self):
return self._act_type

def hybrid_forward(self, F, x):
return F.Activation(x, act_type=self._act_type, name='fwd')

def __repr__(self):
s = '{name}({_act_type})'
return s.format(name=self.__class__.__name__,
**self.__dict__)


class Dropout(HybridBlock):
"""Applies Dropout to the input.
@@ -380,46 +348,6 @@ def __repr__(self):
for k, v in self._kwargs.items()]))


class LeakyReLU(HybridBlock):
r"""Leaky version of a Rectified Linear Unit.
It allows a small gradient when the unit is not active
.. math::
f\left(x\right) = \left\{
\begin{array}{lr}
\alpha x & : x \lt 0 \\
x & : x \geq 0 \\
\end{array}
\right.\\
Parameters
----------
alpha : float
slope coefficient for the negative half axis. Must be >= 0.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, alpha, **kwargs):
assert alpha >= 0, "Slope coefficient for LeakyReLU must be no less than 0."
super(LeakyReLU, self).__init__(**kwargs)
self._alpha = alpha

def hybrid_forward(self, F, x):
return F.LeakyReLU(x, act_type='leaky', slope=self._alpha, name='fwd')

def __repr__(self):
s = '{name}({alpha})'
return s.format(name=self.__class__.__name__,
alpha=self._alpha)


class Embedding(HybridBlock):
r"""Turns non-negative integers (indexes/tokens) into dense vectors
of fixed size. eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]
2 changes: 1 addition & 1 deletion python/mxnet/gluon/nn/conv_layers.py
@@ -29,7 +29,7 @@
from ..block import HybridBlock
from ... import symbol
from ...base import numeric_types
from .basic_layers import Activation
from .activations import Activation


def _infer_weight_shape(op_name, data_shape, kwargs):
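
Since nn/__init__.py now pulls in activations (see above), moving Activation and LeakyReLU out of basic_layers.py should be invisible to user code. A quick sketch of the unchanged public import path (illustrative only, not part of the diff):

import mxnet as mx
from mxnet.gluon import nn

# Both blocks still resolve through the public mxnet.gluon.nn namespace,
# even though their definitions now live in nn/activations.py.
act = nn.Activation('relu')
lrelu = nn.LeakyReLU(alpha=0.1)
x = mx.nd.array([[-2.0, 0.0, 3.0]])
print(act(x))    # values: [[0. 0. 3.]]
print(lrelu(x))  # values: [[-0.2  0.  3.]]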
33 changes: 27 additions & 6 deletions src/operator/leaky_relu-inl.h
@@ -111,8 +111,13 @@ class LeakyReLUOp : public Operator {
}
case leakyrelu::kPReLU: {
weight = in_data[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
Assign(out, req[leakyrelu::kOut],
F<mshadow_op::xelu>(data, mshadow::expr::broadcast<1>(weight, out.shape_)));
if (weight.shape_.Size() == 1) {
Assign(out, req[leakyrelu::kOut],
F<mshadow_op::xelu>(data, mshadow::expr::broadcast_scalar(weight, out.shape_)));
} else {
Assign(out, req[leakyrelu::kOut],
F<mshadow_op::xelu>(data, mshadow::expr::broadcast<1>(weight, out.shape_)));
}
break;
}
case leakyrelu::kRReLU: {
@@ -177,9 +182,21 @@
case leakyrelu::kPReLU: {
weight = in_data[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
grad_weight = in_grad[leakyrelu::kGamma].get<xpu, 1, real_t>(s);
grad_weight = sumall_except_dim<1>(F<prelu_grad>(data) * grad);
gdata = F<mshadow_op::xelu_grad>(data, mshadow::expr::broadcast<1>(weight, data.shape_))
* grad;
if (weight.shape_.Size() == 1) {
Shape<4> gshape = Shape4(1, grad.shape_[0], grad.shape_[1], grad.shape_[2]);
Assign(grad_weight, req[leakyrelu::kGamma],
sumall_except_dim<0>(reshape(F<prelu_grad>(data) * grad, gshape)));
Assign(gdata, req[leakyrelu::kData],
F<mshadow_op::xelu_grad>(data,
mshadow::expr::broadcast_scalar(weight, data.shape_))
* grad);
} else {
Assign(grad_weight, req[leakyrelu::kGamma],
sumall_except_dim<1>(F<prelu_grad>(data) * grad));
Assign(gdata, req[leakyrelu::kData],
F<mshadow_op::xelu_grad>(data, mshadow::expr::broadcast<1>(weight, data.shape_))
* grad);
}
break;
}
case leakyrelu::kRReLU: {
@@ -225,7 +242,11 @@ class LeakyReLUProp : public OperatorProperty {
const TShape &dshape = in_shape->at(leakyrelu::kData);
if (dshape.ndim() == 0) return false;
if (param_.act_type == leakyrelu::kPReLU) {
in_shape->at(leakyrelu::kGamma) = TShape(Shape1(dshape[1]));
const TShape &gshape = in_shape->at(leakyrelu::kGamma);
if (gshape.ndim() == 1 && gshape.Size() == 1)
in_shape->at(leakyrelu::kGamma) = TShape(Shape1(1));
else
in_shape->at(leakyrelu::kGamma) = TShape(Shape1(dshape[1]));
}
out_shape->clear();
out_shape->push_back(dshape);
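
With this change, the prelu mode of LeakyReLU accepts either the usual per-channel gamma of shape (num_channels,) or a scalar gamma of shape (1,), which is broadcast over the whole input; the scalar form is what the new gluon.nn.PReLU block uses. A small NDArray-level sketch of both shapes (illustrative only; assumes a build containing this change):

import mxnet as mx

x = mx.nd.random.uniform(-1, 1, shape=(2, 3, 4, 4))

# Per-channel gamma: one learned slope per channel (axis 1), as before.
y_channel = mx.nd.LeakyReLU(data=x, gamma=mx.nd.full((3,), 0.25), act_type='prelu')

# Scalar gamma: a single learned slope broadcast over every element (new path).
y_scalar = mx.nd.LeakyReLU(data=x, gamma=mx.nd.array([0.25]), act_type='prelu')

print(y_channel.shape, y_scalar.shape)  # both (2, 3, 4, 4)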
