/**
* @file methods/ann/layer/layer.hpp
* @author Marcus Edel
*
* Base class for neural network layers.
*
* mlpack is free software; you may redistribute it and/or modify it under the
* terms of the 3-clause BSD license. You should have received a copy of the
* 3-clause BSD license along with mlpack. If not, see
* http://www.opensource.org/licenses/BSD-3-Clause for more information.
*/
#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_HPP
#define MLPACK_METHODS_ANN_LAYER_LAYER_HPP
namespace mlpack {
namespace ann {
/**
 * A layer is an abstract class implementing common neural network operations,
* such as convolution, batch norm, etc. These operations require managing
* weights, losses, updates, and inter-layer connectivity.
*
 * Users implement a custom layer by inheriting from this abstract class and
 * implementing the layer-specific methods. It is recommended that descendants
 * of Layer implement the following methods:
*
* - Constructor: Defines custom layer attributes, and creates layer state
* variables.
*
* - Forward(input, output): Performs the forward logic of applying the layer
* to the input object and storing the result in the output object.
*
* - Backward(input, gy, g): Performs a backpropagation step through the layer,
* with respect to the given input.
*
 * - Gradient(input, error, gradient): Computes the gradient of the layer with
 *   respect to its own parameters.
*
 * The memory for the layer's parameters (weights and biases) is not allocated
 * by the layer itself; instead, it is allocated by the network that the layer
 * belongs to, and passed to the layer when it needs to use it.
*
* See the linear layer implementation for a basic example. It's a layer with
* two variables, w and b, that returns y = w * x + b. It shows how to implement
 * Forward(), Backward() and Gradient(). The weights of the layer are tracked
 * in layer.Parameters().
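 *
 * As a further illustration, the sketch below shows a minimal derived layer
 * (the `Scale` layer is hypothetical, not part of mlpack): it multiplies its
 * input elementwise by a fixed constant and has no trainable weights.
 *
 * @code
 * template<typename MatType = arma::mat>
 * class Scale : public Layer<MatType>
 * {
 *  public:
 *   Scale(const double factor = 2.0) : factor(factor) { }
 *
 *   // Clone() may use a covariant return type.
 *   Scale* Clone() const { return new Scale(*this); }
 *
 *   // y = factor * x.
 *   void Forward(const MatType& input, MatType& output)
 *   { output = factor * input; }
 *
 *   // By the chain rule, g = factor * gy; the input is unused here.
 *   void Backward(const MatType& input, const MatType& gy, MatType& g)
 *   { g = factor * gy; }
 *
 *  private:
 *   double factor;
 * };
 * @endcode
 *
 * Since this layer has no weights, the default SetWeights(), WeightSize(),
 * and Gradient() implementations suffice.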
*
* @tparam MatType Matrix representation to accept as input and use for
* computation.
*/
template<typename MatType = arma::mat>
class Layer
{
public:
//! Default constructor.
Layer() : validOutputDimensions(false), training(false)
{ /* Nothing to do here */ }
  //! Default destructor.
virtual ~Layer() { /* Nothing to do here */ }
//! Copy constructor. This is not responsible for copying weights!
Layer(const Layer& layer) :
inputDimensions(layer.inputDimensions),
outputDimensions(layer.outputDimensions),
validOutputDimensions(layer.validOutputDimensions),
training(layer.training)
{ /* Nothing to do here */ }
//! Make a copy of the object.
virtual Layer* Clone() const = 0;
//! Move constructor. This is not responsible for moving weights!
Layer(Layer&& layer) :
inputDimensions(std::move(layer.inputDimensions)),
outputDimensions(std::move(layer.outputDimensions)),
validOutputDimensions(std::move(layer.validOutputDimensions)),
training(std::move(layer.training))
{ /* Nothing to do here */ }
//! Copy assignment operator. This is not responsible for copying weights!
virtual Layer& operator=(const Layer& layer)
{
if (&layer != this)
{
inputDimensions = layer.inputDimensions;
outputDimensions = layer.outputDimensions;
validOutputDimensions = layer.validOutputDimensions;
training = layer.training;
}
return *this;
}
//! Move assignment operator. This is not responsible for moving weights!
virtual Layer& operator=(Layer&& layer)
{
if (&layer != this)
{
inputDimensions = std::move(layer.inputDimensions);
outputDimensions = std::move(layer.outputDimensions);
validOutputDimensions = std::move(layer.validOutputDimensions);
training = std::move(layer.training);
}
return *this;
}
/**
* Takes an input object, and computes the corresponding output of the layer.
 * In general, input and output are matrices. However, some special layers
 * like table layers might expect something else. Please refer to each
 * layer's specification for further information.
*
* @param * (input) Input data used for evaluating the specified layer.
* @param * (output) Resulting output.
*/
virtual void Forward(const MatType& /* input */,
MatType& /* output */)
{ /* Nothing to do here */ }
/**
 * Takes an input and output object, and computes the corresponding loss of
 * the layer. In general, input and output are matrices. However, some special
 * layers like table layers might expect something else. Please refer to each
 * layer's specification for further information.
*
* @param * (input) Input data used for evaluating the specified layer.
* @param * (output) Resulting output.
*/
virtual void Forward(const MatType& /* input */,
const MatType& /* output */)
{ /* Nothing to do here */ }
/**
* Performs a backpropagation step through the layer, with respect to the
 * given input. In general, this method assumes that Forward(input, output)
 * has been called before with the same input. If you do not respect this
 * rule, Backward(input, gy, g) might compute incorrect results.
 *
 * In general, input, gy, and g are matrices. However, some special
 * sub-classes like table layers might expect something else. Please refer to
 * each module's specification for further information.
 *
 * A backpropagation step consists of computing the gradient with respect to
 * the input of the layer, given the error with respect to its output.
*
* During the backward pass our goal is to use 'gy' in order to compute the
* downstream gradients (g). We assume that the upstream gradient (gy) has
* already been computed and is passed to the layer.
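 *
 * For example, for a linear layer computing y = w * x, the chain rule gives
 * the downstream gradient as g = w.t() * gy (a sketch of the math, not the
 * exact mlpack implementation).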
*
* @param * (input) The propagated input activation.
* @param * (gy) The backpropagated error.
* @param * (g) The calculated gradient.
*/
virtual void Backward(const MatType& /* input */,
const MatType& /* gy */,
MatType& /* g */)
{ /* Nothing to do here */ }
/**
 * Computes the gradient of the layer with respect to its own parameters
 * (weights and biases). The result is returned in gradient.
 *
 * The layer parameters are not updated by the layer itself using the
 * computed gradient; instead, they are updated by the network that holds the
 * instantiated layer.
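 *
 * For example, for a linear layer computing y = w * x + b, the gradient of
 * the loss with respect to w is error * input.t() (a sketch of the math, not
 * the exact mlpack implementation).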
*
* @param * (input) The input parameter used for calculating the gradient.
* @param * (error) The calculated error.
* @param * (gradient) The calculated gradient.
*/
virtual void Gradient(const MatType& /* input */,
const MatType& /* error */,
MatType& /* gradient */)
{ /* Nothing to do here */ }
/**
 * Reset the layer parameters. This method is called to assign the allocated
* memory to the internal layer parameters like weights and biases. The method
* should be called before the first call of Forward(input, output). If you
* do not respect this rule, Forward(input, output) and Backward(input, gy, g)
* might compute incorrect results.
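 *
 * A typical implementation for a layer with `outSize * inSize` weights
 * followed by `outSize` biases might look like this (a sketch; the member
 * names `weights`, `bias`, `outSize`, and `inSize` are illustrative):
 *
 * @code
 * void SetWeights(typename MatType::elem_type* weightsPtr)
 * {
 *   MakeAlias(weights, weightsPtr, outSize, inSize);
 *   MakeAlias(bias, weightsPtr + weights.n_elem, outSize, 1);
 * }
 * @endcode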
*
* @param weightsPtr This pointer should be used as the first element of the
* memory that is allocated for this layer. In general, SetWeights()
* implementations should use MakeAlias() with weightsPtr to wrap the
* weights of a layer.
*/
virtual void SetWeights(typename MatType::elem_type* /* weightsPtr */) { }
/**
* Get the total number of trainable weights in the layer.
*/
virtual size_t WeightSize() const { return 0; }
/**
* Get whether the layer is currently in training mode.
*
* @note During network training, this should be set to `true` for each layer
* in the network, and when predicting/testing the network, this should be set
* to `false`. (This is handled automatically by the `FFN` class and other
* related classes.)
*/
virtual bool const& Training() const { return training; }
/**
* Modify whether the layer is currently in training mode.
*
* @note During network training, this should be set to `true` for each layer
* in the network, and when predicting/testing the network, this should be set
* to `false`. (This is handled automatically by the `FFN` class and other
* related classes.)
*/
virtual bool& Training() { return training; }
  //! Get the layer loss. Override this if the layer should add any extra loss
  //! to the objective when the loss function is computed.
virtual double Loss() { return 0; }
//! Get the input dimensions.
const std::vector<size_t>& InputDimensions() const { return inputDimensions; }
//! Modify the input dimensions.
std::vector<size_t>& InputDimensions()
{
validOutputDimensions = false;
return inputDimensions;
}
//! Get the output dimensions.
const std::vector<size_t>& OutputDimensions()
{
if (!validOutputDimensions)
{
this->ComputeOutputDimensions();
validOutputDimensions = true;
}
return outputDimensions;
}
//! Get the parameters.
virtual const MatType& Parameters() const
{
throw std::invalid_argument("Layer::Parameters(): cannot access parameters "
"of a layer with no weights!");
}
  //! Modify the parameters.
virtual MatType& Parameters()
{
throw std::invalid_argument("Layer::Parameters(): cannot modify parameters "
"of a layer with no weights!");
}
  //! Compute the output dimensions. This should be overridden if the layer is
//! meant to work on higher-dimensional objects. When this is called, it is a
//! safe assumption that InputDimensions() is correct.
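  //! For instance, a layer that flattens its input into a single dimension
  //! might override it like this (a sketch):
  //!
  //!   void ComputeOutputDimensions()
  //!   {
  //!     size_t total = 1;
  //!     for (size_t i = 0; i < this->inputDimensions.size(); ++i)
  //!       total *= this->inputDimensions[i];
  //!     this->outputDimensions = { total };
  //!   }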
virtual void ComputeOutputDimensions()
{
// The default implementation is to assume that the output size is the same
// as the input.
outputDimensions = inputDimensions;
}
  //! Get the number of elements in the output from this layer. This cannot be
  //! overridden! Override `ComputeOutputDimensions()` instead.
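  //! For example, with outputDimensions equal to {10, 10}, this returns 100.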
virtual size_t OutputSize() final
{
if (!validOutputDimensions)
{
this->ComputeOutputDimensions();
validOutputDimensions = true;
}
size_t outputSize = 1;
for (size_t i = 0; i < this->outputDimensions.size(); ++i)
outputSize *= this->outputDimensions[i];
return outputSize;
}
//! Serialize the layer.
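  //! A derived layer typically serializes the base class and then its own
  //! members, e.g. (a sketch; `myMember` is illustrative):
  //!
  //!   template<typename Archive>
  //!   void serialize(Archive& ar, const uint32_t /* version */)
  //!   {
  //!     ar(cereal::base_class<Layer<MatType>>(this));
  //!     ar(CEREAL_NVP(myMember));
  //!   }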
template<typename Archive>
void serialize(Archive& ar, const uint32_t /* version */)
{
ar(CEREAL_NVP(inputDimensions));
ar(CEREAL_NVP(outputDimensions));
ar(CEREAL_NVP(validOutputDimensions));
ar(CEREAL_NVP(training));
// Note that layer weights are serialized by the FFN!
}
protected:
/**
 * Logical input dimensions of each point. Although each point given to
 * `Forward()` will be represented as a column in a matrix, logically
 * speaking it can be a higher-order tensor. So, for instance, if the points
 * are 2-dimensional images of size 10x10, the input to `Forward()` will
 * contain columns with 100 rows, and `inputDimensions` will be `{10, 10}`.
 * This generalizes to higher dimensions.
*/
std::vector<size_t> inputDimensions;
/**
* Logical output dimensions of each point. If the layer only performs
* elementwise operations, this is most likely equal to `inputDimensions`; but
* if the layer performs more complicated transformations, it may be
* different.
*/
std::vector<size_t> outputDimensions;
//! This is `true` if `ComputeOutputDimensions()` has been called, and
//! `outputDimensions` can be considered to be up-to-date.
bool validOutputDimensions;
//! If true, the layer is in training mode; otherwise, it is in testing mode.
bool training;
};
} // namespace ann
} // namespace mlpack
#endif