
Finish TODOs in NHiTs and NBEATs #955

Merged · 16 commits · May 18, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ Darts is still in an early development phase and we cannot always guarantee back
**Improved**
- Option to avoid global matplotlib configuration changes.
[#924](https://github.com/unit8co/darts/pull/924) by [Mike Richman](https://github.com/zgana).
- Model improvements: option to change the activation function for NHiTS and NBEATS, dropout support for NBEATS, and AvgPool1d support for NHiTS. [#955](https://github.com/unit8co/darts/pull/955) by [Greg DeVos](https://github.com/gdevos010).
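For illustration, a minimal usage sketch of the new options (not part of the PR; the dataset and hyperparameter values are arbitrary):

```python
# Sketch: using the activation/dropout options added in #955 (values are illustrative).
from darts.datasets import AirPassengersDataset
from darts.models import NBEATSModel

series = AirPassengersDataset().load()

model = NBEATSModel(
    input_chunk_length=24,
    output_chunk_length=12,
    activation="LeakyReLU",  # any name from the ACTIVATIONS list, default "ReLU"
    dropout=0.1,             # dropout probability in the fully connected layers, default 0.0
    n_epochs=1,
)
model.fit(series)
forecast = model.predict(n=12)
```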

## [0.19.0](https://github.com/unit8co/darts/tree/0.19.0) (2022-04-13)
### For users of the library:
110 changes: 102 additions & 8 deletions darts/models/forecasting/nbeats.py
@@ -17,6 +17,20 @@
logger = get_logger(__name__)


ACTIVATIONS = [
"ReLU",
"RReLU",
"PReLU",
"ELU",
"Softplus",
"Tanh",
"SELU",
"LeakyReLU",
"Sigmoid",
"GELU",
]
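Each string above names a class in `torch.nn`. As a standalone sketch (abbreviated list, hypothetical helper name), the validate-then-instantiate pattern used further down looks like this:

```python
# Sketch of how an activation name is checked and instantiated from torch.nn.
import torch.nn as nn

ACTIVATIONS = ["ReLU", "LeakyReLU", "GELU"]  # abbreviated copy for illustration

def build_activation(name: str) -> nn.Module:
    # The name must match one of the allowed torch.nn classes.
    assert name in ACTIVATIONS, f"{name} is not in {ACTIVATIONS}"
    return getattr(nn, name)()  # e.g. nn.GELU()

print(build_activation("GELU"))  # prints the module repr, e.g. GELU()
```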


class _GType(Enum):
GENERIC = 1
TREND = 2
@@ -79,6 +93,9 @@ def __init__(
input_chunk_length: int,
target_length: int,
g_type: GTypes,
batch_norm: bool,
dropout: float,
activation: str,
):
"""PyTorch module implementing the basic building block of the N-BEATS architecture.

@@ -104,6 +121,12 @@ def __init__(
The length of the forecast of the model.
g_type
The type of function that is implemented by the waveform generator.
batch_norm
Whether to use batch norm.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.

Inputs
------
@@ -126,13 +149,25 @@ def __init__(
self.target_length = target_length
self.nr_params = nr_params
self.g_type = g_type
self.relu = nn.ReLU()
self.dropout = dropout
self.batch_norm = batch_norm

assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
self.activation = getattr(nn, activation)()

# fully connected stack before fork
self.linear_layer_stack_list = [nn.Linear(input_chunk_length, layer_width)]
self.linear_layer_stack_list += [
nn.Linear(layer_width, layer_width) for _ in range(num_layers - 1)
]
for _ in range(num_layers - 1):
self.linear_layer_stack_list.append(nn.Linear(layer_width, layer_width))

if self.batch_norm:
self.linear_layer_stack_list.append(
nn.BatchNorm1d(num_features=self.layer_width)
)

if self.dropout > 0:
self.linear_layer_stack_list.append(nn.Dropout(p=self.dropout))

self.fc_stack = nn.ModuleList(self.linear_layer_stack_list)

# Fully connected layer producing forecast/backcast expansion coefficients (waveform generator parameters).
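As a self-contained sketch of what the stack above amounts to (dimensions and hyperparameters are made up): a chain of Linear layers, optionally followed by BatchNorm1d and Dropout, with the chosen activation applied after every entry in the forward pass:

```python
# Standalone sketch of the fully connected stack; shapes are illustrative only.
import torch
import torch.nn as nn

input_chunk_length, layer_width, num_layers, dropout = 24, 64, 4, 0.1

layers = [nn.Linear(input_chunk_length, layer_width)]
layers += [nn.Linear(layer_width, layer_width) for _ in range(num_layers - 1)]
layers.append(nn.BatchNorm1d(num_features=layer_width))  # only if batch_norm is enabled
layers.append(nn.Dropout(p=dropout))                     # only if dropout > 0
fc_stack = nn.ModuleList(layers)

activation = nn.ReLU()
x = torch.randn(8, input_chunk_length)  # (batch, input_chunk_length)
for layer in fc_stack:
    x = activation(layer(x))
print(x.shape)  # torch.Size([8, 64])
```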
@@ -172,7 +207,7 @@ def forward(self, x):

# fully connected layer stack
for layer in self.linear_layer_stack_list:
x = self.relu(layer(x))
x = self.activation(layer(x))

# forked linear layers producing waveform generator parameters
theta_backcast = self.backcast_linear_layer(x)
@@ -202,6 +237,9 @@ def __init__(
input_chunk_length: int,
target_length: int,
g_type: GTypes,
batch_norm: bool,
dropout: float,
activation: str,
):
"""PyTorch module implementing one stack of the N-BEATS architecture that comprises multiple basic blocks.

@@ -223,6 +261,12 @@ def __init__(
The length of the forecast of the model.
g_type
The function that is implemented by the waveform generators in each block.
batch_norm
Whether to apply batch norm on the first block of this stack.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.

Inputs
------
@@ -243,6 +287,9 @@ def __init__(
self.input_chunk_length = input_chunk_length
self.target_length = target_length
self.nr_params = nr_params
self.dropout = dropout
self.batch_norm = batch_norm
self.activation = activation

if g_type == _GType.GENERIC:
self.blocks_list = [
@@ -254,8 +301,13 @@
input_chunk_length,
target_length,
g_type,
batch_norm=(
self.batch_norm and i == 0
), # batch norm only on first block of first stack
dropout=self.dropout,
activation=self.activation,
)
for _ in range(num_blocks)
for i in range(num_blocks)
]
else:
# same block instance is used for weight sharing
@@ -267,6 +319,9 @@
input_chunk_length,
target_length,
g_type,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
self.blocks_list = [interpretable_block] * num_blocks
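A short standalone aside on the weight sharing above: repeating one module instance in a ModuleList registers a single set of parameters, unlike building separate instances (a sketch, not part of the diff):

```python
# Sketch: the same Linear instance repeated three times shares one weight/bias pair.
import torch.nn as nn

block = nn.Linear(4, 4)
shared = nn.ModuleList([block] * 3)                            # weight sharing
distinct = nn.ModuleList([nn.Linear(4, 4) for _ in range(3)])  # independent blocks

print(len(list(shared.parameters())))    # 2
print(len(list(distinct.parameters())))  # 6
```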

@@ -310,7 +365,10 @@ def __init__(
layer_widths: List[int],
expansion_coefficient_dim: int,
trend_polynomial_degree: int,
**kwargs
batch_norm: bool,
dropout: float,
activation: str,
**kwargs,
):
"""PyTorch module implementing the N-BEATS architecture.

@@ -342,6 +400,12 @@
trend_polynomial_degree
The degree of the polynomial used as waveform generator in trend stacks. Only used if
`generic_architecture` is set to `False`.
batch_norm
Whether to apply batch norm on the first block of the first stack.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.
**kwargs
all parameters required for :class:`darts.model.forecasting_models.PLForecastingModule` base class.

@@ -363,6 +427,9 @@
self.nr_params = nr_params
self.input_chunk_length_multi = self.input_chunk_length * input_dim
self.target_length = self.output_chunk_length * input_dim
self.dropout = dropout
self.batch_norm = batch_norm
self.activation = activation

if generic_architecture:
self.stacks_list = [
@@ -375,6 +442,11 @@
self.input_chunk_length_multi,
self.target_length,
_GType.GENERIC,
batch_norm=(
self.batch_norm and i == 0
), # batch norm only on first block of first stack
dropout=self.dropout,
activation=self.activation,
)
for i in range(num_stacks)
]
@@ -389,6 +461,9 @@
self.input_chunk_length_multi,
self.target_length,
_GType.TREND,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
seasonality_stack = _Stack(
num_blocks,
@@ -399,6 +474,9 @@
self.input_chunk_length_multi,
self.target_length,
_GType.SEASONALITY,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
self.stacks_list = [trend_stack, seasonality_stack]

@@ -460,7 +538,9 @@ def __init__(
layer_widths: Union[int, List[int]] = 256,
expansion_coefficient_dim: int = 5,
trend_polynomial_degree: int = 2,
**kwargs
dropout: float = 0.0,
activation: str = "ReLU",
**kwargs,
):
"""Neural Basis Expansion Analysis Time Series Forecasting (N-BEATS).

@@ -502,6 +582,11 @@ def __init__(
trend_polynomial_degree
The degree of the polynomial used as waveform generator in trend stacks. Only used if
`generic_architecture` is set to `False`.
dropout
The dropout probability to be used in the fully connected layers (default=0.0).
activation
The activation function of the encoder/decoder intermediate layer (default='ReLU').
Supported activations: ['ReLU', 'RReLU', 'PReLU', 'ELU', 'Softplus', 'Tanh', 'SELU', 'LeakyReLU', 'Sigmoid', 'GELU'].
**kwargs
Optional arguments to initialize the pytorch_lightning.Module, pytorch_lightning.Trainer, and
Darts' :class:`TorchForecastingModel`.
@@ -656,6 +741,12 @@ def __init__(
self.expansion_coefficient_dim = expansion_coefficient_dim
self.trend_polynomial_degree = trend_polynomial_degree

# Currently batch norm is not an option as it seems to perform badly
self.batch_norm = False

self.dropout = dropout
self.activation = activation

if not generic_architecture:
self.num_stacks = 2

@@ -681,5 +772,8 @@ def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module:
layer_widths=self.layer_widths,
expansion_coefficient_dim=self.expansion_coefficient_dim,
trend_polynomial_degree=self.trend_polynomial_degree,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
**self.pl_module_params,
)