
Finish TODOs in NHiTs and NBEATs #955

Merged · 16 commits · May 18, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ Darts is still in an early development phase and we cannot always guarantee back
**Improved**
- Option to avoid global matplotlib configuration changes.
[#924](https://github.com/unit8co/darts/pull/924) by [Mike Richman](https://github.com/zgana).
- Model improvements: option to change the activation function for NHiTS and NBEATS, dropout support for NBEATS, and AvgPool1d support for NHiTS. [#955](https://github.com/unit8co/darts/pull/955) by [Greg DeVos](https://github.com/gdevos010).
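For illustration, a minimal usage sketch of the new options (not part of the PR; the dataset and hyperparameter values are arbitrary):

```python
# Sketch: using the activation/dropout options added in #955 (values are illustrative).
from darts.datasets import AirPassengersDataset
from darts.models import NBEATSModel

series = AirPassengersDataset().load()

model = NBEATSModel(
    input_chunk_length=24,
    output_chunk_length=12,
    activation="LeakyReLU",  # any name from the ACTIVATIONS list, default "ReLU"
    dropout=0.1,             # dropout probability in the fully connected layers, default 0.0
    n_epochs=1,
)
model.fit(series)
forecast = model.predict(n=12)
```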

## [0.19.0](https://github.com/unit8co/darts/tree/0.19.0) (2022-04-13)
### For users of the library:
110 changes: 102 additions & 8 deletions darts/models/forecasting/nbeats.py
@@ -17,6 +17,20 @@
logger = get_logger(__name__)


ACTIVATIONS = [
"ReLU",
"RReLU",
"PReLU",
"ELU",
"Softplus",
"Tanh",
"SELU",
"LeakyReLU",
"Sigmoid",
"GELU",
]
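Each string above names a class in `torch.nn`. As a standalone sketch (abbreviated list, hypothetical helper name), the validate-then-instantiate pattern used further down looks like this:

```python
# Sketch of how an activation name is checked and instantiated from torch.nn.
import torch.nn as nn

ACTIVATIONS = ["ReLU", "LeakyReLU", "GELU"]  # abbreviated copy for illustration

def build_activation(name: str) -> nn.Module:
    # The name must match one of the allowed torch.nn classes.
    assert name in ACTIVATIONS, f"{name} is not in {ACTIVATIONS}"
    return getattr(nn, name)()  # e.g. nn.GELU()

print(build_activation("GELU"))  # prints the module repr, e.g. GELU()
```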


class _GType(Enum):
GENERIC = 1
TREND = 2
@@ -79,6 +93,9 @@ def __init__(
input_chunk_length: int,
target_length: int,
g_type: GTypes,
batch_norm: bool,
dropout: float,
activation: str,
):
"""PyTorch module implementing the basic building block of the N-BEATS architecture.

@@ -104,6 +121,12 @@ def __init__(
The length of the forecast of the model.
g_type
The type of function that is implemented by the waveform generator.
batch_norm
Whether to use batch norm.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.

Inputs
------
@@ -126,13 +149,25 @@ def __init__(
self.target_length = target_length
self.nr_params = nr_params
self.g_type = g_type
self.relu = nn.ReLU()
self.dropout = dropout
self.batch_norm = batch_norm

assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
self.activation = getattr(nn, activation)()

# fully connected stack before fork
self.linear_layer_stack_list = [nn.Linear(input_chunk_length, layer_width)]
self.linear_layer_stack_list += [
nn.Linear(layer_width, layer_width) for _ in range(num_layers - 1)
]
for _ in range(num_layers - 1):
self.linear_layer_stack_list.append(nn.Linear(layer_width, layer_width))

if self.batch_norm:
self.linear_layer_stack_list.append(
nn.BatchNorm1d(num_features=self.layer_width)
)

if self.dropout > 0:
self.linear_layer_stack_list.append(nn.Dropout(p=self.dropout))

self.fc_stack = nn.ModuleList(self.linear_layer_stack_list)

# Fully connected layer producing forecast/backcast expansion coefficients (waveform generator parameters).
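As a self-contained sketch of what the stack above amounts to (dimensions and hyperparameters are made up): a chain of Linear layers, optionally followed by BatchNorm1d and Dropout, with the chosen activation applied after every entry in the forward pass:

```python
# Standalone sketch of the fully connected stack; shapes are illustrative only.
import torch
import torch.nn as nn

input_chunk_length, layer_width, num_layers, dropout = 24, 64, 4, 0.1

layers = [nn.Linear(input_chunk_length, layer_width)]
layers += [nn.Linear(layer_width, layer_width) for _ in range(num_layers - 1)]
layers.append(nn.BatchNorm1d(num_features=layer_width))  # only if batch_norm is enabled
layers.append(nn.Dropout(p=dropout))                     # only if dropout > 0
fc_stack = nn.ModuleList(layers)

activation = nn.ReLU()
x = torch.randn(8, input_chunk_length)  # (batch, input_chunk_length)
for layer in fc_stack:
    x = activation(layer(x))
print(x.shape)  # torch.Size([8, 64])
```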
@@ -172,7 +207,7 @@ def forward(self, x):

# fully connected layer stack
for layer in self.linear_layer_stack_list:
x = self.relu(layer(x))
x = self.activation(layer(x))

# forked linear layers producing waveform generator parameters
theta_backcast = self.backcast_linear_layer(x)
@@ -202,6 +237,9 @@ def __init__(
input_chunk_length: int,
target_length: int,
g_type: GTypes,
batch_norm: bool,
dropout: float,
activation: str,
):
"""PyTorch module implementing one stack of the N-BEATS architecture that comprises multiple basic blocks.

@@ -223,6 +261,12 @@ def __init__(
The length of the forecast of the model.
g_type
The function that is implemented by the waveform generators in each block.
batch_norm
Whether to apply batch norm on the first block of this stack.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.

Inputs
------
@@ -243,6 +287,9 @@ def __init__(
self.input_chunk_length = input_chunk_length
self.target_length = target_length
self.nr_params = nr_params
self.dropout = dropout
self.batch_norm = batch_norm
self.activation = activation

if g_type == _GType.GENERIC:
self.blocks_list = [
@@ -254,8 +301,13 @@
input_chunk_length,
target_length,
g_type,
batch_norm=(
self.batch_norm and i == 0
), # batch norm only on first block of first stack
dropout=self.dropout,
activation=self.activation,
)
for _ in range(num_blocks)
for i in range(num_blocks)
]
else:
# same block instance is used for weight sharing
@@ -267,6 +319,9 @@
input_chunk_length,
target_length,
g_type,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
self.blocks_list = [interpretable_block] * num_blocks
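A short standalone aside on the weight sharing above: repeating one module instance in a ModuleList registers a single set of parameters, unlike building separate instances (a sketch, not part of the diff):

```python
# Sketch: the same Linear instance repeated three times shares one weight/bias pair.
import torch.nn as nn

block = nn.Linear(4, 4)
shared = nn.ModuleList([block] * 3)                            # weight sharing
distinct = nn.ModuleList([nn.Linear(4, 4) for _ in range(3)])  # independent blocks

print(len(list(shared.parameters())))    # 2
print(len(list(distinct.parameters())))  # 6
```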

@@ -310,7 +365,10 @@ def __init__(
layer_widths: List[int],
expansion_coefficient_dim: int,
trend_polynomial_degree: int,
**kwargs
batch_norm: bool,
dropout: float,
activation: str,
**kwargs,
):
"""PyTorch module implementing the N-BEATS architecture.

@@ -342,6 +400,12 @@
trend_polynomial_degree
The degree of the polynomial used as waveform generator in trend stacks. Only used if
`generic_architecture` is set to `False`.
batch_norm
Whether to apply batch norm on the first block of the first stack.
dropout
Dropout probability.
activation
The activation function of the encoder/decoder intermediate layer.
**kwargs
all parameters required for :class:`darts.model.forecasting_models.PLForecastingModule` base class.

@@ -363,6 +427,9 @@
self.nr_params = nr_params
self.input_chunk_length_multi = self.input_chunk_length * input_dim
self.target_length = self.output_chunk_length * input_dim
self.dropout = dropout
self.batch_norm = batch_norm
self.activation = activation

if generic_architecture:
self.stacks_list = [
@@ -375,6 +442,11 @@
self.input_chunk_length_multi,
self.target_length,
_GType.GENERIC,
batch_norm=(
self.batch_norm and i == 0
), # batch norm only on first block of first stack
dropout=self.dropout,
activation=self.activation,
)
for i in range(num_stacks)
]
@@ -389,6 +461,9 @@
self.input_chunk_length_multi,
self.target_length,
_GType.TREND,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
seasonality_stack = _Stack(
num_blocks,
@@ -399,6 +474,9 @@
self.input_chunk_length_multi,
self.target_length,
_GType.SEASONALITY,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
)
self.stacks_list = [trend_stack, seasonality_stack]

@@ -460,7 +538,9 @@ def __init__(
layer_widths: Union[int, List[int]] = 256,
expansion_coefficient_dim: int = 5,
trend_polynomial_degree: int = 2,
**kwargs
dropout: float = 0.0,
activation: str = "ReLU",
**kwargs,
):
"""Neural Basis Expansion Analysis Time Series Forecasting (N-BEATS).

@@ -502,6 +582,11 @@ def __init__(
trend_polynomial_degree
The degree of the polynomial used as waveform generator in trend stacks. Only used if
`generic_architecture` is set to `False`.
dropout
The dropout probability to be used in the fully connected layers (default=0.0).
activation
The activation function of the encoder/decoder intermediate layer (default='ReLU').
Supported activations: ['ReLU', 'RReLU', 'PReLU', 'ELU', 'Softplus', 'Tanh', 'SELU', 'LeakyReLU', 'Sigmoid', 'GELU'].
**kwargs
Optional arguments to initialize the pytorch_lightning.Module, pytorch_lightning.Trainer, and
Darts' :class:`TorchForecastingModel`.
@@ -656,6 +741,12 @@ def __init__(
self.expansion_coefficient_dim = expansion_coefficient_dim
self.trend_polynomial_degree = trend_polynomial_degree

# Currently batch norm is not an option as it seems to perform badly
self.batch_norm = False

self.dropout = dropout
self.activation = activation

if not generic_architecture:
self.num_stacks = 2

@@ -681,5 +772,8 @@ def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module:
layer_widths=self.layer_widths,
expansion_coefficient_dim=self.expansion_coefficient_dim,
trend_polynomial_degree=self.trend_polynomial_degree,
batch_norm=self.batch_norm,
dropout=self.dropout,
activation=self.activation,
**self.pl_module_params,
)