# Reproduction notes



<""" """> and <#> : comments added by authors.

markdown block and <###> : description and comments added by FSFM MADS DEEP LEARNING student group.

No additions to the original code.

# Package import

In [None]:
import json
import os
import random
import string
import sys

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm

In [None]:
from absl import app
from absl import flags
from absl import logging

In [None]:
from pandas.tseries.holiday import EasterMonday
from pandas.tseries.holiday import GoodFriday
from pandas.tseries.holiday import Holiday
from pandas.tseries.holiday import SU
from pandas.tseries.holiday import TH
from pandas.tseries.holiday import USColumbusDay
from pandas.tseries.holiday import USLaborDay
from pandas.tseries.holiday import USMartinLutherKingJr
from pandas.tseries.holiday import USMemorialDay
from pandas.tseries.holiday import USPresidentsDay
from pandas.tseries.holiday import USThanksgivingDay
from pandas.tseries.offsets import DateOffset
from pandas.tseries.offsets import Day
from pandas.tseries.offsets import Easter

# Time features

This block is used to extract and construct the time features listed below:

1. moh: minute_of_hour
2. hod: hour_of_day
3. dom: day_of_month
4. dow: day_of_week
5. doy: day_of_year
6. moy: month_of_year
7. woy: week_of_year
8. normalized distance (days) between the timestamp of a sample and each holiday (18 holidays are taken into consideration, so 18 columns will be generated)

Input is the datetime index of the dataset and a flag indicating whether holiday features should be computed.

Output is a dataframe that contains features mentioned above.

In [None]:
"""Directory to extract time covariates.

Extract time covariates from datetime.
"""

# This is 183 to cover half a year (in both directions), also for leap years
# + 17 as Eastern can be between March, 22 - April, 25
MAX_WINDOW = 183 + 17


def _distance_to_holiday(holiday):
  """Return distance to given holiday."""

  def _distance_to_day(index):
    holiday_date = holiday.dates(
        index - pd.Timedelta(days=MAX_WINDOW),
        index + pd.Timedelta(days=MAX_WINDOW),
    )
    assert (
        len(holiday_date) != 0  # pylint: disable=g-explicit-length-test
    ), f"No closest holiday for the date index {index} found."
    # It sometimes returns two dates if it is exactly half a year after the
    # holiday. In this case, the smaller distance (182 days) is returned.
    return (index - holiday_date[0]).days

  return _distance_to_day


# Custom holiday rules that complement the ones shipped with pandas. Rules
# with an `offset` start from the given month/day and then apply the offset(s)
# in order (e.g. "4th Thursday of November, plus one day" for Black Friday).
EasterSunday = Holiday(
    "Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)]
)
NewYearsDay = Holiday("New Years Day", month=1, day=1)
SuperBowl = Holiday(
    "Superbowl", month=2, day=1, offset=DateOffset(weekday=SU(1))
)
MothersDay = Holiday(
    "Mothers Day", month=5, day=1, offset=DateOffset(weekday=SU(2))
)
IndependenceDay = Holiday("Independence Day", month=7, day=4)
# Was labelled "Christmas", which collided with the name of ChristmasDay.
ChristmasEve = Holiday("Christmas Eve", month=12, day=24)
ChristmasDay = Holiday("Christmas", month=12, day=25)
NewYearsEve = Holiday("New Years Eve", month=12, day=31)
BlackFriday = Holiday(
    "Black Friday",
    month=11,
    day=1,
    offset=[DateOffset(weekday=TH(4)), Day(1)],
)
CyberMonday = Holiday(
    "Cyber Monday",
    month=11,
    day=1,
    offset=[DateOffset(weekday=TH(4)), Day(4)],
)

# Holidays used for the holiday-distance features. The position of a holiday
# in this list determines the index `i` of its `hol_{i}` feature column, so
# the order must not be changed.
HOLIDAYS = [
    EasterMonday,
    GoodFriday,
    USColumbusDay,
    USLaborDay,
    USMartinLutherKingJr,
    USMemorialDay,
    USPresidentsDay,
    USThanksgivingDay,
    EasterSunday,
    NewYearsDay,
    SuperBowl,
    MothersDay,
    IndependenceDay,
    ChristmasEve,
    ChristmasDay,
    NewYearsEve,
    BlackFriday,
    CyberMonday,
]


class TimeCovariates:
  """Build calendar covariates (and optionally holiday ones) for timestamps.

  Seven calendar features are always produced: minute of hour, hour of day,
  day of month, day of week, day of year, month of year and week of year.
  When `holiday` is set, one extra column per entry of `HOLIDAYS` is appended.
  """

  def __init__(
      self,
      datetimes,
      normalized=True,
      holiday=False,
  ):
    """Store the configuration.

    Args:
      datetimes: pandas DatetimeIndex (lowest granularity supported is min)
      normalized: whether to rescale the calendar features or not
      holiday: whether to add the holiday-distance features or not

    Returns:
      None
    """
    self.dti = datetimes
    self.normalized = normalized
    self.holiday = holiday

  def _scaled(self, raw_values, divisor):
    """Cast to float32 and, if requested, map to `value / divisor - 0.5`."""
    feature = np.array(raw_values, dtype=np.float32)
    if not self.normalized:
      return feature
    return feature / divisor - 0.5

  def _minute_of_hour(self):
    return self._scaled(self.dti.minute, 59.0)

  def _hour_of_day(self):
    return self._scaled(self.dti.hour, 23.0)

  def _day_of_week(self):
    return self._scaled(self.dti.dayofweek, 6.0)

  def _day_of_month(self):
    return self._scaled(self.dti.day, 30.0)

  def _day_of_year(self):
    return self._scaled(self.dti.dayofyear, 364.0)

  def _month_of_year(self):
    return self._scaled(self.dti.month, 11.0)

  def _week_of_year(self):
    # "%U" is the week number of the year with Sunday as first day of week.
    return self._scaled(self.dti.strftime("%U").astype(int), 51.0)

  def _get_holidays(self):
    """Return standardized holiday distances, one row per holiday."""
    stamps = self.dti.to_series()
    per_holiday = []
    for hol in tqdm(HOLIDAYS):
      per_holiday.append(stamps.apply(_distance_to_holiday(hol)).values)
    distances = np.vstack(per_holiday)
    # `distances` is (num_holiday, num_time_steps); the scaler standardizes
    # columns, so transpose to scale each holiday over the time dimension.
    return StandardScaler().fit_transform(distances.T).T

  def get_covariates(self):
    """Return a DataFrame of all covariates, indexed by the timestamps."""
    named_features = [
        ("moh", self._minute_of_hour()),
        ("hod", self._hour_of_day()),
        ("dom", self._day_of_month()),
        ("dow", self._day_of_week()),
        ("doy", self._day_of_year()),
        ("moy", self._month_of_year()),
        ("woy", self._week_of_year()),
    ]
    columns = [name for name, _ in named_features]
    rows = [values.reshape(1, -1) for _, values in named_features]

    if self.holiday:
      rows.append(self._get_holidays())
      columns.extend(f"hol_{i}" for i in range(len(HOLIDAYS)))

    stacked = np.vstack(rows)
    return pd.DataFrame(
        data=stacked.transpose(),
        columns=columns,
        index=self.dti,
    )


# Data loader

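This block is used to load a time-series CSV file and to generate the training, validation and test batches as TensorFlow datasets.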

In [None]:
"""TF dataloaders for general timeseries datasets.

The expected input format is csv file with a datetime index.
"""

class TimeSeriesdata(object):
  """Data loader class.

  Reads a CSV file with a datetime column, builds (feature, time) matrices for
  the time series, the time covariates and the numerical / categorical
  covariates, and yields fixed-size windows made of `hist_len` past steps
  followed by `pred_len` future steps.
  """

  def __init__(
      self,
      data_path,
      datetime_col,
      num_cov_cols,
      cat_cov_cols,
      ts_cols,
      train_range,
      val_range,
      test_range,
      hist_len,
      pred_len,
      batch_size,
      freq='H',
      normalize=True,
      epoch_len=None,
      holiday=False,
      permute=True,
  ):
    """Initialize objects.

    Args:
      data_path: path to csv file
      datetime_col: column name for datetime col
      num_cov_cols: list of numerical global covariates
      cat_cov_cols: list of categorical global covariates
      ts_cols: columns corresponding to ts
      train_range: tuple of train ranges
      val_range: tuple of validation ranges
      test_range: tuple of test ranges
      hist_len: historical context
      pred_len: prediction length
      batch_size: batch size (number of ts in a batch)
      freq: freq of original data
      normalize: std. normalize data or not
      epoch_len: num iters in an epoch
      holiday: use holiday features or not
      permute: permute ts in train batches or not

    Returns:
      None
    """
    # Local import so this block does not depend on extra file-level imports.
    from pandas.tseries.frequencies import to_offset  # pylint: disable=g-import-not-at-top

    # Use a context manager so the file handle is closed after parsing.
    with open(data_path, 'r') as csv_file:
      self.data_df = pd.read_csv(csv_file)
    # Add constant dummy columns when no covariates are given so that the
    # covariate matrices built below are never empty.
    if not num_cov_cols:
      self.data_df['ncol'] = np.zeros(self.data_df.shape[0])
      num_cov_cols = ['ncol']
    if not cat_cov_cols:
      self.data_df['ccol'] = np.zeros(self.data_df.shape[0])
      cat_cov_cols = ['ccol']
    self.data_df.fillna(0, inplace=True)
    self.data_df.set_index(
        pd.DatetimeIndex(self.data_df[datetime_col]), inplace=True
    )
    self.num_cov_cols = num_cov_cols
    self.cat_cov_cols = cat_cov_cols
    self.ts_cols = ts_cols
    self.train_range = train_range
    self.val_range = val_range
    self.test_range = test_range
    data_df_idx = self.data_df.index

    # date_index holds the timestamps of the dataset followed by pred_len + 1
    # future timestamps at the same frequency. The first future timestamp is
    # one full period after the last observation: the previous
    # `pd.Timedelta(1, freq=freq)` did not add a period (the `freq` keyword is
    # ignored when a positional value is given), which mis-aligned the future
    # timestamps by almost one step.
    first_future = data_df_idx[-1] + to_offset(freq)
    date_index = data_df_idx.union(
        pd.date_range(first_future, periods=pred_len + 1, freq=freq)
    )

    # Time covariates are computed for every timestamp in date_index.
    self.time_df = TimeCovariates(
        date_index, holiday=holiday
    ).get_covariates()

    self.hist_len = hist_len
    self.pred_len = pred_len
    self.batch_size = batch_size
    self.freq = freq
    self.normalize = normalize

    # Matrices are stored as (num_features, num_time_steps):
    #   data_mat:     the time series, truncated at the end of the test range
    #   time_mat:     time covariates (also covers the future timestamps)
    #   num_feat_mat: numerical global covariates
    #   cat_feat_mat: integer-encoded categorical global covariates
    self.data_mat = self.data_df[self.ts_cols].to_numpy().transpose()
    self.data_mat = self.data_mat[:, 0 : self.test_range[1]]
    self.time_mat = self.time_df.to_numpy().transpose()
    self.num_feat_mat = self.data_df[num_cov_cols].to_numpy().transpose()
    self.cat_feat_mat, self.cat_sizes = self._get_cat_cols(cat_cov_cols)

    if normalize:
      self._normalize_data()
    logging.info(
        'Data Shapes: %s, %s, %s, %s',
        self.data_mat.shape,
        self.time_mat.shape,
        self.num_feat_mat.shape,
        self.cat_feat_mat.shape,
    )
    self.epoch_len = epoch_len
    self.permute = permute

  def _get_cat_cols(self, cat_cov_cols):
    """Integer-encode the categorical covariate columns.

    Each distinct value of a column is assigned an index in order of first
    appearance. For example, a column ['Newyork', 'Paris', 'London', 'Paris',
    'Newyork'] is encoded as [0, 1, 2, 1, 0] and has size 3.

    Args:
      cat_cov_cols: names of the categorical columns in `self.data_df`.

    Returns:
      A tuple `(matrix, cat_sizes)`: `matrix` stacks one encoded row per
      column and `cat_sizes` lists the number of distinct values per column.
    """
    cat_vars = []
    cat_sizes = []
    for col in cat_cov_cols:
      dct = {x: i for i, x in enumerate(self.data_df[col].unique())}
      cat_sizes.append(len(dct))
      mapped = self.data_df[col].map(lambda x: dct[x]).to_numpy().transpose()  # pylint: disable=cell-var-from-loop
      cat_vars.append(mapped)
    return np.vstack(cat_vars), cat_sizes

  def _normalize_data(self):
    """Standardize every series with statistics fitted on the train range."""
    self.scaler = StandardScaler()
    train_mat = self.data_mat[:, self.train_range[0] : self.train_range[1]]
    self.scaler = self.scaler.fit(train_mat.transpose())
    self.data_mat = self.scaler.transform(self.data_mat.transpose()).transpose()

  def _window_batch(self, idx, tsidx, hist_len):
    """Assemble the 7-tuple yielded by the generators for one window.

    Args:
      idx: time index of the first step of the prediction horizon.
      tsidx: indices of the time series included in the batch.
      hist_len: number of past steps in the window.

    Returns:
      `(bts_train, bfeats_train, bcf_train, bts_pred, bfeats_pred, bcf_pred,
      tsidx)`.
    """
    dtimes = np.arange(idx - hist_len, idx + self.pred_len)
    (
        bts_train,
        bts_pred,
        bfeats_train,
        bfeats_pred,
        bcf_train,
        bcf_pred,
    ) = self._get_features_and_ts(dtimes, tsidx, hist_len)
    return (
        bts_train,
        bfeats_train,
        bcf_train,
        bts_pred,
        bfeats_pred,
        bcf_pred,
        tsidx,
    )

  def train_gen(self):
    """Generator for training data."""
    num_ts = len(self.ts_cols)

    # Candidate window positions inside the train range, visited in random
    # order to add randomness to the training process.
    perm = np.arange(
        self.train_range[0] + self.hist_len,
        self.train_range[1] - self.pred_len,
    )
    perm = np.random.permutation(perm)

    hist_len = self.hist_len
    logging.info('Hist len: %s', hist_len)

    epoch_len = self.epoch_len if self.epoch_len else len(perm)
    for idx in perm[0:epoch_len]:
      for _ in range(num_ts // self.batch_size + 1):
        # With permute, each batch holds batch_size distinct series drawn at
        # random; otherwise every batch contains all series in order.
        if self.permute:
          tsidx = np.random.choice(num_ts, size=self.batch_size, replace=False)
        else:
          tsidx = np.arange(num_ts)
        yield self._window_batch(idx, tsidx, hist_len)

  def test_val_gen(self, mode='val'):
    """Generator for validation/test data.

    Args:
      mode: 'val' or 'test'.

    Yields:
      The same 7-tuple as `train_gen`, for consecutive window positions and
      consecutive chunks of at most `batch_size` series.

    Raises:
      NotImplementedError: if `mode` is neither 'val' nor 'test'.
    """
    if mode == 'val':
      start = self.val_range[0]
      end = self.val_range[1] - self.pred_len + 1
    elif mode == 'test':
      start = self.test_range[0]
      end = self.test_range[1] - self.pred_len + 1
    else:
      raise NotImplementedError('Eval mode not implemented')
    num_ts = len(self.ts_cols)
    hist_len = self.hist_len
    logging.info('Hist len: %s', hist_len)
    perm = np.arange(start, end)
    epoch_len = self.epoch_len if self.epoch_len else len(perm)
    for idx in perm[0:epoch_len]:
      for batch_idx in range(0, num_ts, self.batch_size):
        tsidx = np.arange(batch_idx, min(batch_idx + self.batch_size, num_ts))
        yield self._window_batch(idx, tsidx, hist_len)

  def _get_features_and_ts(self, dtimes, tsidx, hist_len=None):
    """Get features and ts in specified windows.

    Args:
      dtimes: array of time indices covering the window, from
        (current index - hist_len) up to (current index + pred_len).
      tsidx: positions of the selected series among all series.
      hist_len: number of leading steps treated as history; defaults to
        `self.hist_len`.

    Returns:
      `(bts_train, bts_pred, bfeats_train, bfeats_pred, bcf_train, bcf_pred)`
      where the `*_train` parts are the first `hist_len` steps and the
      `*_pred` parts are the remaining ones.
    """
    if hist_len is None:
      hist_len = self.hist_len
    # Part of the window may lie beyond the last observation; only the time
    # covariates are available there.
    data_times = dtimes[dtimes < self.data_mat.shape[1]]
    bdata = self.data_mat[:, data_times]
    bts = bdata[tsidx, :]
    bnf = self.num_feat_mat[:, data_times]
    bcf = self.cat_feat_mat[:, data_times]
    btf = self.time_mat[:, dtimes]
    if bnf.shape[1] < btf.shape[1]:
      # Pad the covariates to the length of the time covariates by repeating
      # their last available column.
      rem_len = btf.shape[1] - bnf.shape[1]
      rem_rep = np.repeat(bnf[:, [-1]], repeats=rem_len)
      rem_rep_cat = np.repeat(bcf[:, [-1]], repeats=rem_len)
      bnf = np.hstack([bnf, rem_rep.reshape(bnf.shape[0], -1)])
      bcf = np.hstack([bcf, rem_rep_cat.reshape(bcf.shape[0], -1)])
    # Feature matrix = time covariates stacked on top of numerical covariates.
    bfeats = np.vstack([btf, bnf])
    bts_train = bts[:, 0:hist_len]
    bts_pred = bts[:, hist_len:]
    bfeats_train = bfeats[:, 0:hist_len]
    bfeats_pred = bfeats[:, hist_len:]
    bcf_train = bcf[:, 0:hist_len]
    bcf_pred = bcf[:, hist_len:]
    return bts_train, bts_pred, bfeats_train, bfeats_pred, bcf_train, bcf_pred

  def tf_dataset(self, mode='train'):
    """Tensorflow Dataset.

    Args:
      mode: 'train' wraps `train_gen`; any other value is forwarded to
        `test_val_gen`.

    Returns:
      A prefetching `tf.data.Dataset` built from the chosen generator.
    """
    if mode == 'train':
      gen_fn = self.train_gen
    else:
      gen_fn = lambda: self.test_val_gen(mode)
    # Element order: ts, feats, cat feats (past), ts, feats, cat feats
    # (future), series indices.
    output_types = tuple(
        [tf.float32] * 2 + [tf.int32] + [tf.float32] * 2 + [tf.int32] * 2
    )
    dataset = tf.data.Dataset.from_generator(gen_fn, output_types)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset


# TiDE Model

In [None]:
# Small constant used as a zero-threshold / denominator guard in the metric
# functions and in the inverse affine transform of TideModel.call.
EPS = 1e-7

# Mean squared error, used as the training loss (TideModel.train_step) and as
# the loss reported during evaluation (TideModel.get_eval_data_for_split).
train_loss = keras.losses.MeanSquaredError()

### a simple MLP residual block contains:
### 1. dense layer with ReLU as activation - lin_a
### 2. dense linear layer - lin_b
### 3. skip connection - lin_res
class MLPResidual(keras.layers.Layer):
  """Residual block with a single hidden layer.

  Computes `dropout(lin_b(relu(lin_a(x)))) + lin_res(x)`, optionally followed
  by layer normalization.
  """

  def __init__(
      self, hidden_dim, output_dim, layer_norm=False, dropout_rate=0.0
  ):
    """Create the sub-layers.

    Args:
      hidden_dim: width of the hidden ReLU layer.
      output_dim: width of the block output (main path and skip path).
      layer_norm: whether to layer-normalize the summed output.
      dropout_rate: dropout rate applied to the main path.
    """
    super().__init__()
    dense = tf.keras.layers.Dense
    self.lin_a = dense(hidden_dim, activation='relu')
    self.lin_b = dense(output_dim, activation=None)
    # Linear projection of the input used as the skip connection.
    self.lin_res = dense(output_dim, activation=None)
    if layer_norm:
      self.lnorm = tf.keras.layers.LayerNormalization()
    self.layer_norm = layer_norm
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, inputs):
    """Apply the block to `inputs`."""
    main_path = self.dropout(self.lin_b(self.lin_a(inputs)))
    skip_path = self.lin_res(inputs)
    summed = main_path + skip_path
    if not self.layer_norm:
      return summed
    return self.lnorm(summed)
### function to build the multi-layer dense encoder and dense decoder
def _make_dnn_residual(hidden_dims, layer_norm=False, dropout_rate=0.0):
  """Build a stack of residual blocks from a list of layer widths.

  Args:
    hidden_dims: list of widths. Each consecutive pair `(d_i, d_{i+1})`
      becomes one `MLPResidual` with hidden size `d_i` and output size
      `d_{i+1}`.
    layer_norm: forwarded to every `MLPResidual`.
    dropout_rate: forwarded to every `MLPResidual`.

  Returns:
    A `keras.Sequential` of residual blocks, or a single linear `Dense` layer
    of width `hidden_dims[-1]` when fewer than two widths are given.
  """
  if len(hidden_dims) >= 2:
    blocks = [
        MLPResidual(
            width,
            next_width,
            layer_norm=layer_norm,
            dropout_rate=dropout_rate,
        )
        for width, next_width in zip(hidden_dims[:-1], hidden_dims[1:])
    ]
    return keras.Sequential(blocks)
  return keras.layers.Dense(
      hidden_dims[-1],
      activation=None,
  )


class TideModel(keras.Model):
  """Main class for multi-scale DNN model.

  The model is assembled from:
    * `time_encoder`: residual network that projects the per-step features,
    * `encoder` / `decoder`: dense residual stacks,
    * `final_decoder`: residual block with output size 1 applied to each step
      of the horizon,
    * `linear`: direct linear map from the past series to the horizon, added
      to the decoder output,
    * embeddings for the categorical covariates and for the series index.
  """

  def __init__(
      self,
      model_config,
      pred_len,
      cat_sizes,
      num_ts,
      transform=False,
      cat_emb_size=4,
      layer_norm=False,
      dropout_rate=0.0,
  ):
    """Tide model.

    Args:
      model_config: configurations specific to the model. The keys read here
        are 'hidden_dims', 'decoder_output_dim', 'time_encoder_dims' and
        'final_decoder_hidden'.
      pred_len: prediction horizon length.
      cat_sizes: number of categories in each categorical covariate.
      num_ts: number of time-series in the dataset
      transform: apply reversible transform or not.
      cat_emb_size: embedding size of categorical variables.
      layer_norm: use layer norm or not.
      dropout_rate: level of dropout.
    """
    super().__init__()
    self.model_config = model_config
    self.transform = transform
    # One learnable scale and one learnable shift per time series, only
    # created when the reversible transform is enabled.
    if self.transform:
      self.affine_weight = self.add_weight(
          name='affine_weight',
          shape=(num_ts,),
          initializer='ones',
          trainable=True,
      )

      self.affine_bias = self.add_weight(
          name='affine_bias',
          shape=(num_ts,),
          initializer='zeros',
          trainable=True,
      )
    self.pred_len = pred_len

    ### dense encoder has multiple layers with the same hiddenSize
    self.encoder = _make_dnn_residual(
        model_config.get('hidden_dims'),
        layer_norm=layer_norm,
        dropout_rate=dropout_rate,
    )

    ### the last layer of dense decoder has an output dim as H*p
    ### (decoder_output_dim * pred_len)
    self.decoder = _make_dnn_residual(
        model_config.get('hidden_dims')[:-1]
        + [
            model_config.get('decoder_output_dim') * self.pred_len,
        ],
        layer_norm=layer_norm,
        dropout_rate=dropout_rate,
    )
    ### linear map from the past series to the horizon (residual connection)
    self.linear = tf.keras.layers.Dense(
        self.pred_len,
        activation=None,
    )

    ### time covariates are projected using a MLPResidual
    self.time_encoder = _make_dnn_residual(
        model_config.get('time_encoder_dims'),
        layer_norm=layer_norm,
        dropout_rate=dropout_rate,
    )

    ### temporal decoder has an output dim as 1, to generate prediction for specific time-series
    self.final_decoder = MLPResidual(
        hidden_dim=model_config.get('final_decoder_hidden'),
        output_dim=1,
        layer_norm=layer_norm,
        dropout_rate=dropout_rate,
    )

    ### categorical embedding (one per categorical covariate) and time-series embedding
    self.cat_embs = []
    for cat_size in cat_sizes:
      self.cat_embs.append(
          tf.keras.layers.Embedding(input_dim=cat_size, output_dim=cat_emb_size)
      )
    # The embedding size of the series index is fixed to 16.
    self.ts_embs = tf.keras.layers.Embedding(input_dim=num_ts, output_dim=16)

  @tf.function
  def _assemble_feats(self, feats, cfeats):
    """assemble all features.

    Embeds row `i` of `cfeats` with the i-th categorical embedding, transposes
    the result and concatenates it below `feats` along axis 0.
    """
    all_feats = [feats]
    for i, emb in enumerate(self.cat_embs):
      all_feats.append(tf.transpose(emb(cfeats[i, :])))
    return tf.concat(all_feats, axis=0)

  @tf.function
  def call(self, inputs):
    """Call function that takes in a batch of training data and features.

    Args:
      inputs: tuple `(past_data, future_features, tsidx)` where `past_data`
        is `(past series, past features, past categorical features)`,
        `future_features` is `(future features, future categorical features)`
        and `tsidx` holds the indices of the series in the batch.

    Returns:
      The predictions for the batch, one row per series.
    """
    ### inputs is built by the callers from the tuples produced by the data
    ### loader generators:
    ### past_data = (bts_train, bfeats_train, bcf_train)
    ### future_features = (bfeats_pred, bcf_pred)
    ### past_ts: bts_train, the series within the lookback period
    ### past_feats / future_feats: features concatenated with the embedded
    ###   categorical covariates
    past_data = inputs[0]
    future_features = inputs[1]
    bsize = past_data[0].shape[0]
    tsidx = inputs[2]
    past_feats = self._assemble_feats(past_data[1], past_data[2])
    future_feats = self._assemble_feats(future_features[0], future_features[1])
    past_ts = past_data[0]

    ### per-series standardization over the lookback window (axis 1), followed
    ### by the learnable affine transformation; a zero std is replaced by 1
    if self.transform:
      affine_weight = tf.gather(self.affine_weight, tsidx)
      affine_bias = tf.gather(self.affine_bias, tsidx)
      batch_mean = tf.math.reduce_mean(past_ts, axis=1)
      batch_std = tf.math.reduce_std(past_ts, axis=1)
      batch_std = tf.where(
          tf.math.equal(batch_std, 0.0), tf.ones_like(batch_std), batch_std
      )
      past_ts = (past_ts - batch_mean[:, None]) / batch_std[:, None]
      past_ts = affine_weight[:, None] * past_ts + affine_bias[:, None]

    ### forward pass
    ### the feature matrices are transposed before and after the time encoder
    encoded_past_feats = tf.transpose(
        self.time_encoder(tf.transpose(past_feats))
    )
    encoded_future_feats = tf.transpose(
        self.time_encoder(tf.transpose(future_feats))
    )
    ### the encoded features are shared by all series: repeat them bsize times
    ### and flatten them per series
    enc_past = tf.repeat(tf.expand_dims(encoded_past_feats, axis=0), bsize, 0)
    enc_past = tf.reshape(enc_past, [bsize, -1])
    enc_fut = tf.repeat(
        tf.expand_dims(encoded_future_feats, axis=0), bsize, 0
    )  # batch x fdim x H
    enc_future = tf.reshape(enc_fut, [bsize, -1])
    residual_out = self.linear(past_ts)
    ts_embs = self.ts_embs(tsidx)
    encoder_input = tf.concat([past_ts, enc_past, enc_future, ts_embs], axis=1)
    encoding = self.encoder(encoder_input)
    decoder_out = self.decoder(encoding)
    decoder_out = tf.reshape(
        decoder_out, [bsize, -1, self.pred_len]
    )  # batch x d x H
    ### each horizon step sees its decoded vector and its encoded future features
    final_in = tf.concat([decoder_out, enc_fut], axis=1)
    out = self.final_decoder(tf.transpose(final_in, (0, 2, 1)))  # B x H x 1
    out = tf.squeeze(out, axis=-1)
    out += residual_out
    ### undo the affine transformation and the standardization
    if self.transform:
      out = (out - affine_bias[:, None]) / (affine_weight[:, None] + EPS)
      out = out * batch_std[:, None] + batch_mean[:, None]
    return out

  @tf.function
  def train_step(self, past_data, future_features, ytrue, tsidx, optimizer):
    """One step of training.

    Runs the forward pass, computes `train_loss` against `ytrue`, applies the
    gradients with `optimizer` and returns the loss.
    """

    ### forward pass and loss, recorded for differentiation
    with tf.GradientTape() as tape:
      all_preds = self((past_data, future_features, tsidx), training=True)
      loss = train_loss(ytrue, all_preds)

    ### gradients of the loss w.r.t. the trainable variables, then update
    grads = tape.gradient(loss, self.trainable_variables)
    optimizer.apply_gradients(zip(grads, self.trainable_variables))
    return loss

  def get_all_eval_data(self, data, mode, num_split=1):
    """Collect predictions and targets over the whole horizon, split by split.

    The horizon [0, pred_len) is cut with a step of `pred_len // num_split`
    and `get_eval_data_for_split` is run once per piece, which iterates the
    dataset again each time.

    Args:
      data: data loader providing `tf_dataset(mode=...)` and `ts_cols`.
      mode: dataset split passed to `data.tf_dataset`.
      num_split: number of pieces the horizon is divided into.

    Returns:
      Tuple `(y_pred, y_true, loss)`: predictions and targets stacked
      horizontally over the pieces, and the summed batch losses divided by the
      number of evaluated batches.
    """
    y_preds = []
    y_trues = []
    all_test_loss = 0
    all_test_num = 0
    idxs = np.arange(0, self.pred_len, self.pred_len // num_split).tolist() + [
        self.pred_len
    ]
    for i in range(len(idxs) - 1):
      indices = (idxs[i], idxs[i + 1])
      logging.info('Getting data for indices: %s', indices)
      all_y_true, all_y_pred, test_loss, test_num = (
          self.get_eval_data_for_split(data, mode, indices)
      )
      y_preds.append(all_y_pred)
      y_trues.append(all_y_true)
      all_test_loss += test_loss
      all_test_num += test_num
    return np.hstack(y_preds), np.hstack(y_trues), all_test_loss / all_test_num

  def get_eval_data_for_split(self, data, mode, indices):
    """Evaluate the horizon slice `indices[0]:indices[1]` on one dataset split.

    Args:
      data: data loader providing `tf_dataset(mode=...)` and `ts_cols`.
      mode: dataset split passed to `data.tf_dataset`.
      indices: pair `(start, end)` of horizon positions to keep.

    Returns:
      Tuple `(y_true, y_pred, loss_sum, num_batches)`. Note that the targets
      come first here, unlike in `get_all_eval_data`.
    """
    iterator = data.tf_dataset(mode=mode)

    all_y_true = None
    all_y_pred = None

    def set_or_concat(a, b):
      # Start the accumulator with `b`, then keep appending along axis 1.
      if a is None:
        return b
      return tf.concat((a, b), axis=1)

    all_test_loss = 0
    all_test_num = 0
    ts_count = 0
    ypreds = []
    ytrues = []
    for all_data in tqdm(iterator):
      past_data = all_data[:3]
      future_features = all_data[4:6]
      y_true = all_data[3]
      tsidx = all_data[-1]
      all_preds = self((past_data, future_features, tsidx), training=False)
      y_pred = all_preds
      # Trim the predictions to the number of available target steps, then
      # keep only the requested slice of the horizon.
      y_pred = y_pred[:, 0 : y_true.shape[1]]
      id1 = indices[0]
      id2 = min(indices[1], y_true.shape[1])
      y_pred = y_pred[:, id1:id2]
      y_true = y_true[:, id1:id2]
      loss = train_loss(y_true, y_pred)
      all_test_loss += loss
      all_test_num += 1
      ts_count += y_true.shape[0]
      ypreds.append(y_pred)
      ytrues.append(y_true)
      # Once batches covering all series have been seen, stack them along
      # axis 0 and append the result along axis 1.
      if ts_count >= len(data.ts_cols):
        ts_count = 0
        ypreds = tf.concat(ypreds, axis=0)
        ytrues = tf.concat(ytrues, axis=0)
        all_y_true = set_or_concat(all_y_true, ytrues)
        all_y_pred = set_or_concat(all_y_pred, ypreds)
        ypreds = []
        ytrues = []
    return (
        all_y_true.numpy(),
        all_y_pred.numpy(),
        all_test_loss.numpy(),
        all_test_num,
    )

  def eval(self, data, mode, num_split=1):
    """Compute every metric registered in `METRICS` on a dataset split.

    Args:
      data: data loader passed on to `get_all_eval_data`.
      mode: dataset split passed on to `get_all_eval_data`.
      num_split: number of horizon pieces used during inference.

    Returns:
      Tuple `(result_dict, (y_pred, y_true), loss)` where `result_dict` maps
      each metric name to its value as `np.float64`.
    """
    all_y_pred, all_y_true, test_loss = self.get_all_eval_data(
        data, mode, num_split
    )

    result_dict = {}
    for metric in METRICS:
      eval_fn = METRICS[metric]
      result_dict[metric] = np.float64(eval_fn(all_y_pred, all_y_true))

    logging.info(result_dict)
    logging.info('Loss: %f', test_loss)

    return (
        result_dict,
        (all_y_pred, all_y_true),
        test_loss,
    )


def mape(y_pred, y_true):
  """Mean absolute percentage error over targets with magnitude above EPS."""
  magnitudes = np.abs(y_true).flatten()
  errors = np.abs(y_pred - y_true).flatten()
  # Targets that are (numerically) zero are left out of the average.
  keep = magnitudes > EPS
  return np.mean(errors[keep] / magnitudes[keep])


def mae_loss(y_pred, y_true):
  """Mean absolute error between predictions and targets."""
  absolute_errors = np.abs(y_pred - y_true)
  return absolute_errors.mean()


def wape(y_pred, y_true):
  """Weighted absolute percentage error: total error over total magnitude."""
  total_error = np.sum(np.abs(y_pred - y_true))
  total_magnitude = np.sum(np.abs(y_true))
  # EPS guards against an all-zero target.
  return total_error / (total_magnitude + EPS)


def smape(y_pred, y_true):
  """Symmetric mean absolute percentage error."""
  errors = np.abs(y_pred - y_true)
  # Each error is scaled by the average magnitude of target and prediction.
  scales = (np.abs(y_true) + np.abs(y_pred)) / 2
  ratios = errors / (scales + EPS)
  return np.mean(ratios)


def rmse(y_pred, y_true):
  """Root mean squared error between predictions and targets."""
  squared_errors = np.square(y_pred - y_true)
  return np.sqrt(squared_errors.mean())


def nrmse(y_pred, y_true):
  """Root mean squared error divided by the mean absolute target."""
  root_mse = np.sqrt(np.square(y_pred - y_true).mean())
  target_scale = np.abs(y_true).mean()
  return root_mse / target_scale


# Registry of evaluation metrics: name -> function called as
# fn(y_pred, y_true). TideModel.eval iterates over it and reports one value
# per entry.
METRICS = {
    'mape': mape,
    'wape': wape,
    'smape': smape,
    'nrmse': nrmse,
    'rmse': rmse,
    'mae': mae_loss,
}


# Train model

## Reproduce

We run the model on the electricity.csv dataset and part of the results is displayed in the markdown block at the end of this file.

In [None]:
# Global absl flag container; the values defined below are read through it
# (e.g. FLAGS.dataset, FLAGS.hist_len) inside training().
FLAGS = flags.FLAGS

In [None]:
# NOTE(review): presumably registers a dummy `f` flag so that flag parsing
# tolerates the `-f <kernel file>` argument passed by Jupyter - confirm.
tf.compat.v1.flags.DEFINE_string('f','','')

<absl.flags._flagvalues.FlagHolder at 0x78d208c642e0>

In [None]:
# Training-loop hyper-parameters.
flags.DEFINE_integer('train_epochs', 100, 'Number of epochs to train')
flags.DEFINE_integer('patience', 40, 'Patience for early stopping')
flags.DEFINE_integer('epoch_len', None, 'number of iterations in an epoch')
flags.DEFINE_integer(
    'batch_size', 512, 'Batch size for the randomly sampled batch'
)
flags.DEFINE_float('learning_rate', 1e-4, 'Learning rate')


# Non tunable flags
flags.DEFINE_string(
    'expt_dir',
    './results',
    'The name of the experiment dir',
)
# 'dataset' must be a key of DATA_DICT.
flags.DEFINE_string('dataset', 'elec', 'The name of the dataset.')
flags.DEFINE_string('datetime_col', 'date', 'Column having datetime.')
flags.DEFINE_list('num_cov_cols', None, 'Column having numerical features.')
flags.DEFINE_list('cat_cov_cols', None, 'Column having categorical features.')
flags.DEFINE_integer('hist_len', 720, 'Length of the history provided as input')
flags.DEFINE_integer('pred_len', 720, 'Length of pred len during training')
# training() builds the model's 'hidden_dims' as [hidden_size] * num_layers.
flags.DEFINE_integer('num_layers', 2, 'Number of DNN layers')
flags.DEFINE_integer('hidden_size', 256, 'Hidden size of DNN')
# NOTE(review): the two flags below share the same help text. In the model,
# 'decoder_output_dim' is multiplied by pred_len to size the last decoder
# layer, and 'final_decoder_hidden' is the hidden size of the final decoder.
flags.DEFINE_integer('decoder_output_dim', 4, 'Hidden d3 of DNN')
flags.DEFINE_integer('final_decoder_hidden', 64, 'Hidden d3 of DNN')
flags.DEFINE_list('ts_cols', None, 'Columns of time-series features')
flags.DEFINE_integer(
    'random_seed', None, 'The random seed to be used for TF and numpy'
)
flags.DEFINE_bool('normalize', True, 'normalize data for training or not')
flags.DEFINE_bool('holiday', False, 'use holiday features or not')
flags.DEFINE_bool('permute', True, 'permute the order of TS in training set')
flags.DEFINE_bool('transform', False, 'Apply chronoml transform or not.')
flags.DEFINE_bool('layer_norm', False, 'Apply layer norm or not.')
flags.DEFINE_float('dropout_rate', 0.0, 'dropout rate')
flags.DEFINE_integer('num_split', 1, 'number of splits during inference.')
flags.DEFINE_integer(
    'min_num_epochs', 0, 'minimum number of epochs before early stopping'
)
flags.DEFINE_integer('gpu', 0, 'index of gpu to be used.')

In [None]:
"""Main training code."""

### run model on the elec dataset.

# Registry of datasets selectable through FLAGS.dataset. For each entry:
#   'boundaries': three row indices; training() uses them as the end of the
#                 train range, the end of the validation range and the end of
#                 the test range, respectively.
#   'data_path':  location of the CSV file.
#   'freq':       sampling frequency string handed to the data loader.
# Only 'elec' is enabled; the other entries are kept commented out.
DATA_DICT = {
    # 'ettm2': {
    #     'boundaries': [34560, 46080, 57600],
    #     'data_path': './datasets/ETT-small/ETTm2.csv',
    #     'freq': '15min',
    # },
    # 'ettm1': {
    #     'boundaries': [34560, 46080, 57600],
    #     'data_path': './datasets/ETT-small/ETTm1.csv',
    #     'freq': '15min',
    # },
    # 'etth2': {
    #     'boundaries': [8640, 11520, 14400],
    #     'data_path': './datasets/ETT-small/ETTh2.csv',
    #     'freq': 'H',
    # },
    # 'etth1': {
    #     'boundaries': [8640, 11520, 14400],
    #     'data_path': './datasets/ETT-small/ETTh1.csv',
    #     'freq': 'H',
    # },
    'elec': {
        'boundaries': [18413, 21044, 26304],
        'data_path': './datasets/electricity/electricity.csv',
        'freq': 'H',
    },
    # 'traffic': {
    #     'boundaries': [12280, 14036, 17544],
    #     'data_path': './datasets/traffic/traffic.csv',
    #     'freq': 'H',
    # },
    # 'weather': {
    #     'boundaries': [36887, 42157, 52696],
    #     'data_path': './datasets/weather/weather.csv',
    #     'freq': '10min',
    # },
}

# Seed NumPy and TensorFlow when this cell runs. Note that training() seeds
# both again with FLAGS.random_seed.
np.random.seed(1024)
tf.random.set_seed(1024)


def _get_random_string(num_chars):
  rand_str = ''.join(
      random.choice(
          string.ascii_uppercase + string.ascii_lowercase + string.digits
      )
      for _ in range(num_chars - 1)
  )
  return rand_str

In [None]:
def training():
  """Train a TideModel on the dataset selected by FLAGS.dataset.

  Builds the data loader and the model from command-line flags, then runs the
  epoch loop. Each epoch performs one pass over the training set followed by
  an evaluation on the validation and the test split. Whenever the validation
  RMSE improves and the epoch index is greater than FLAGS.min_num_epochs, the
  current validation/test predictions, ground truth and test metrics are
  written to the experiment directory. Every epoch that does not record a new
  best increments a patience counter; training stops once that counter
  exceeds FLAGS.patience, or after FLAGS.train_epochs + 1 epochs.

  Side effects:
    Creates `<FLAGS.expt_dir>/<dataset>_<experiment id>_<pred_len>/` and
    writes TensorBoard summaries plus the `*.npy` / `test_metrics.json`
    result files into it.
  """
  # Re-seed from the flag; this replaces any seed set at module import time.
  tf.random.set_seed(FLAGS.random_seed)
  np.random.seed(FLAGS.random_seed)

  experiment_id = _get_random_string(8)
  logging.info('Experiment id: %s', experiment_id)

  dataset = FLAGS.dataset
  data_path = DATA_DICT[dataset]['data_path']
  freq = DATA_DICT[dataset]['freq']
  boundaries = DATA_DICT[dataset]['boundaries']

  # Read through a context manager so the file handle is closed right after
  # parsing instead of being left open for the rest of the run.
  with open(data_path, 'r') as data_file:
    data_df = pd.read_csv(data_file)

  if FLAGS.ts_cols:
    # NOTE(review): none of the DATA_DICT entries visible in this file define
    # 'ts_cols', 'num_cov_cols' or 'cat_cov_cols', so passing --ts_cols looks
    # like it would raise KeyError here -- confirm the intended source.
    ts_cols = DATA_DICT[dataset]['ts_cols']
    num_cov_cols = DATA_DICT[dataset]['num_cov_cols']
    cat_cov_cols = DATA_DICT[dataset]['cat_cov_cols']
  else:
    # Default: every column except the datetime column is a time series.
    ts_cols = [col for col in data_df.columns if col != FLAGS.datetime_col]
    num_cov_cols = None
    cat_cov_cols = None
  permute = FLAGS.permute
  dtl = TimeSeriesdata(
      data_path=data_path,
      datetime_col=FLAGS.datetime_col,
      num_cov_cols=num_cov_cols,
      cat_cov_cols=cat_cov_cols,
      ts_cols=np.array(ts_cols),
      train_range=[0, boundaries[0]],
      val_range=[boundaries[0], boundaries[1]],
      test_range=[boundaries[1], boundaries[2]],
      hist_len=FLAGS.hist_len,
      pred_len=FLAGS.pred_len,
      # A batch never holds more entries than there are time series.
      batch_size=min(FLAGS.batch_size, len(ts_cols)),
      freq=freq,
      normalize=FLAGS.normalize,
      epoch_len=FLAGS.epoch_len,
      holiday=FLAGS.holiday,
      permute=permute,
  )

  # Create model
  model_config = {
      'model_type': 'dnn',
      'hidden_dims': [FLAGS.hidden_size] * FLAGS.num_layers,
      'time_encoder_dims': [64, 4],
      'decoder_output_dim': FLAGS.decoder_output_dim,
      'final_decoder_hidden': FLAGS.final_decoder_hidden,
      'batch_size': dtl.batch_size,
  }
  model = TideModel(
      model_config=model_config,
      pred_len=FLAGS.pred_len,
      num_ts=len(ts_cols),
      cat_sizes=dtl.cat_sizes,
      transform=FLAGS.transform,
      layer_norm=FLAGS.layer_norm,
      dropout_rate=FLAGS.dropout_rate,
  )

  # Compute path to experiment directory
  expt_dir = os.path.join(
      FLAGS.expt_dir,
      FLAGS.dataset + '_' + str(experiment_id) + '_' + str(FLAGS.pred_len),
  )
  os.makedirs(expt_dir, exist_ok=True)

  # `step` counts completed epochs (it is incremented once per epoch below).
  step = tf.Variable(0)
  # LR scheduling
  lr_schedule = keras.optimizers.schedules.CosineDecay(
      initial_learning_rate=FLAGS.learning_rate,
      decay_steps=30 * dtl.train_range[1],
  )

  optimizer = keras.optimizers.Adam(learning_rate=lr_schedule, clipvalue=1e3)
  summary = Summary(expt_dir)

  best_loss = np.inf
  pat = 0  # epochs since the last recorded improvement
  mean_loss_array = []
  iter_array = []
  while step.numpy() < FLAGS.train_epochs + 1:
    ep = step.numpy()
    logging.info('Epoch %s', ep)
    sys.stdout.flush()

    iterator = tqdm(dtl.tf_dataset(mode='train'), mininterval=2)
    for i, batch in enumerate(iterator):
      # Batch layout as consumed here: elements 0-2 are the past inputs,
      # element 3 is the training target, elements 4-5 the future features,
      # and the last element the time-series indices.
      past_data = batch[:3]
      future_features = batch[4:6]
      tsidx = batch[-1]
      loss = model.train_step(
          past_data, future_features, batch[3], tsidx, optimizer
      )
      # Train metrics
      summary.update({'train/reg_loss': loss, 'train/loss': loss})
      if i % 100 == 0:
        # Refresh the progress-bar description with the running mean loss.
        mean_loss = summary.metric_dict['train/reg_loss'].result().numpy()
        mean_loss_array.append(mean_loss)
        iter_array.append(i)
        iterator.set_description(f'Loss {mean_loss:.4f}')
    step.assign_add(1)
    # Test metrics
    val_metrics, val_res, val_loss = model.eval(
        dtl, 'val', num_split=FLAGS.num_split
    )
    test_metrics, test_res, test_loss = model.eval(
        dtl, 'test', num_split=FLAGS.num_split
    )
    logging.info('Val Loss: %s', val_loss)
    logging.info('Test Loss: %s', test_loss)
    # Model selection and early stopping are driven by validation RMSE.
    tracked_loss = val_metrics['rmse']
    if tracked_loss < best_loss and ep > FLAGS.min_num_epochs:
      best_loss = tracked_loss
      pat = 0

      # Keep every pred_len-th column of the results (last column excluded);
      # presumably this selects non-overlapping forecast windows -- confirm
      # against model.eval.
      with open(os.path.join(expt_dir, 'val_pred.npy'), 'wb') as fp:
        np.save(fp, val_res[0][:, 0 : -1 : FLAGS.pred_len])
      with open(os.path.join(expt_dir, 'val_true.npy'), 'wb') as fp:
        np.save(fp, val_res[1][:, 0 : -1 : FLAGS.pred_len])

      with open(os.path.join(expt_dir, 'test_pred.npy'), 'wb') as fp:
        np.save(fp, test_res[0][:, 0 : -1 : FLAGS.pred_len])
      with open(os.path.join(expt_dir, 'test_true.npy'), 'wb') as fp:
        np.save(fp, test_res[1][:, 0 : -1 : FLAGS.pred_len])
      with open(os.path.join(expt_dir, 'test_metrics.json'), 'w') as fp:
        json.dump(test_metrics, fp)
      logging.info('saved best result so far at %s', expt_dir)
      logging.info('Test metrics: %s', test_metrics)
    else:
      # Epochs with ep <= FLAGS.min_num_epochs also land here and therefore
      # count toward the patience budget.
      pat += 1
      if pat > FLAGS.patience:
        logging.info('Early stopping')
        break
    summary.write(step=step.numpy())


class Summary:
  """Running means of scalar metrics, flushed to a TF summary writer.

  Attributes are a dict `metric_dict` mapping metric name to a
  `keras.metrics.Mean`, and `writer`, the summary file writer for `log_dir`.
  """

  def __init__(self, log_dir):
    """Starts with no tracked metrics and opens a writer on `log_dir`."""
    self.metric_dict = {}
    self.writer = tf.summary.create_file_writer(log_dir)

  def update(self, update_dict):
    """Folds each value of `update_dict` into the mean tracked for its key."""
    for name, value in update_dict.items():
      tracker = self.metric_dict.get(name)
      if tracker is None:
        # First time this metric is seen since the last write(): start a
        # fresh accumulator for it.
        tracker = keras.metrics.Mean()
        self.metric_dict[name] = tracker
      tracker.update_state(values=[value])

  def write(self, step):
    """Writes the current mean of every metric at `step`, then resets them."""
    with self.writer.as_default():
      for name, tracker in self.metric_dict.items():
        tf.summary.scalar(name, tracker.result(), step=step)
    # Dropping the accumulators makes the next reporting period start from
    # scratch.
    self.metric_dict = {}
    self.writer.flush()


def main(_):
  """Entry point handed to app.run; ignores its argument and runs training()."""
  training()

In [None]:
# Start the program only when this code is executed as a script; app.run
# (from absl) is given main as the function to call.
if __name__ == '__main__':
  app.run(main)

## Part of the reproducing results

Due to limited computing resources (i.e., GPU), we could only obtain partial reproduction results, which are listed below.

Within 22 hours (from 10am to 8am the next day), our device was only able to run 2 epochs for the first dataset. We also tried running on Google Colab; although the running speed was higher there (48 it/s vs. 9 it/s in the local environment), the trial quota of our personal Google account was used up after running 2 epochs on the electricity dataset. We recommend a higher-capacity device when reproducing the code, such as a cloud platform with more capacity or at least an i7/i10-class processor, for higher running speed. Due to time limits, we were not able to complete the full reproduction, but it should be feasible with the recommendations above.

I1102 13:46:49.093221 140704456412800 train.py:146] Experiment id: XOZsbFA

I1102 13:46:51.977347 140704456412800 data_loader.py:116] Data Shapes: (321, 26304), (7, 27025), (1, 26304), (1, 26304)

2023-11-02 13:46:52.003338: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA

To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.

**I1102 13:46:52.125188 140704456412800 train.py:226] Epoch 0**

0it [00:00, ?it/s]I1102 13:46:52.394349 123145383587840 data_loader.py:152] Hist len: 720

Loss 0.2096: : 33946it [1:50:31,  5.12it/s]

I1102 15:37:23.699673 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1102 15:37:24.198838 123145411645440 data_loader.py:196] Hist len: 720

1912it [16:34,  1.92it/s]

I1102 15:55:51.894650 140704456412800 models.py:311] {'mape': 2.3208236694335938, 'wape': 0.3506520078911239, 'smape': 0.5850090384483337, 'nrmse': 0.5434972643852234, 'rmse': 0.40702003240585327, 'mae': 0.26260003447532654}

I1102 15:55:51.915013 140704456412800 models.py:312] Loss: 0.165665

I1102 15:55:52.010943 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1102 15:55:52.620486 123145411645440 data_loader.py:196] Hist len: 720

4541it [4:52:30,  3.86s/it]

I1102 21:12:12.041450 140704456412800 models.py:311] {'mape': 2.3315513134002686, 'wape': 0.3833434787576122, 'smape': 0.5768746733665466, 'nrmse': 0.5675468444824219, 'rmse': 0.47104641795158386, 'mae': 0.31816330552101135}

I1102 21:12:12.131451 140704456412800 models.py:312] Loss: 0.221885

I1102 21:12:12.141915 140704456412800 train.py:252] Val Loss: 0.16566508783954956

I1102 21:12:12.146863 140704456412800 train.py:253] Test Loss: 0.22188502422804035

**I1102 21:12:13.022655 140704456412800 train.py:226] Epoch 1**

0it [00:00, ?it/s]I1102 21:12:13.991549 123145411645440 data_loader.py:152] Hist len: 720

Loss 0.1617: : 33946it [1:42:52,  5.50it/s]

I1102 22:55:06.594316 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1102 22:55:06.772835 123145411645440 data_loader.py:196] Hist len: 720

1912it [14:52,  2.14it/s]

I1102 23:11:41.612770 140704456412800 models.py:311] {'mape': 2.183418035507202, 'wape': 0.3475286474837033, 'smape': 0.5901986956596375, 'nrmse': 0.5428066253662109, 'rmse': 0.4065028429031372, 'mae': 0.2602609694004059}

I1102 23:11:41.622642 140704456412800 models.py:312] Loss: 0.165243

I1102 23:11:41.681569 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1102 23:11:42.006523 123145384124416 data_loader.py:196] Hist len: 720

4541it [3:20:40,  2.65s/it]

I1103 02:52:24.920064 140704456412800 models.py:311] {'mape': 2.221997022628784, 'wape': 0.37962246531238414, 'smape': 0.5742937326431274, 'nrmse': 0.5680287480354309, 'rmse': 0.4714463949203491, 'mae': 0.31507495045661926}

I1103 02:52:24.989138 140704456412800 models.py:312] Loss: 0.222263

I1103 02:52:25.181510 140704456412800 train.py:252] Val Loss: 0.1652434440836248

I1103 02:52:25.182302 140704456412800 train.py:253] Test Loss: 0.22226341262679614

I1103 02:53:28.397828 140704456412800 train.py:270] saved best result so far at ./results/elec_XOZsbFA_720

I1103 02:53:28.414479 140704456412800 train.py:271] Test metrics: {'mape': 2.221997022628784, 'wape': 0.37962246531238414, 'smape': 0.5742937326431274, 'nrmse': 0.5680287480354309, 'rmse': 0.4714463949203491, 'mae': 0.31507495045661926}

**I1103 02:53:29.736258 140704456412800 train.py:226] Epoch 2**

0it [00:00, ?it/s]I1103 02:53:34.689530 123145384660992 data_loader.py:152] Hist len: 720

Loss 0.1463: : 33946it [1:35:41,  5.91it/s]

I1103 04:29:14.555700 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1103 04:29:14.668433 123145412182016 data_loader.py:196] Hist len: 720

1912it [14:18,  2.23it/s]

I1103 04:44:44.205847 140704456412800 models.py:311] {'mape': 2.2145016193389893, 'wape': 0.3410096805703794, 'smape': 0.5751430988311768, 'nrmse': 0.5402857661247253, 'rmse': 0.40461498498916626, 'mae': 0.25537896156311035}

I1103 04:44:44.221186 140704456412800 models.py:312] Loss: 0.163713

I1103 04:44:44.273549 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1103 04:44:44.733463 123145412182016 data_loader.py:196] Hist len: 720

4541it [2:55:21,  2.32s/it]

I1103 07:58:07.520910 140704456412800 models.py:311] {'mape': 2.151806354522705, 'wape': 0.3721396975328801, 'smape': 0.5671623349189758, 'nrmse': 0.5736586451530457, 'rmse': 0.4761190414428711, 'mae': 0.3088645040988922}

I1103 07:58:07.595725 140704456412800 models.py:312] Loss: 0.226688

I1103 07:58:07.730432 140704456412800 train.py:252] Val Loss: 0.16371311203705216

I1103 07:58:07.730627 140704456412800 train.py:253] Test Loss: 0.22668814559361924

I1103 07:59:01.703009 140704456412800 train.py:270] saved best result so far at ./results/elec_XOZsbFA_720

I1103 07:59:01.706350 140704456412800 train.py:271] Test metrics: {'mape': 2.151806354522705, 'wape': 0.3721396975328801, 'smape': 0.5671623349189758, 'nrmse': 0.5736586451530457, 'rmse': 0.4761190414428711, 'mae': 0.3088645040988922}

**I1103 07:59:02.294898 140704456412800 train.py:226] Epoch 3**

0it [00:00, ?it/s]I1103 07:59:04.099279 123145411645440 data_loader.py:152] Hist len: 720

Loss 0.1358: : 33946it [1:44:32,  5.41it/s]

I1103 09:43:36.007414 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1103 09:43:36.652187 123145412182016 data_loader.py:196] Hist len: 720

1912it [16:35,  1.92it/s]

I1103 10:02:00.228435 140704456412800 models.py:311] {'mape': 2.299938678741455, 'wape': 0.34981897640881904, 'smape': 0.5845889449119568, 'nrmse': 0.5494011044502258, 'rmse': 0.4114413857460022, 'mae': 0.2619761824607849}

I1103 10:02:00.242700 140704456412800 models.py:312] Loss: 0.169284

I1103 10:02:00.302606 140704456412800 models.py:242] Getting data for indices: (0, 720)

0it [00:00, ?it/s]I1103 10:02:00.949230 123145384660992 data_loader.py:196] Hist len: 720
     
3573it [58:44, 12.79s/it]

The authors also perform demand forecasting on the M5 forecasting dataset. However, no .py file for this experiment is provided.

## Ablation Study modified code

The authors perform an ablation study on a modified electricity dataset. We wrote the code below to modify the dataset.

In [None]:
### modification code for dataset
### Reads the electricity CSV, scales the first 24 rows of randomly chosen
### series up (event a) and/or down (event b), and writes the result to
### modified_elec.csv. Requires FLAGS.datetime_col, i.e. flags must be parsed.

data_df = pd.read_csv('datasets/electricity/electricity.csv')
ts_cols = [col for col in data_df.columns if col != FLAGS.datetime_col]
num_ts = len(ts_cols)
ts_length = data_df.shape[0]

### cast the series to float so that the in-place scaling below never has to
### store fractional values into an integer column
data_df = data_df.astype({col: float for col in ts_cols})

### create event type a and b
### each series independently has an event with probability 0.8
event_type_a = np.random.choice([0, 1], num_ts, p=[0.2, 0.8])
event_type_b = np.random.choice([0, 1], num_ts, p=[0.2, 0.8])

### statistical characteristic for event a
mean_a = [1.0, 2.0, 2.0, 1.0]
variance_a = 0.1
event_type_a_covariates = np.random.normal(mean_a, np.sqrt(variance_a), size=(num_ts, 4))

### statistical characteristic for event b
### no detail information is provided by the authors, we make up one
mean_b = [3.0, 2.5, 1.5, 2.0]
variance_b = 0.15
event_type_b_covariates = np.random.normal(mean_b, np.sqrt(variance_b), size=(num_ts, 4))
### NOTE: the covariates above are generated but not yet written into the
### dataset; only the series values are modified below.

### events occur for 24 contiguous hours
### rows are time steps and columns are series, so an event on series `col`
### touches the first 24 rows of that column (the previous `data_df[i, :24]`
### is not valid DataFrame indexing and raised an error)
event_rows = data_df.index[:24]
for col, has_event_a, has_event_b in zip(ts_cols, event_type_a, event_type_b):
    if has_event_a == 1:
        factor = np.random.uniform(3, 3.2)
        data_df.loc[event_rows, col] *= factor
    if has_event_b == 1:
        factor = np.random.uniform(2, 2.2)
        data_df.loc[event_rows, col] /= factor

modified_elec = data_df
### index=False: otherwise the row index is stored as an extra unnamed column,
### which would be picked up as an additional time series when the file is
### loaded again
modified_elec.to_csv('datasets/electricity/modified_elec.csv', index=False)

The training-flow section of the class TideModel should be modified as follows. We did not run this modification because our computing resources are limited.

In [None]:
class TideModel(keras.Model):

### ...
### training flow
    encoded_past_feats = tf.transpose(
        self.time_encoder(tf.transpose(past_feats))
    )
    encoded_future_feats = tf.transpose(
        self.time_encoder(tf.transpose(future_feats))
    )
    enc_past = tf.repeat(tf.expand_dims(encoded_past_feats, axis=0), bsize, 0)
    enc_past = tf.reshape(enc_past, [bsize, -1])
    enc_fut = tf.repeat(
        tf.expand_dims(encoded_future_feats, axis=0), bsize, 0
    )  # batch x fdim x H
    enc_future = tf.reshape(enc_fut, [bsize, -1])
    residual_out = self.linear(past_ts)
    ts_embs = self.ts_embs(tsidx)
    encoder_input = tf.concat([past_ts, enc_past, enc_future, ts_embs], axis=1)
    encoding = self.encoder(encoder_input)
    decoder_out = self.decoder(encoding)
    decoder_out = tf.reshape(
        decoder_out, [bsize, -1, self.pred_len]
    )  # batch x d x H
    
    ###### deactivate the lines below
    ###### final_in = tf.concat([decoder_out, enc_fut], axis=1)
    ###### out = self.final_decoder(tf.transpose(final_in, (0, 2, 1)))  # B x H x 1
    
    ###### modified line below
    ###### original line:
    ###### out = tf.squeeze(out, axis=-1)
    out = tf.squeeze(decoder_out, axis=-1)

    out += residual_out
    if self.transform:
      out = (out - affine_bias[:, None]) / (affine_weight[:, None] + EPS)
      out = out * batch_std[:, None] + batch_mean[:, None]
    return out

### ...