# **Time Series**: Forecasting with a Recurrent Neural Network

Source:  [https://github.com/d-insight/code-bank.git](https://github.com/d-insight/code-bank.git)  
License: [MIT License](https://opensource.org/licenses/MIT). See open source [license](LICENSE) in the Code Bank repository. 

-------------

## Overview

In this illustration, we will develop a univariate and a multivariate time series model using a recurrent neural network (RNN). 

For data we use a weather time series recorded by the [Max Planck Institute for Biogeochemistry](https://www.bgc-jena.mpg.de/wetter/). The dataset contains 14 different features such as air temperature, atmospheric pressure, and humidity. Samples are collected every 10 minutes, beginning in 2003. For this illustration, we will use data collected between 2009 and 2016 for efficiency. This part of the dataset was prepared by François Chollet for his book [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).

__Source__: Portions of this page are reproduced from work created and shared by Google and used according to terms described in the [Creative Commons 4.0 Attribution License](https://creativecommons.org/licenses/by/4.0/). For the original tutorial visit: https://www.tensorflow.org/tutorials/structured_data/time_series

-------------

## **Part 0**: Setup

### Import packages

In [None]:
# Standard packages
import numpy as np
import pandas as pd
import os

# Import tensorflow 
import tensorflow as tf

# Plotting and visualization 
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['figure.figsize'] = (12, 8)
mpl.rcParams['axes.grid'] = False

### Constants

In [None]:
SEED        = 42       # random seed for replication (passed to tf.random.set_seed)
TRAIN_SPLIT = 300000   # first 300,000 rows are training data (300000 / 144 samples per day ~ 2083 days); later rows are validation data


### Support functions

In [None]:
def univariate_data(dataset, start_index, end_index, history_size, target_size):
    """
    Returns windows of time for the model to train on for univariate data. 

    For every position i, the sample is the `history_size` observations
    immediately before i and the label is the observation at i + target_size.
    
    Args:
        dataset (np.array): 1-D array of observations
        start_index (int): index of the first observation that may appear in a window
        end_index (int or None): positions i stop before this index; if None,
            len(dataset) - target_size is used so every label stays in range
        history_size (int): size of the past window of information
        target_size (int): how far into the future the model has to learn to predict
        
    Returns: 
        np.array(data): feature data in window, one (history_size, 1) array per sample
        np.array(labels): labels in window, one value per sample
    """
    
    data = []
    labels = []

    # The first usable position needs a full history window before it
    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        # A plain slice selects the same elements as indexing with
        # range(i - history_size, i) without building an index array
        window = dataset[i - history_size:i]
        # Reshape data from (history_size,) to (history_size, 1)
        data.append(np.reshape(window, (history_size, 1)))
        labels.append(dataset[i + target_size])
        
    return np.array(data), np.array(labels)

def multivariate_data(dataset, target, start_index, end_index, history_size, target_size, step, single_step=False):
    """
    Returns windows of time for the model to train on for multivariate data. 

    For every position i, the sample is every `step`-th row of the
    `history_size` rows immediately before i.
    
    Args:
        dataset (np.array): array of observations, one row per time step
        target (np.array): series the labels are taken from, aligned with dataset
        start_index (int): index of the first row that may appear in a window
        end_index (int or None): positions i stop before this index; if None,
            len(dataset) - target_size is used
        history_size (int): size of the past window of information
        target_size (int): how far into the future the model has to learn to predict
        step (int): keep every `step`-th row of the history window
        single_step (bool): if True the label is the single value
            target[i + target_size]; otherwise it is target[i:i + target_size]
        
    Returns: 
        np.array(data): feature data in window
        np.array(labels): labels in window 
    """
    
    data = []
    labels = []

    # The first usable position needs a full history window before it
    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        # A strided slice selects the same rows as indexing with
        # range(i - history_size, i, step) without building an index array
        data.append(dataset[i - history_size:i:step])

        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])

    return np.array(data), np.array(labels)

def create_time_steps(length):
    """
    Returns the negative time-step offsets -length, ..., -1 as a list

    Args:
        length (int): number of time steps in the history window
    """
    return [-offset for offset in range(length, 0, -1)]

def baseline(history):
    """
    Naive baseline forecast: the mean over all values of the history window

    Args:
        history: window of past observations
    """
    window_mean = np.mean(history)
    return window_mean

def show_plot(plot_data, delta, title):
    """
    Shows a plot with history data, the true future and the model prediction
    
    Args:
        plot_data: sequence whose first entry is the history window (an array;
            its first dimension sets the number of time steps) and whose
            remaining entries (true future, optionally the model prediction)
            are each plotted as a single marker at the future time step
        delta (int): x position of the future values relative to the end of
            the history; a falsy value (0 or None) places them at 0
        title (str): plot title
        
    Returns: 
        The return value of plt.show()
    """
    labels = ['History', 'True Future', 'Model Prediction']
    marker = ['.-', 'rx', 'go']
    time_steps = create_time_steps(plot_data[0].shape[0])
    
    # A falsy delta (0 or None) falls back to 0
    future = delta if delta else 0

    plt.title(title)
    for i, series in enumerate(plot_data):
        if i:
            # Entries after the first are drawn at the future time step
            plt.plot(future, series, marker[i], markersize=10,
                     label=labels[i])
        else:
            # The first entry is the history, drawn over the negative time steps
            plt.plot(time_steps, series.flatten(), marker[i], label=labels[i])
        
    plt.legend()
    plt.grid()
    # Leave room to the right of the future marker
    plt.xlim([time_steps[0], (future+5)*2])
    plt.xlabel('Time-Step')
    
    return plt.show()

def plot_train_history(history, title):
    """
    Draw the training and validation loss curves of a fitted model

    Args:
        history: object whose .history dict holds the 'loss' and 'val_loss' lists
        title (str): plot title
    """
    train_loss = history.history['loss']
    validation_loss = history.history['val_loss']
    epoch_axis = range(len(train_loss))

    plt.figure()

    # (values, line format, legend label) for each curve, drawn in this order
    curves = (
        (train_loss, 'b', 'Training loss'),
        (validation_loss, 'r', 'Validation loss'),
    )
    for values, line_format, legend_label in curves:
        plt.plot(epoch_axis, values, line_format, label=legend_label)

    plt.title(title)
    plt.legend()

    plt.show()
    
def multi_step_plot(history, true_future, prediction, step=None):
    """
    Shows a plot with history data, the true future and the model prediction
    
    Args:
        history: window of past observations; column 1 (history[:, 1]) is the
            series that is plotted
        true_future: true future temperatures
        prediction: predicted temperatures; if it has no non-zero element
            (e.g. np.array([0])) no prediction is drawn
        step (int, optional): divisor that maps future sample positions onto
            the time axis of the history. Defaults to the global STEP.
    """
    if step is None:
        # Looked up at call time so existing three-argument calls keep working
        step = STEP

    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    # x positions of the future values, on the same axis as the history
    future_steps = np.arange(len(true_future)) / step

    plt.plot(num_in, np.array(history[:, 1]), '.-', label='History')
    plt.plot(future_steps, np.array(true_future), 'bo',
           label='True Future')
    if prediction.any():
        plt.plot(future_steps, np.array(prediction), 'ro', label='Predicted Future')
    plt.legend(loc='upper left')
    plt.grid()
    plt.show()
    

## **Part 1**: Download data and EDA

In [None]:
# Download weather data and extract .zip file
# The archive is cached directly under data/weather/ (cache_subdir is empty)
zip_path = tf.keras.utils.get_file(
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    fname='jena_climate_2009_2016.csv.zip',
    cache_dir='data/weather/',
    cache_subdir='',
    extract=True)
# Drop the '.zip' extension to obtain the path of the CSV file
# NOTE(review): assumes get_file returns the archive path and extracts the CSV
# next to it -- confirm for the installed Keras version
csv_path, _ = os.path.splitext(zip_path)

In [None]:
# Read data with pandas 
df = pd.read_csv(csv_path)

# Print (rows, columns), then show the first rows as the cell output
print(df.shape)
df.head()

As you can see above, samples are recorded every 10 minutes. This means that, for a single hour, you will have 6 observations. Similarly, a single day will contain 144 (6 x 24) observations.

Given a specific time, let's say you want to predict the temperature 6 hours into the future. In order to make this prediction, you choose to use 5 days of samples. Thus, you would create a window containing the last 720 (5 x 144) observations to train the model. Many such configurations are possible, so it's a good idea to experiment.

In [None]:
# Set TensorFlow's global random seed so that runs can be replicated
tf.random.set_seed(SEED)


In [None]:
# Extract temperature data (univariate time series)
uni_data_df = df['T (degC)']
# Use the 'Date Time' column as the index of the series
uni_data_df.index = df['Date Time']

uni_data_df.head()

In [None]:
# Plot temperature data over the whole series, with grid lines
uni_data_df.plot(grid=True)
plt.show()

## **Part 2**: Univariate time series

First, we will train a model using only a single input feature (temperature), and use that to make predictions for that same value in the future.

### Prepare data

In [None]:
# Extract temperature values
uni_data = uni_data_df.values

# Standardize data
# Mean and standard deviation come from the first TRAIN_SPLIT samples only,
# so no statistics of the validation part are used
uni_train_mean = uni_data[:TRAIN_SPLIT].mean()
uni_train_std = uni_data[:TRAIN_SPLIT].std()
uni_data = (uni_data-uni_train_mean)/uni_train_std

In [None]:
# Create train/validation split: past 30 temperature samples; predict temperature at the next time step

univariate_past_history = 30
# A target offset of 0 selects the sample directly after the history window
univariate_future_target = 0

# Training windows end before TRAIN_SPLIT
x_train_uni, y_train_uni = univariate_data(uni_data, 0, TRAIN_SPLIT,
                                           univariate_past_history,
                                           univariate_future_target)
# Validation windows start at TRAIN_SPLIT and run to the end of the data
x_val_uni, y_val_uni = univariate_data(uni_data, TRAIN_SPLIT, None,
                                       univariate_past_history,
                                       univariate_future_target)

print('x_train shape:'.ljust(40) + str(x_train_uni.shape))
print('Single window of past history shape:'.ljust(40) + str(x_train_uni[0].shape))
print('Target temperature:'.ljust(40) + str(y_train_uni[0]))

In [None]:
# Plot the first training window together with its target value (no prediction yet)
show_plot([x_train_uni[0], y_train_uni[0]], 0, 'Sample Example')

### Baseline Prediction: the MEAN

In [None]:
# Plot baseline prediction: the mean of the first training window is used as the forecast
show_plot([x_train_uni[0], y_train_uni[0], baseline(x_train_uni[0])], 0, 'Baseline Prediction: the MEAN')

### Simple RNN Prediction

A recurrent neural network (RNN) is well-suited to sequential data such as this time series. We will use a specialized RNN layer called Long Short Term Memory (LSTM).

In [None]:
# Shuffle, batch, and cache the dataset
BATCH_SIZE = 256  # number of windows per batch
BUFFER_SIZE = 10000  # size of the shuffle buffer

train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
# repeat() without a count loops over the data, so the length of an epoch is
# set by steps_per_epoch when fitting
train_univariate = train_univariate.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation data is batched and repeated, but not shuffled
val_univariate = tf.data.Dataset.from_tensor_slices((x_val_uni, y_val_uni))
val_univariate = val_univariate.batch(BATCH_SIZE).repeat()

In [None]:
# Set up a simple LSTM model: an LSTM layer with 8 units followed by a
# single-unit Dense layer that outputs the predicted value
simple_lstm_model = tf.keras.models.Sequential([
    # input_shape is the shape of one training window (last two dimensions of x_train_uni)
    tf.keras.layers.LSTM(8, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.Dense(1)
])

# Optimize the mean absolute error with Adam
simple_lstm_model.compile(optimizer='adam', loss='mae')

In [None]:
# Train model
EVALUATION_INTERVAL = 200  # batches per epoch (passed as steps_per_epoch)
EPOCHS = 10  # number of training epochs

# The datasets repeat, so the number of steps must be given explicitly;
# validation uses 50 batches per epoch
simple_lstm_model.fit(train_univariate, epochs=EPOCHS,
                      steps_per_epoch=EVALUATION_INTERVAL,
                      validation_data=val_univariate, validation_steps=50)

In [None]:
# Take 3 validation batches and plot the prediction for the first window of each
# enumerate(..., start=1) numbers the plot titles 1, 2, 3
for i, (x, y) in enumerate(val_univariate.take(3), start=1):
    show_plot([x[0].numpy(), y[0].numpy(), simple_lstm_model.predict(x)[0]], 0, 'Simple LSTM model with data {}'.format(i))
    print()

This looks better than the baseline. 

## **Part 3**: Multivariate Time Series (RNN)

Next, we will train a model on three of the original fourteen features. The features used are: air temperature, atmospheric pressure, and air density.

In [None]:
# Select features 
# Temperature is at position 1 of this list; later cells select it with column index 1
features_considered = ['p (mbar)', 'T (degC)', 'rho (g/m**3)']
features = df[features_considered]
# Use the 'Date Time' column as the index
features.index = df['Date Time']

print(features.shape)
features.head()

In [None]:
# Plot features, each in its own subplot
features.plot(subplots=True)
plt.show()

In [None]:
# Standardize data 
dataset = features.values
# Per-feature (axis=0) mean and standard deviation of the first TRAIN_SPLIT
# rows only, so no statistics of the validation part are used
data_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
data_std = dataset[:TRAIN_SPLIT].std(axis=0)
dataset = (dataset-data_mean)/data_std

### Single Step Model: Predict a Single Point in the Future

In [None]:
# Model trains on hourly samples, so we need 720 original samples (= 5 days);
# keeping every STEP-th (6th) sample gives 120 hourly time steps per window
# Forecast temperature 12 hours into the future (72 = 12 x 6)
past_history = 720
future_target = 72
STEP = 6

# dataset[:, 1] is the standardized temperature column, used as the target
x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 1], 0,
                                                   TRAIN_SPLIT, past_history,
                                                   future_target, STEP,
                                                   single_step=True)
x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 1],
                                               TRAIN_SPLIT, None, past_history,
                                               future_target, STEP,
                                               single_step=True)

# Report the training set throughout, including the target of the first training window
print('x_train_single shape:'.ljust(40) + str(x_train_single.shape))
print('Single window of past history shape:'.ljust(40) + str(x_train_single[0].shape))
print('Target temperature:'.ljust(40) + str(y_train_single[0]))

In [None]:
# Shuffle, batch, and cache the dataset
train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
# repeat() without a count loops over the data; epoch length is set when fitting
train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation data is batched and repeated, but not shuffled
val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
val_data_single = val_data_single.batch(BATCH_SIZE).repeat()

In [None]:
# Set up a simple LSTM model: an LSTM layer with 32 units followed by a
# single-unit Dense layer that outputs the predicted value
single_step_model = tf.keras.models.Sequential()
# input_shape is the shape of one training window (last two dimensions of x_train_single)
single_step_model.add(tf.keras.layers.LSTM(32,
                                           input_shape=x_train_single.shape[-2:]))
single_step_model.add(tf.keras.layers.Dense(1))

# Optimize the mean absolute error with RMSprop (default settings)
single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')

In [None]:
# Train model
# The returned history object is kept for plotting the loss curves;
# validation uses 50 batches per epoch
single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS,
                                            steps_per_epoch=EVALUATION_INTERVAL,
                                            validation_data=val_data_single,
                                            validation_steps=50)

In [None]:
# Plot training history (i.e. training loss and validation loss) of the single-step model
plot_train_history(single_step_history,
                   'Single Step Training and validation loss')

In [None]:
# Plot 3 windows and predictions
# The target lies future_target 10-minute samples ahead; dividing by STEP
# expresses this in the hourly time steps of the history axis (72 // 6 = 12)
DELTA = future_target // STEP
# Titles are numbered from 1, matching the univariate prediction plots;
# x[0][:, 1] is the temperature column of the first window in each batch
for i, (x, y) in enumerate(val_data_single.take(3), start=1):
    show_plot([x[0][:, 1].numpy(), y[0].numpy(), single_step_model.predict(x)[0]], DELTA, 'Single Step Prediction with data {}'.format(i))
    print()

### Multi-Step Model: Predict a Range of Future Values

In a multi-step prediction model, given a past history, the model needs to learn to predict a range of future values.

The training data again consists of recordings over the past five days sampled every hour. However, here, the model needs to learn to predict the temperature for the next 12 hours. Since an observation is taken every 10 minutes, the output is 72 predictions.

In [None]:
# Split data into training/validation data
# 72 samples = 12 hours; without single_step=True each label is the sequence
# of the next future_target temperature values
future_target = 72
x_train_multi, y_train_multi = multivariate_data(dataset, dataset[:, 1], 0,
                                                 TRAIN_SPLIT, past_history,
                                                 future_target, STEP)
x_val_multi, y_val_multi = multivariate_data(dataset, dataset[:, 1],
                                             TRAIN_SPLIT, None, past_history,
                                             future_target, STEP)


In [None]:
# Shuffle, batch, and cache the dataset
train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
# repeat() without a count loops over the data; epoch length is set when fitting
train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation data is batched and repeated, but not shuffled
val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()

In [None]:
# Plot 2 windows and the future values to predict
# np.array([0]) has no non-zero element, so multi_step_plot draws no prediction
for x, y in train_data_multi.take(2):
    multi_step_plot(x[0], y[0], np.array([0]))

In [None]:
# Set up LSTM model: two stacked LSTM layers followed by a Dense output layer
multi_step_model = tf.keras.models.Sequential()
# return_sequences=True passes the full output sequence on to the second LSTM layer
multi_step_model.add(tf.keras.layers.LSTM(32,
                                          return_sequences=True,
                                          input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# One output per future value to predict (future_target = 72)
multi_step_model.add(tf.keras.layers.Dense(future_target))

# Optimize the mean absolute error with RMSprop; gradients are clipped to [-1, 1]
multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae')

In [None]:
# Train model 
# The returned history object is kept for plotting the loss curves;
# validation uses 50 batches per epoch
multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=EVALUATION_INTERVAL,
                                          validation_data=val_data_multi,
                                          validation_steps=50)

In [None]:
# Plot training history (training loss and validation loss) of the multi-step model
plot_train_history(multi_step_history, 'Multi-Step Training and validation loss')

In [None]:
# Plot 4 windows and the predicted values 
# Only the first window of each of the 4 validation batches is plotted
for x, y in val_data_multi.take(4):
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

# **Summary**

In [None]:
# BASELINE 
# First window of the first validation batch, with the window mean as forecast
for x, y in val_univariate.take(1):
    show_plot([x[0].numpy(), y[0].numpy(), baseline(x[0])], 0, 'Baseline Prediction: the MEAN')

In [None]:
# UNIVARIATE
# First window of the first validation batch, with the simple LSTM prediction
for x, y in val_univariate.take(1):
    show_plot([x[0].numpy(), y[0].numpy(), simple_lstm_model.predict(x)[0]], 0, 'Simple LSTM model')

In [None]:
# MULTIVARIATE (single step)
# x[0][:, 1] is the temperature column of the first validation window;
# the target is drawn DELTA time steps after the end of the history
for x, y in val_data_single.take(1):
    show_plot([x[0][:, 1].numpy(), y[0].numpy(), single_step_model.predict(x)[0]], DELTA, 'Single Step Prediction')

In [None]:
# MULTIVARIATE (multi-step)
# First window of the first validation batch, with the predicted future sequence
for x, y in val_data_multi.take(1):
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])