In [None]:
import tensorflow as tf
import keras
from keras import backend as K
import pandas as pd
import numpy as np
import pickle as pkl

In [None]:
import os
import datetime
import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

## Data Loading

In [None]:
# Load the weather / stage-height table and index it by its 'Date' column.
# The path uses a forward slash: the previous 'Data\W...' spelling contained
# the invalid escape sequence '\W' and only resolved on Windows.
df = pd.read_csv('Data/WeatherDataVec.csv')
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
# Reassign instead of mutating in place so the cell reads as a plain pipeline.
df = df.set_index('Date')

In [None]:
# Preview the first rows of the date-indexed frame as a sanity check on the load.
df.head()

In [None]:
# Positional (row-order) split: first 75% of the rows for training, the next
# 12.5% for validation and the remaining 12.5% for testing.
n = len(df)
train_end = int(n*0.75)
val_end = int(n*0.875)

train_df = df[0:train_end]
val_df = df[train_end:val_end]
test_df = df[val_end:]

# Number of columns in the full frame (the label column is included).
num_features = df.shape[1]

In [None]:
# Scaling statistics come from the training split only, so nothing from the
# validation / test rows leaks into the normalization.
train_mean = train_df.mean()
train_std = train_df.std()

# Set the target aside so it is not standardized with the other columns.
train_y, val_y, test_y = [
    split.pop('StageHeight') for split in (train_df, val_df, test_df)
]

# Standardize every split with the training mean / standard deviation.
train_df, val_df, test_df = [
    (split - train_mean) / train_std for split in (train_df, val_df, test_df)
]

# Re-attach the unscaled target to each split.
train_df['StageHeight'] = train_y
val_df['StageHeight'] = val_y
test_df['StageHeight'] = test_y

In [None]:
# Persist the training mean / std (as a two-element list) so the inference
# step can apply the same scaling.
with open('scale_var.pkl', 'wb') as scale_file:
    pkl.dump([train_mean, train_std], scale_file)

In [None]:
# Preview the training split after scaling; 'StageHeight' was re-attached unscaled.
train_df.head()

## Data Windowing

In [None]:
# Generates windowed dataset with feature-label pairs
class WindowGenerator():
    """Slice the train / val / test frames into (inputs, labels) windows.

    Every window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows form the inputs and the last ``label_width`` rows
    form the labels, optionally restricted to ``label_columns``.
    """

    def __init__(self, input_width, label_width, shift,batch_size,
                 train_df=train_df, val_df=val_df, test_df=test_df,
                 label_columns=None):
        """Store the data splits and pre-compute the window index layout.

        Args:
            input_width: number of rows at the start of a window used as inputs.
            label_width: number of rows at the end of a window used as labels.
            shift: number of rows between the end of the inputs and the end
                of the window.
            batch_size: batch size of the generated datasets.
            train_df, val_df, test_df: frames to window. The defaults are the
                module-level frames as they exist when the class is defined.
            label_columns: optional list of column names to keep in the
                labels; ``None`` keeps every column.

        Raises:
            ValueError: if ``label_width`` exceeds ``input_width + shift``.
        """
        # Fail early: an over-long label span would otherwise only surface
        # later as a shape mismatch inside `split_window`.
        if label_width > input_width + shift:
            raise ValueError(
                'label_width must not exceed input_width + shift')

        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.batch_size = batch_size

        # Work out the label column indices. The mapping always exists (empty
        # when no label columns were requested) so attribute access is safe.
        self.label_columns = label_columns
        self.label_columns_indices = {}
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in
                                          enumerate(label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window layout."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])

    def split_window(self, features):
        """Split a batch of full windows into ``(inputs, labels)``.

        ``features`` is indexed as ``[batch, time, column]``. When
        ``label_columns`` is set, only those columns are kept in the labels.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def make_dataset(self, data):
        """Build an unshuffled, batched dataset of ``(inputs, labels)`` windows."""
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=False,
            batch_size=self.batch_size, )

        ds = ds.map(self.split_window)

        return ds

    @property
    def train(self):
        """Windowed dataset built from the training frame (rebuilt on each access)."""
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        """Windowed dataset built from the validation frame (rebuilt on each access)."""
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        """Windowed dataset built from the test frame (rebuilt on each access)."""
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so take the first batch of the
            # `.test` dataset
            result = next(iter(self.test))
            # And cache it for next time
            self._example = result
        return result

    def plot(self, model=None, plot_col='StageHeight', max_subplots=1):
        """Plot inputs, labels and (optionally) predictions for the example batch.

        Args:
            model: optional callable applied to the example inputs; its output
                is drawn as 'Predictions'.
            plot_col: name of the column to draw.
            max_subplots: maximum number of windows from the batch to draw.
        """
        inputs, labels = self.example
        plt.figure(figsize=(12, 8))
        plot_col_index = self.column_indices[plot_col]
        max_n = min(max_subplots, len(inputs))
        # Keep the three-row layout used so far, but grow the grid when more
        # windows are requested; a fixed 3-row grid rejects a fourth subplot.
        n_rows = max(3, max_n)

        # The label column lookup does not depend on the window being drawn.
        if self.label_columns:
            label_col_index = self.label_columns_indices.get(plot_col, None)
        else:
            label_col_index = plot_col_index

        # Run the model once for the whole batch instead of once per subplot,
        # and only when its output will actually be drawn.
        predictions = None
        if model is not None and label_col_index is not None and max_n > 0:
            predictions = model(inputs)

        for n in range(max_n):
            plt.subplot(n_rows, 1, n + 1)
            plt.ylabel(f'{plot_col}'+' (Feet)')
            plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                     label='Inputs', marker='.', zorder=-10)

            # The plotted column is not among the labels: inputs only.
            if label_col_index is None:
                continue

            plt.plot(self.label_indices, labels[n, :, label_col_index],
                     marker='.', label='Labels')
            if predictions is not None:
                plt.plot(self.label_indices, predictions[n, :, label_col_index],
                         marker='.', label='Predictions')

            if n == 0:
                plt.legend()

        plt.xlabel('Time [Days]')

In [None]:
# Define Nash-Sutcliffe Efficiency
def NSE(y_true,y_pred):
    """Return 1 - sum((y_true - y_pred)^2) / sum((y_true - mean(y_true))^2).

    All reductions run over every element of the given tensors.
    """
    residual_ss = K.sum(K.square(y_true-y_pred))
    total_ss = K.sum(K.square(y_true-K.mean(y_true)))
    return 1 - residual_ss / total_ss

In [None]:
def compile_and_fit(model, window, patience=3, epochs=100):
    """Compile ``model`` with MSE loss / Adam and fit it on ``window``.

    Training uses ``window.train`` and validates on ``window.val``. Early
    stopping watches ``val_loss`` with the given ``patience`` and restores the
    best weights. NSE and mean absolute error are tracked as metrics.

    NOTE(review): `shuffle=True` is passed to `fit`, but Keras documents that
    argument as ignored for dataset inputs - confirm whether shuffling of the
    training windows was intended.

    Returns:
        The history object returned by ``model.fit``.
    """
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=patience,
        restore_best_weights=True,
    )
    tracked_metrics = [NSE, tf.metrics.MeanAbsoluteError()]

    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.MeanSquaredError(),
        metrics=tracked_metrics,
    )

    return model.fit(
        window.train,
        validation_data=window.val,
        epochs=epochs,
        shuffle=True,
        callbacks=[stop_early],
    )

In [None]:
# Evaluation results per model name, filled in by the model cells below.
val_performance, test_performance = {}, {}

## Single Output Dense Baseline

In [None]:
# Window for the dense baseline: 4 input rows, and a single 'StageHeight'
# label located 3 rows after the last input.
multi_step_window = WindowGenerator(
    input_width=4,
    label_width=1,
    shift=3,
    batch_size=32,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=['StageHeight'],
)

In [None]:
# Dense baseline: flatten the whole input window and regress one value from it.
dense_layers = [
    # Shape: (time, features) => (time*features)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
    # Add back the time dimension.
    # Shape: (outputs) => (1, outputs)
    tf.keras.layers.Reshape([1, -1]),
]
single_out_dense = tf.keras.Sequential(dense_layers)
history = compile_and_fit(single_out_dense, multi_step_window)

# Record validation / test metrics under the model's name, then plot the
# example window with the model's predictions.
val_performance['Dense Baseline'] = single_out_dense.evaluate(multi_step_window.val)
test_performance['Dense Baseline'] = single_out_dense.evaluate(multi_step_window.test, verbose=0)
multi_step_window.plot(single_out_dense)

In [None]:
# Save the trained dense baseline to 'ann3.h5' in the working directory.
single_out_dense.save('ann3.h5')

## Single-shot Multi-output RNN

In [None]:
# Number of consecutive time steps predicted in one shot by the multi-output
# model; it is used both as the label width and as the shift of its window.
OUT_STEPS = 5

In [None]:
# Window for the multi-output forecast:
#   input_width - number of days taken as inputs
#   label_width - number of output predictions
#   shift       - forecast horizon (rows between the last input and the last label)
multi_window = WindowGenerator(
    input_width=4,
    label_width=OUT_STEPS,
    shift=OUT_STEPS,
    batch_size=32,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=['StageHeight'],
)

In [None]:
# The window's labels only contain its label columns, so the model must emit
# that many values per output step. Emitting `num_features` values per step
# left the loss and the NSE metric to compare tensors of different widths.
num_label_features = len(multi_window.label_columns)

multi_lstm_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, time, 32]
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dropout(0.6),
    # Shape => [batch, time, 16]
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.Dropout(0.6),
    # Shape => [batch, 8]
    tf.keras.layers.LSTM(8, return_sequences=False),
    # Shape => [batch, out_steps*label_features]
    tf.keras.layers.Dense(OUT_STEPS*num_label_features,
                          kernel_initializer=tf.initializers.Zeros()),
    # Shape => [batch, out_steps, label_features]
    tf.keras.layers.Reshape([OUT_STEPS, num_label_features])
])

multi_history = compile_and_fit(multi_lstm_model, multi_window, epochs=100)

# Record validation / test metrics under the model's name, then plot the
# example window with the model's predictions.
val_performance['Multi_LSTM'] = multi_lstm_model.evaluate(multi_window.val)
test_performance['Multi_LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)
multi_window.plot(multi_lstm_model)

In [None]:
# Save the trained multi-output LSTM to 'multi5.h5' in the working directory.
multi_lstm_model.save('multi5.h5')

## Single output RNN

In [None]:
# Window for the single-output forecast:
#   input_width - number of days taken as inputs
#   label_width - number of output predictions
#   shift       - forecast horizon (rows between the last input and the label)
single_step_window = WindowGenerator(
    input_width=4,
    label_width=1,
    shift=3,
    batch_size=32,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=['StageHeight'],
)

In [None]:
# Stacked LSTM that regresses a single value from the input window.
lstm_layers = [
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dropout(0.6),
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.Dropout(0.6),
    # The last recurrent layer returns only its final step.
    tf.keras.layers.LSTM(8, return_sequences=False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=1),
    # Add back the time dimension: one step, one value.
    tf.keras.layers.Reshape([1, 1]),
]
single_step_lstm = tf.keras.models.Sequential(lstm_layers)

history = compile_and_fit(single_step_lstm, single_step_window)

# Record validation / test metrics under the model's name, then plot the
# example window with the model's predictions.
val_performance['Single_LSTM'] = single_step_lstm.evaluate(single_step_window.val)
test_performance['Single_LSTM'] = single_step_lstm.evaluate(single_step_window.test, verbose=0)
single_step_window.plot(single_step_lstm)

In [None]:
# Save the trained single-output LSTM to 'offset3.h5' in the working directory.
single_step_lstm.save('offset3.h5')

## Inference

In [None]:
def preprocess_data(data, scale_path='scale_var.pkl', input_width=4):
    """Scale an inference frame like the training data and window it.

    Args:
        data: DataFrame holding the same columns as the training frame,
            including 'StageHeight'. It is not modified.
        scale_path: pickle file holding ``[mean, std]`` of the training data.
            The default matches the file written by the scaling cell of this
            notebook.
        input_width: number of consecutive rows per input window.

    Returns:
        An unshuffled dataset yielding one window of ``input_width`` rows per
        batch.
    """
    # NOTE: unpickling can execute arbitrary code - only load a statistics
    # file that this notebook produced.
    with open(scale_path, 'rb') as f:
        var = pkl.load(f)
    mean = var[0]
    std = var[1]

    # Work on a copy so the caller's frame keeps its 'StageHeight' column.
    features = data.copy()

    # Mirror the training-time scaling step for step (target set aside,
    # remaining columns standardized, target re-attached) so the column
    # layout matches what the models were fitted on.
    target = features.pop('StageHeight')
    features = (features - mean) / std
    features['StageHeight'] = target

    # Window the *scaled* values, converted to float32 as during training.
    ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=np.array(features, dtype=np.float32),
        targets=None,
        sequence_length=input_width,
        sequence_stride=1,
        shuffle=False,
        batch_size=1, )

    return ds

In [None]:
# Data is a dataframe with meteorological conditions of the past 4 days
# Make sure dataframe columns are in the same sequence as training data
# `data` and `model` were previously undefined, so this cell failed on a fresh
# kernel. The last 4 rows of the raw (unscaled) frame and the single-output
# LSTM are used as a runnable example - swap in real inputs as needed.
data = df.tail(4)
model = single_step_lstm

ds = preprocess_data(data)
prediction = model.predict(ds)
for window_prediction in prediction:
    print(f"Predicted Stage Height :{window_prediction[0, 0]:.2f} ft")