<a href="https://colab.research.google.com/github/yiwenwangANU/Machine_Learning/blob/main/10_Time_Series_Forcasting_Part2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Preprocessing

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Windowing configuration shared by the preprocessing and model cells.
HORIZON = 1   # forecast length handed to each NBeatsBlock
WIN_SIZE = 7  # how many lagged price columns are built as model inputs

In [3]:
# Download the CoinDesk BTC/USD price history CSV (2013-10-01 to 2021-05-18) into the working directory.
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv

--2022-06-18 06:48:16--  https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 178509 (174K) [text/plain]
Saving to: ‘BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv’


2022-06-18 06:48:16 (11.5 MB/s) - ‘BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv’ saved [178509/178509]



In [4]:
# Load the raw CoinDesk export and keep only the two columns used downstream.
raw_df = pd.read_csv('BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv')
# Select with a list, not a set: a set has no defined order (so the resulting
# column order could differ between runs) and set indexers are deprecated in
# pandas 1.5 and rejected by pandas 2.x.
data_df = raw_df[['Date', 'Closing Price (USD)']]
# Shorten the closing-price column name to 'Price' for the rest of the notebook.
data_df = data_df.rename(columns={'Closing Price (USD)': 'Price'})
data_df

Unnamed: 0,Date,Price
0,2013-10-01,123.654990
1,2013-10-02,125.455000
2,2013-10-03,108.584830
3,2013-10-04,118.674660
4,2013-10-05,121.338660
...,...,...
2782,2021-05-14,49764.132082
2783,2021-05-15,50032.693137
2784,2021-05-16,47885.625255
2785,2021-05-17,45604.615754


In [5]:
# Parse the 'Date' strings into datetimes and promote that column to the index.
parsed_dates = pd.to_datetime(data_df['Date'])
data_df = data_df.assign(Date=parsed_dates).set_index('Date')

In [6]:
# Add WIN_SIZE lagged copies of the price. Despite the '+' in its name, column
# 'Price+k' holds the price from k rows earlier, because shift(k) moves values down.
for lag in range(1, WIN_SIZE + 1):
  data_df[f'Price+{lag}'] = data_df['Price'].shift(lag)
# dropna removes every row containing a NaN, which includes the leading rows
# that do not yet have a full window of history; then cast everything to float32.
data_df = data_df.dropna().astype(np.float32)
data_df

Unnamed: 0_level_0,Price,Price+1,Price+2,Price+3,Price+4,Price+5,Price+6,Price+7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-10-08,123.032997,121.794998,120.655327,121.338661,118.674660,108.584831,125.455002,123.654991
2013-10-09,124.049004,123.032997,121.794998,120.655327,121.338661,118.674660,108.584831,125.455002
2013-10-10,125.961159,124.049004,123.032997,121.794998,120.655327,121.338661,118.674660,108.584831
2013-10-11,125.279663,125.961159,124.049004,123.032997,121.794998,120.655327,121.338661,118.674660
2013-10-12,125.927498,125.279663,125.961159,124.049004,123.032997,121.794998,120.655327,121.338661
...,...,...,...,...,...,...,...,...
2021-05-14,49764.132812,52147.820312,56573.554688,55715.546875,58102.191406,58788.210938,57107.121094,56583.851562
2021-05-15,50032.691406,49764.132812,52147.820312,56573.554688,55715.546875,58102.191406,58788.210938,57107.121094
2021-05-16,47885.625000,50032.691406,49764.132812,52147.820312,56573.554688,55715.546875,58102.191406,58788.210938
2021-05-17,45604.617188,47885.625000,50032.691406,49764.132812,52147.820312,56573.554688,55715.546875,58102.191406


In [7]:
# Target is the current price; features are all remaining (lagged) columns.
y = data_df['Price']
X = data_df.drop(columns='Price')

In [8]:
# 80/20 split by position: rows keep their existing order (no shuffling), so the
# first 80% of rows form the training set and the remainder the test set.
split_size = int(len(data_df) * 0.8)
X_train, X_test = X.iloc[:split_size], X.iloc[split_size:]
y_train, y_test = y.iloc[:split_size], y.iloc[split_size:]
len(X_train), len(y_train), len(X_test), len(y_test)

(2224, 2224, 556, 556)

In [9]:
def _batched_pairs(features_ds, labels_ds):
  """Zip features with labels, then batch by 1024 and prefetch."""
  paired = tf.data.Dataset.zip((features_ds, labels_ds))
  return paired.batch(1024).prefetch(tf.data.AUTOTUNE)


# Wrap each split in its own tf.data.Dataset (one element per row).
train_features_dataset = tf.data.Dataset.from_tensor_slices(X_train)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(y_train)
test_features_dataset = tf.data.Dataset.from_tensor_slices(X_test)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(y_test)

# Final (features, label) pipelines consumed by fit() and evaluate().
train_dataset = _batched_pairs(train_features_dataset, train_labels_dataset)
test_dataset = _batched_pairs(test_features_dataset, test_labels_dataset)

# N-BEATS Model

In [10]:
from tensorflow.keras import layers

class NBeatsBlock(tf.keras.layers.Layer):
  """Fully connected block that produces a backcast and a forecast.

  The input is passed through `n_layers` ReLU Dense layers of width
  `n_neurons`, followed by a linear Dense layer with `theta_size` units.
  That output is then split: its first `backcast_size` values are returned as
  the backcast and its last `forcast_size` values as the forecast.

  Args:
    backcast_size: number of leading theta values returned as the backcast.
    theta_size: number of units in the final linear (theta) layer.
    forcast_size: number of trailing theta values returned as the forecast.
      (The spelling is kept so existing keyword callers continue to work.)
    n_neurons: number of units in each hidden Dense layer.
    n_layers: number of hidden Dense layers.
    **kwargs: forwarded to `tf.keras.layers.Layer` (for example `name`).
  """

  def __init__(self, backcast_size: int, theta_size: int, forcast_size: int,
               n_neurons: int, n_layers: int, **kwargs):
    super().__init__(**kwargs)
    self.backcast_size = backcast_size
    self.theta_size = theta_size
    self.forcast_size = forcast_size
    self.n_neurons = n_neurons
    self.n_layers = n_layers

    # Hidden stack of identical ReLU layers, then a linear projection to theta.
    self.dense_layers = [layers.Dense(n_neurons, activation='relu') for _ in range(n_layers)]
    self.theta_layer = layers.Dense(units=theta_size)

  def call(self, inputs):
    """Run the block and return the `(backcast, forecast)` pair."""
    x = inputs
    for layer in self.dense_layers:
      x = layer(x)
    x = self.theta_layer(x)
    # Split theta along the last axis: head -> backcast, tail -> forecast.
    backcast, forecast = x[:, :self.backcast_size], x[:, -self.forcast_size:]
    return backcast, forecast

  def get_config(self):
    """Return the constructor arguments so Keras can serialize and rebuild the block.

    Without this override, a model containing the block cannot be saved or
    cloned, because the base layer config does not know the custom arguments.
    """
    config = super().get_config()
    config.update({
        'backcast_size': self.backcast_size,
        'theta_size': self.theta_size,
        'forcast_size': self.forcast_size,
        'n_neurons': self.n_neurons,
        'n_layers': self.n_layers,
    })
    return config

In [11]:
# Architecture and training hyperparameters.
N_STACKS = 30    # number of NBeatsBlocks chained together in the model
N_LAYERS = 4     # hidden Dense layers inside each block
N_NEURONS = 512  # called "Width" in Table 18
N_EPOCHS = 5000  # called "Iterations" in Table 18

# Block output sizing: theta is wide enough for the backcast plus the forecast.
BACKCAST_SIZE = WIN_SIZE * HORIZON  # called "Lookback" in Table 18
THETA_SIZE = BACKCAST_SIZE + HORIZON

BACKCAST_SIZE, THETA_SIZE

(7, 8)

## Build the model

In [12]:
def _make_nbeats_block(index):
  """Create the `index`-th NBeatsBlock with the notebook's shared hyperparameters."""
  return NBeatsBlock(backcast_size=BACKCAST_SIZE,
                     theta_size=THETA_SIZE,
                     forcast_size=HORIZON,
                     n_neurons=N_NEURONS,
                     n_layers=N_LAYERS,
                     name=f'nbeats{index}')


# One window of WIN_SIZE values per sample. `shape` must be a real tuple:
# `(WIN_SIZE)` without the trailing comma is just the integer WIN_SIZE.
stack_inputs = layers.Input(shape=(WIN_SIZE,), dtype=tf.float32, name='input_layer')

# First block: its forecast seeds the running total, and its backcast is
# subtracted from the raw input to form the first residual.
backcast, forecast = _make_nbeats_block(0)(stack_inputs)
residual = layers.subtract([stack_inputs, backcast], name='subtract0')

# Remaining blocks: each one sees the current residual, removes its own backcast
# from it, and adds its forecast to the accumulated forecast.
for i in range(1, N_STACKS):
  backcast, block_forecast = _make_nbeats_block(i)(residual)
  residual = layers.subtract([residual, backcast], name=f'subtract{i}')
  forecast = layers.add([forecast, block_forecast], name=f'add{i}')

# The model maps the input window to the summed forecast of all blocks.
model_7 = tf.keras.Model(stack_inputs, forecast)

In [None]:
from tensorflow.keras.utils import plot_model

# Render the layer graph of the stacked model (tensor shapes omitted).
plot_model(model=model_7, show_shapes=False)

In [14]:
# Compile with MAE loss. `learning_rate` is the supported argument name; the old
# `lr` alias is deprecated and makes the optimizer constructor emit a warning.
model_7.compile(loss=tf.keras.losses.mae,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

# NOTE(review): the test set is also used as validation data, so early stopping
# and the LR schedule are tuned on the same data that is evaluated afterwards.
training_callbacks = [
    # Stop after 200 epochs without val_loss improvement and roll back to the best weights.
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=200, restore_best_weights=True),
    # Lower the learning rate after 100 epochs without val_loss improvement.
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=100, verbose=1),
]

# Use the N_EPOCHS constant from the hyperparameter cell instead of repeating 5000.
model_7.fit(train_dataset,
            epochs=N_EPOCHS,
            validation_data=test_dataset,
            verbose=0,
            callbacks=training_callbacks)

  super(Adam, self).__init__(name, **kwargs)



Epoch 236: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 346: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 446: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.


<keras.callbacks.History at 0x7f3a065a3a50>

In [15]:
# Report the compiled loss (MAE; no extra metrics were compiled) on the test batches.
model_7.evaluate(x=test_dataset)



573.770751953125