In [None]:
!pip install -q -U keras-tuner

[K     |████████████████████████████████| 135 kB 5.2 MB/s 
[K     |████████████████████████████████| 1.6 MB 44.9 MB/s 
[?25h

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from dateutil.parser import parse
dateparse=lambda dates:parse(dates)

from sklearn.model_selection import TimeSeriesSplit
from matplotlib import pyplot

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot
import numpy as np
from statistics import mean
import keras_tuner as kt 
from keras_tuner.engine.hyperparameters import HyperParameters

import IPython
import IPython.display

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so the notebook can
# read its input CSV and write tuner results there.
# NOTE(review): other cells use the relative path 'drive/MyDrive/...', which
# presumably relies on the working directory being /content - confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Pre-processing dataset

In [None]:
# Load the weekly feature table and drop the two columns that are not features.
df = pd.read_csv('drive/MyDrive/Data/weekly_features.csv')
df = df.drop(columns=['Unnamed: 0', 'USD_PHP Historical Data.csv'])

# Build one calendar date per (year, week) row: year*100+week gives "YYYYWW",
# and the appended '0' is the weekday digit consumed by %w.
dates = df.year*100 + df.week
df['Date'] = pd.to_datetime(dates.astype(str) + '0', format='%Y%W%w')
# Stored as 'YYYY-MM-DD' text; such strings sort in chronological order, so the
# plain string comparisons below act as a date-range filter.
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

mask = (df['Date'] > '1990-09-30') & (df['Date'] <= '2021-09-30')
df = df.loc[mask]
# Forward-fill gaps. `.ffill()` replaces the deprecated `fillna(method='ffill')`.
df = df.ffill()

def convert_to_timestamp(x):
    """Convert a date-like object to a POSIX timestamp.

    Args:
        x: a ``pandas.Timestamp``, ``datetime.datetime`` or ``datetime.date``
            (anything exposing ``timetuple()``, optionally via
            ``to_pydatetime()``).

    Returns:
        float: seconds since the epoch as computed by ``time.mktime``, i.e. the
        time tuple is interpreted in the machine's local time zone.
    """
    # Imported here because the notebook's import cell does not import `time`.
    import time

    # `Timestamp.to_datetime()` no longer exists in pandas; `to_pydatetime()`
    # is the supported conversion. Plain datetime/date objects skip this step.
    if hasattr(x, 'to_pydatetime'):
        x = x.to_pydatetime()
    return time.mktime(x.timetuple())

# Transform reference:
# https://www.aiproblog.com/index.php/2018/08/21/4-common-machine-learning-data-transforms-for-time-series-forecasting/

# First-difference every feature column. The first row has no predecessor, so
# it is dropped after differencing.
diff_df = df.drop(columns=['Date', 'year', 'week']).diff().iloc[1:]
# Re-attach the undifferenced week number for the remaining rows.
diff_df['week'] = df.week.iloc[1:]

# Column name -> positional index lookup.
column_indices = dict(zip(diff_df.columns, range(len(diff_df.columns))))

# Chronological 70/30 split (no shuffling): earliest rows train, latest rows test.
n = len(diff_df)
train_df, test_df = diff_df.iloc[:int(n*0.7)], diff_df.iloc[int(n*0.7):]

num_features = diff_df.shape[1]

# Min-max scale using statistics from the training rows only, then apply the
# same fitted scaler to the test rows.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(train_df)
scaled_train = scaler.transform(train_df)
scaled_test = scaler.transform(test_df)

In [None]:
# Undifferenced copy of the table without the year/week columns, with Date
# turned from text into a numeric timestamp (via pd.Timestamp.timestamp).
raw_values = (
    df.drop(columns=['year', 'week'])
      .assign(Date=lambda frame: pd.to_datetime(frame['Date']).map(pd.Timestamp.timestamp))
)

In [None]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Cut a sequence into (history window, label) training pairs.

    For every anchor position ``i`` from ``start_index + history_size`` up to
    (but excluding) ``end_index``, the input window is
    ``dataset[i-history_size : i : step]`` and the label is either the single
    element ``target[i + target_size]`` (``single_step=True``) or the slice
    ``target[i : i + target_size]``.

    Args:
        dataset: array indexed with a ``range`` of row positions (e.g. a NumPy
            array).
        target: sequence the labels are taken from.
        start_index: position of the first row that may appear in a window.
        end_index: exclusive upper bound for the anchor; ``None`` means
            ``len(dataset) - target_size``.
        history_size: number of rows spanned by each input window.
        target_size: label length, or label offset when ``single_step``.
        step: stride between rows sampled inside a window.
        single_step: pick one future element instead of a slice.

    Returns:
        tuple: ``(np.array(windows), np.array(labels))``.
    """
    first_anchor = start_index + history_size
    last_anchor = len(dataset) - target_size if end_index is None else end_index

    windows, horizons = [], []
    for anchor in range(first_anchor, last_anchor):
        windows.append(dataset[range(anchor - history_size, anchor, step)])
        horizons.append(
            target[anchor + target_size] if single_step
            else target[anchor:anchor + target_size]
        )

    return np.array(windows), np.array(horizons)

### Set up window

In [None]:
# Window geometry: each sample sees `past_history` rows and is labelled with
# the `future_target` rows that follow; STEP is the stride inside a window.
past_history = 20
future_target = 15
STEP = 1

# Training windows; the scaled feature matrix serves as both input and target,
# so every feature is predicted.
X, y = multivariate_data(
    dataset=scaled_train, target=scaled_train,
    start_index=0, end_index=None,
    history_size=past_history, target_size=future_target,
    step=STEP, single_step=False,
)

# Test windows, built the same way from the held-out rows.
X_test, y_test = multivariate_data(
    dataset=scaled_test, target=scaled_test,
    start_index=0, end_index=None,
    history_size=past_history, target_size=future_target,
    step=STEP, single_step=False,
)

# Batched, endlessly repeating tf.data view of the test windows.
test_data_multi = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(32)
    .repeat()
)


In [None]:
# Training configuration used by compile_and_fit.
MAX_EPOCHS = 90
EVALUATION_INTERVAL = 200
batch_size = 32
buffer_size = 150


def compile_and_fit(model, train, val, patience=2):
    """Compile `model` with an MSE loss/metric and Adam, then fit it.

    Args:
        model: Keras model to compile and train.
        train: training data, passed to ``model.fit`` as its first argument.
        val: data passed to ``model.fit`` as ``validation_data``.
        patience: epochs without ``val_loss`` improvement tolerated by the
            EarlyStopping callback.

    Returns:
        The object returned by ``model.fit``.
    """
    stop_when_stalled = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=patience, mode='min')

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        metrics=[tf.keras.losses.MeanSquaredError()],
    )

    # Each epoch runs EVALUATION_INTERVAL training steps and 10 validation steps.
    fit_options = {
        'batch_size': batch_size,
        'epochs': MAX_EPOCHS,
        'validation_data': val,
        'steps_per_epoch': EVALUATION_INTERVAL,
        'callbacks': [stop_when_stalled],
        'validation_steps': 10,
    }
    history = model.fit(train, **fit_options)

    model.reset_states()
    return history

### Model

In [None]:
class SampleModel(kt.HyperModel):
    """Tunable CuDNN-LSTM -> Conv1D -> Dense forecaster.

    The search space covers the number of layers in each stage (1 or 2), the
    width and dropout rate of each layer, and the Adam learning rate. The
    network outputs a ``(future_target, num_features)`` tensor.
    """

    def build(self, hp):
        """Create and compile one candidate model from the values in `hp`."""
        net = tf.keras.Sequential()

        # Recurrent stage: every LSTM returns the full sequence so the
        # convolutional stage still receives a time axis.
        lstm_depth = hp.Int("n_cuDNNlstm_layers", 1, 2)
        for idx in range(lstm_depth):
            lstm_units = hp.Int(f'lstm_{idx}_units',
                                min_value=192 - 128*idx,
                                max_value=384 - 256*idx,
                                step=32)
            net.add(tf.compat.v1.keras.layers.CuDNNLSTM(units=lstm_units,
                                                        return_sequences=True))
            lstm_drop = hp.Float(f'lstm_{idx}_dropout_rate',
                                 min_value=0.05, max_value=0.95, step=0.1)
            net.add(layers.Dropout(rate=lstm_drop))

        # Convolutional stage.
        conv_depth = hp.Int("n_conv_layers", 1, 2)
        for idx in range(conv_depth):
            conv_filters = hp.Int(f'conv_{idx}_units',
                                  min_value=128 - 64*idx,
                                  max_value=256 - 128*idx,
                                  step=32)
            net.add(layers.Conv1D(filters=conv_filters, kernel_size=3,
                                  activation="relu"))
            net.add(layers.MaxPooling1D(pool_size=1))
            conv_drop = hp.Float(f'conv_{idx}_dropout_rate',
                                 min_value=0.05, max_value=0.95, step=0.1)
            net.add(layers.Dropout(rate=conv_drop))

        net.add(layers.Flatten())

        # Dense stage.
        dense_depth = hp.Int("n_dense_layers", 1, 2)
        for idx in range(dense_depth):
            dense_units = hp.Int(f'dense_{idx}_units',
                                 min_value=384 - 256*idx,
                                 max_value=512 - 256*idx,
                                 step=32)
            net.add(layers.Dense(units=dense_units))

        # Output head: one value per (future step, feature), reshaped into a
        # (future_target, num_features) forecast. Kernel starts at zero.
        net.add(layers.Dense(future_target*num_features,
                             kernel_initializer=tf.initializers.zeros()))
        net.add(layers.Reshape([future_target, num_features]))

        learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4, 1e-5])
        net.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.losses.MeanSquaredError()],
        )

        return net

In [None]:
## trial.hyperparameters is what the hypermodel's build() receives as `hp`
buffer_size = 150  # not read by CVTuner; kept in case another cell uses it

class CVTuner(kt.engine.tuner.Tuner):
    """Tuner that scores a trial by time-series cross-validation.

    Each trial builds a fresh model per ``TimeSeriesSplit`` fold, trains it on
    the fold's training indices, evaluates it on the fold's held-out indices,
    and reports the mean of the fold scores to the oracle as ``val_loss``.
    """

    def run_trial(self, trial, x, y, *args, **kwargs):
        """Train and evaluate one hyperparameter configuration over 5 folds.

        Args:
            trial: the keras-tuner trial whose ``hyperparameters`` are used.
            x: input windows, indexable by the integer index arrays produced
                by ``TimeSeriesSplit.split``.
            y: labels aligned with ``x``.
            *args: ignored.
            **kwargs: extra ``model.fit`` options forwarded from
                ``tuner.search`` (e.g. ``callbacks``). ``batch_size``,
                ``epochs`` and ``validation_data`` are set by this method and
                override any caller-supplied values.
        """
        splits = TimeSeriesSplit(n_splits=5)
        val_losses = []
        batch_size = 32
        # The epoch count is itself a tuned hyperparameter.
        epochs = trial.hyperparameters.Int('epochs', 10, 100, step=10)

        # Previously the options given to tuner.search() were silently dropped,
        # so callbacks never ran. Forward them to fit().
        fit_kwargs = dict(kwargs)

        for train_indices, test_indices in splits.split(x):
            x_train, x_val = x[train_indices], x[test_indices]
            y_train, y_val = y[train_indices], y[test_indices]

            # Supplying the held-out fold as validation data is what makes
            # 'val_loss' available to callbacks such as EarlyStopping.
            fit_kwargs.update(batch_size=batch_size,
                              epochs=epochs,
                              validation_data=(x_val, y_val))

            model = self.hypermodel.build(trial.hyperparameters)
            model.fit(x_train, y_train, **fit_kwargs)
            val_loss = model.evaluate(x_val, y_val)
            # Index 1 is the first compiled metric (index 0 is the loss);
            # assumes the hypermodel compiles at least one metric.
            val_losses.append(val_loss[1])

        # The fold models are not saved; only the averaged score is reported.
        self.oracle.update_trial(trial.trial_id, {'val_loss': np.mean(val_losses)})


# Random search over the SampleModel space, scored by CVTuner.
# NOTE(review): CVTuner.run_trial never reads executions_per_trial - confirm
# whether this argument has any effect.
model = SampleModel()
tuner = CVTuner(
    oracle=kt.oracles.RandomSearch(objective='val_loss', max_trials=60),
    hypermodel=model,
    directory='drive/MyDrive/Data',
    project_name='LSTM_CNN_encoder',
    executions_per_trial=2,
    overwrite=True,
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode='min',
)

# Keyword arguments given to search() arrive in CVTuner.run_trial as **kwargs.
tuner.search(X, y, callbacks=[early_stopping])

best_hyperparameters = tuner.get_best_hyperparameters()[0]

print(best_hyperparameters.values)
# Fresh, untrained model built from the winning configuration.
best_model = tuner.hypermodel.build(best_hyperparameters)

Trial 60 Complete [00h 00m 41s]
val_loss: 0.025289083644747735

Best val_loss So Far: 0.007745090406388044
Total elapsed time: 00h 52m 32s
{'n_cuDNNlstm_layers': 1, 'lstm_0_units': 320, 'lstm_0_dropout_rate': 0.05, 'n_conv_layers': 2, 'conv_0_units': 192, 'conv_0_dropout_rate': 0.25000000000000006, 'n_dense_layers': 2, 'dense_0_units': 416, 'learning_rate': 0.001, 'epochs': 80, 'lstm_1_units': 96, 'lstm_1_dropout_rate': 0.45000000000000007, 'conv_1_units': 96, 'conv_1_dropout_rate': 0.45000000000000007, 'dense_1_units': 256}


In [None]:
tuner.results_summary()

Results summary
Results in drive/MyDrive/Data/LSTM_CNN_encoder
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7f7e9512cc50>
Trial summary
Hyperparameters:
n_cuDNNlstm_layers: 1
lstm_0_units: 320
lstm_0_dropout_rate: 0.05
n_conv_layers: 2
conv_0_units: 192
conv_0_dropout_rate: 0.25000000000000006
n_dense_layers: 2
dense_0_units: 416
learning_rate: 0.001
epochs: 80
lstm_1_units: 96
lstm_1_dropout_rate: 0.45000000000000007
conv_1_units: 96
conv_1_dropout_rate: 0.45000000000000007
dense_1_units: 256
Score: 0.007745090406388044
Trial summary
Hyperparameters:
n_cuDNNlstm_layers: 1
lstm_0_units: 224
lstm_0_dropout_rate: 0.25000000000000006
n_conv_layers: 1
conv_0_units: 160
conv_0_dropout_rate: 0.25000000000000006
n_dense_layers: 1
dense_0_units: 480
learning_rate: 0.001
epochs: 80
lstm_1_units: 96
lstm_1_dropout_rate: 0.8500000000000002
conv_1_units: 128
conv_1_dropout_rate: 0.7500000000000002
Score: 0.007887329161167144
Trial summary
Hyperparameters:
n_cuDNNlstm_

In [None]:
tuner.results_summary()

Results summary
Results in drive/MyDrive/Data/LSTM_CNN
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7f91ccf02490>
Trial summary
Hyperparameters:
lstm_units: 416
lstm_dropout_rate: 0.25000000000000006
conv__units: 192
conv_dropout_rate: 0.05
dense_units: 160
learning_rate: 0.0001
epochs: 80
Score: 0.010465066228061915
Trial summary
Hyperparameters:
lstm_units: 448
lstm_dropout_rate: 0.35000000000000003
conv__units: 352
conv_dropout_rate: 0.5500000000000002
dense_units: 256
learning_rate: 0.0001
epochs: 80
Score: 0.010500004049390554
Trial summary
Hyperparameters:
lstm_units: 192
lstm_dropout_rate: 0.45000000000000007
conv__units: 96
conv_dropout_rate: 0.45000000000000007
dense_units: 512
learning_rate: 0.0001
epochs: 70
Score: 0.010581668466329575
Trial summary
Hyperparameters:
lstm_units: 480
lstm_dropout_rate: 0.35000000000000003
conv__units: 480
conv_dropout_rate: 0.8500000000000002
dense_units: 384
learning_rate: 0.0001
epochs: 70
Score: 0.0109522443264