In [20]:
import import_ipynb
from load_process_data import load_data, preprocess_data, truncate_pad_data, shuffle_data
from models import BaseGRU, MultiHeadCnnRnn
from keras.regularizers import l2
from model_eval import cross_validate
from visualize import visualize_accuracies
import os


In [29]:
# Define hyperparameters

input_shape = (81, 23)  # 81 timesteps with 23 features
num_folds = 5  # Cross-validation
epochs = 10
percentage_map = {
    (0, 25): 0.25,   # 25% of data with 0-25% of the original length
    (25, 50): 0.25,  # 25% of data with 25-50% of the original length
    (50, 75): 0.25,  # 25% of data with 50-75% of the original length
    (75, 100): 0.25  # 25% of data with 75-100% of the original length
}

test_splits = {(49, 50): 1}

# GRU hyperparameters

learning_rate = 0.002
dropout = 0.1
recurrent_dropout = 0.1
kernel_regularizer = l2(0.01)  # Regularization strength for kernel
recurrent_regularizer = l2(0.01)  # Regularization strength for recurrent connections

# CNN hyperparameters

kernel_sizes = [8, 5, 3]
filters = [16, 32, 64] 
learning_rate = 0.001
weight_decay = 0.01 

In [37]:
# Create models with regularization

GRUModel = BaseGRU(input_shape=input_shape, 
                learning_rate=learning_rate, 
                dropout=dropout, 
                recurrent_dropout=recurrent_dropout,
                kernel_regularizer=kernel_regularizer, 
                recurrent_regularizer=recurrent_regularizer)

MultiHeadModel = MultiHeadCnnRnn(input_shape=input_shape,
                                 kernel_sizes=kernel_sizes,
                                 filters=filters,
                                 learning_rate=learning_rate,
                                 weight_decay=weight_decay,
                                 dropout=dropout, 
                                 recurrent_dropout=recurrent_dropout,
                                 kernel_regularizer=kernel_regularizer, 
                                 recurrent_regularizer=recurrent_regularizer)

Feature amount: 23


In [41]:
# Loading data
#
# The dataset is read from the `data/` folder that sits beside the current
# working directory, i.e. one level up from where the notebook runs.

current_directory = os.getcwd()
parent_directory = os.path.split(current_directory)[0]
data_path = os.path.join(parent_directory, 'data/')

# load_data hands back four per-fold collections, labels first and then
# features, each in train/test order.
(train_y_list,
 test_y_list,
 train_X_list,
 test_X_list) = load_data(data_path)

# Preprocessing data (reshape + standardization)
#
# In the recorded run the feature axis goes from 25 columns to 23 here.

train_x_list_filtered, test_x_list_filtered = preprocess_data(train_X_list, test_X_list)

# Quick shape check on the first preprocessed test fold.
print(test_x_list_filtered[0].shape)

Loading data ...
Data shapes: 
train_x_list: (5,)
training data within each fold: (16063, 81, 25)
train_y_list: (5,)
training label within each fold: (16063,)
Preprocessing data ...
(4016, 81, 23)


In [26]:
# Reshaping time series train / test data shape for real-time forecasting


def _truncate_and_shuffle_folds(x_folds, y_folds, length_map, max_length):
    """Apply truncate_pad_data and then shuffle_data to every fold.

    Args:
        x_folds: Iterable of per-fold feature arrays.
        y_folds: Per-fold labels, indexed with the same fold position as
            ``x_folds``.
        length_map: Mapping forwarded unchanged to ``truncate_pad_data``
            (``percentage_map`` for training, ``test_splits`` for testing).
        max_length: Value forwarded as the third argument of
            ``truncate_pad_data`` (the number of timesteps).

    Returns:
        A ``(x_list, y_list)`` pair of lists holding the shuffled features
        and labels of each fold, in fold order.
    """
    x_out, y_out = [], []
    for fold_idx, x_fold in enumerate(x_folds):
        # Presumably shortens each sequence according to length_map and
        # zero-pads it back to max_length (inferred from the helper's name
        # and the zero rows in the recorded output) -- confirm in
        # load_process_data.
        x_truncated = truncate_pad_data(x_fold, length_map, max_length)
        # NOTE(review): the recorded run shows fold 0 going from 16063 to
        # 16060 samples after this stage while the labels passed in are the
        # original ones -- confirm that shuffle_data keeps features and
        # labels aligned when their lengths differ.
        x_shuffled, y_shuffled = shuffle_data(x_truncated, y_folds[fold_idx])
        x_out.append(x_shuffled)
        y_out.append(y_shuffled)
    return x_out, y_out


# Training folds use the mixed-length schedule; test folds use the fixed
# schedule from test_splits. Train is processed before test, as before.
train_x_list_random, train_y_list_random = _truncate_and_shuffle_folds(
    train_x_list_filtered, train_y_list, percentage_map, input_shape[0])
test_x_list_random, test_y_list_random = _truncate_and_shuffle_folds(
    test_x_list_filtered, test_y_list, test_splits, input_shape[0])

print(f"Fold shape: {train_x_list_random[0].shape}")


Fold shape: (16060, 81, 23)


In [35]:
# Spot-check the first training fold after truncation/padding: in the recorded
# output the trailing timesteps of the displayed samples are all zeros.
# NOTE(review): this prints the whole fold array; a small slice would be enough
# for the same check.
print(train_x_list_random[0])

[[[-1.27571732 -0.22900461  1.12895998 ...  0.34654717  0.51255905
    0.26715782]
  [-1.27541354 -0.22806234  1.12895998 ...  0.3657645   0.55689175
    0.26715782]
  [-1.27508566 -0.22713599  1.12895998 ...  0.35818073  0.51255905
    0.26715782]
  ...
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[ 0.9234803   2.25723896  1.12895998 ...  0.23839426  0.56978045
    0.26715782]
  [ 0.92377276  2.25807368  1.12895998 ...  0.15339498  0.52502227
    0.26715782]
  [ 0.92413672  2.25887963  1.12895998 ...  0.13809446  0.56978045
    0.26715782]
  ...
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[-0.96013653  1.24394384  1.128959

In [47]:
# BaseGRU training and testing
#
# First print the shapes of the last fold (index 4) for the four collections
# that are handed to cross_validate below, one shape per line.
for fold_array in (train_x_list_random[4], train_y_list_random[4],
                   test_x_list_filtered[4], test_y_list[4]):
    print(fold_array.shape)

# NOTE(review): evaluation runs on test_x_list_filtered / test_y_list, not on
# the test_x_list_random / test_y_list_random folds built in the reshaping
# cell -- confirm this is intended.
train_accuracies, test_accuracies = cross_validate(
    GRUModel,
    num_folds,
    train_x_list_random,
    train_y_list_random,
    test_x_list_filtered,
    test_y_list,
)


(16064, 81, 23)
(16064,)
(4015, 81, 23)
(4015,)
Testing on Base GRU class: 
Training on fold 1/5
Epoch 1/10
 37/502 [=>............................] - ETA: 20s - loss: 0.6922 - accuracy: 0.5245

KeyboardInterrupt: 