In [1]:
# Look-back window lengths; the training loop builds one model per entry.
window_size = [7, 14, 30, 60]

# Number of input features per time step (depends on how many features we have).
num_features = 8

# Forecast horizon: how many days each model predicts.
output_days = 7

# Values predicted per day: min and max.
output_features = 2

Meta Model building

In [None]:
import tensorflow as tf
from tensorflow.keras import layers,models

In [None]:
def baseline_LSTM_model(window_size, num_features, output_days, output_features):
    """Build and compile a two-layer stacked LSTM forecaster.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled Sequential model that takes (window_size, num_features)
        inputs and produces (output_days, output_features) outputs, using the
        Adam optimizer, MSE loss and an MAE metric.
    """
    model = models.Sequential()
    # The first LSTM has to return the whole sequence: the second LSTM needs a
    # 3-D (batch, time, units) input and cannot be built on the 2-D last state.
    model.add(layers.LSTM(100, input_shape=(window_size, num_features), return_sequences=True))
    model.add(layers.LSTM(100))
    # One flat vector holding every (day, feature) value, reshaped below.
    model.add(layers.Dense(output_days * output_features))
    model.add(layers.Reshape((output_days, output_features)))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    return model

In [None]:
def baseline_GRU_model(window_size, num_features, output_days, output_features):
    """Build and compile a single-layer GRU forecaster.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled Sequential model whose output has shape
        (output_days, output_features).
    """
    flat_units = output_days * output_features
    gru_net = models.Sequential([
        # Only the final GRU state is kept; it feeds the dense head directly.
        layers.GRU(100, input_shape=(window_size, num_features), return_sequences=False),
        layers.Dense(flat_units),
        layers.Reshape((output_days, output_features)),
    ])
    gru_net.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return gru_net

In [2]:
def CNN_LSTM_model(window_size, num_features, output_days, output_features):
    """Build and compile a CNN + LSTM forecaster.

    The CNN_LSTM model is a combination of CNN and LSTM: the convolutional
    layers extract features from the input window and the LSTM layers model
    the resulting sequence.

    Each Conv1D (kernel 2, no padding) removes one time step and each
    MaxPooling1D (pool 2) halves the length, so `window_size` must be at
    least 7 for any time step to reach the LSTM layers.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled Sequential model whose output has shape
        (output_days, output_features).
    """
    model = models.Sequential()
    model.add(layers.Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(window_size, num_features)))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.Conv1D(filters=128, kernel_size=2, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    # `return_sequences` is an LSTM argument, not a `model.add` argument; the
    # first LSTM must return the full sequence so the second one can consume it.
    model.add(layers.LSTM(100, return_sequences=True))
    model.add(layers.LSTM(100))
    model.add(layers.Dense(output_days * output_features))
    # Use the function arguments rather than a fixed (7, 2) so the Dense width
    # and the target shape can never disagree.
    model.add(layers.Reshape((output_days, output_features)))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model
# Idea: a KAN or an attention layer could be used to adjust the weights.

In [None]:
# Define the CNN + GRU model
def CNN_GRU_model(window_size, num_features, output_days, output_features):
    """Build and compile a CNN + GRU forecaster.

    Two Conv1D/MaxPooling1D stages feed a single GRU, whose final state is
    projected to the flattened forecast and reshaped.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled Sequential model whose output has shape
        (output_days, output_features).
    """
    cnn_gru = models.Sequential([
        # Convolutional feature extractor.
        layers.Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(window_size, num_features)),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(filters=128, kernel_size=2, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        # Recurrent summary of the pooled sequence.
        layers.GRU(100, return_sequences=False),
        # Forecast head: flat vector, then (days, features).
        layers.Dense(output_days * output_features),
        layers.Reshape((output_days, output_features)),
    ])
    cnn_gru.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return cnn_gru

In [7]:
from tensorflow.keras.layers import Attention
def CNN_LSTM_SA_model(window_size, num_features, output_days, output_features):
    """Build and compile a CNN + LSTM forecaster with self-attention.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled functional model whose output has shape
        (output_days, output_features).
    """
    window_input = layers.Input(shape=(window_size, num_features))

    # Convolutional front end: two conv/pool stages with 64, then 128 filters.
    sequence = window_input
    for n_filters in (64, 128):
        sequence = layers.Conv1D(filters=n_filters, kernel_size=2, activation='relu')(sequence)
        sequence = layers.MaxPooling1D(pool_size=2)(sequence)

    # Two stacked LSTMs; both keep the full sequence for the attention layer.
    for _ in range(2):
        sequence = layers.LSTM(100, return_sequences=True)(sequence)

    # Self-attention: the LSTM sequence is used as both query and value, and
    # the attended sequence is concatenated with the original one.
    attended = Attention()([sequence, sequence])
    combined = layers.Concatenate()([sequence, attended])
    pooled = layers.GlobalAveragePooling1D()(combined)

    flat_forecast = layers.Dense(output_days*output_features)(pooled)
    forecast = layers.Reshape((output_days, output_features))(flat_forecast)

    sa_model = models.Model(inputs=window_input, outputs=forecast)
    sa_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return sa_model

In [None]:
def CNN_GRU_SA_model(window_size, num_features, output_days, output_features):
    """Build and compile a CNN + GRU forecaster with self-attention.

    Args:
        window_size: Number of time steps in each input window.
        num_features: Number of features per time step.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled functional model whose output has shape
        (output_days, output_features).
    """
    inputs = layers.Input(shape=(window_size, num_features))

    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(inputs)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.Conv1D(filters=128, kernel_size=2, activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)

    x = layers.GRU(100, return_sequences=True)(x)

    attention = layers.Attention()([x, x])  # Self-attention (query = value = x)

    # The attention output still has a time axis. Without collapsing it, Dense
    # is applied per time step and the Reshape below only succeeds when exactly
    # one step is left (window 7); longer windows fail with a size mismatch.
    x = layers.GlobalAveragePooling1D()(attention)

    x = layers.Dense(output_days * output_features)(x)

    outputs = layers.Reshape((output_days, output_features))(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    return model

Meta Model training 

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
# Training configuration.
seed = 42
# NOTE(review): 0.8 holds out 80% of the samples and trains on 20%. This looks
# deliberate (held-out data for the later ensemble stage) - confirm.
test_size = 0.8
epochs = 20
batch_size = 8
shuffle=True
# Single place to repoint the notebook at another copy of the dataset.
DATA_DIR = "/Users/hoyinchui/Downloads/GLD_model_testing_data_i_v5_pkl"

# Train, evaluate, plot and save one CNN_LSTM model per look-back window.
for window in window_size:
    model = CNN_LSTM_model(window, num_features, output_days, output_features)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    X = pd.read_pickle(f"{DATA_DIR}/X_{window}days_i.pkl")
    y = pd.read_pickle(f"{DATA_DIR}/y_{window}days_i.pkl")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=shuffle, random_state=seed)
    # The held-out split doubles as validation data to monitor each epoch.
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)
    test_loss, test_mae = model.evaluate(X_test, y_test)

    # Plot loss and MAE side by side; both panels share the same layout, so
    # they are drawn by one loop instead of two copies of the same code.
    plt.figure(figsize=(12, 6))
    for position, (metric, label) in enumerate([('loss', 'Loss'), ('mae', 'MAE')], start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[metric], label=f'Training {label}')
        plt.plot(history.history[f'val_{metric}'], label=f'Validation {label}')
        plt.title(f'{label} over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(label)
        plt.legend()
    # Name the window so the per-window figures can be told apart.
    plt.suptitle(f'CNN_LSTM - {window}-day window')
    plt.tight_layout()
    plt.show()
    model.save(f"CNN_LSTM_{window}days.h5")

Build the ensemble model from the meta models (transfer learning / fine-tuning). Because backpropagation runs through the ensemble layer all the way into the individual models, we can train the individual models on data from multiple stocks to improve generalization, and then fine-tune on the target data when training the ensemble layer.

In [9]:
from tensorflow.keras.models import Model
def ensemble_stacking(models, output_days, output_features):
    """Build a stacking ensemble on top of previously saved base models.

    Every base model keeps its own input; their predictions are flattened,
    concatenated and passed through a small dense stacker that produces the
    final forecast.

    Args:
        models: Paths of the saved base models to load, one per ensemble input.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled functional model with one input per base model and an
        output of shape (output_days, output_features).

    Raises:
        ValueError: If fewer than two base models are given.
    """
    model_paths = list(models)
    if len(model_paths) < 2:
        raise ValueError("ensemble_stacking needs at least two base models")

    # NOTE(review): Keras requires unique layer names inside one model; this
    # presumably holds when the base models were built in the same session -
    # confirm for models saved from separate runs.
    models_inputs = []
    models_outputs = []
    for model_path in model_paths:
        # The `models` argument shadows the `tensorflow.keras.models` module
        # inside this function, so the loader is reached through `tf`.
        # compile=False: only the layer graph is reused; the ensemble is
        # compiled below.
        base_model = tf.keras.models.load_model(model_path, compile=False)
        models_inputs.append(base_model.input)
        # Flatten each prediction so the stacker sees one vector per model.
        models_outputs.append(layers.Flatten()(base_model.output))
    merged_output = layers.concatenate(models_outputs, axis=-1)
    # TODO: find a way to drop the last layer of each base model and feed its
    # learned features directly into the stacking model.

    # Stacking model: a hidden layer, then a projection to exactly
    # output_days * output_features values so the Reshape is always valid.
    merged_output = layers.Dense(64, activation='relu')(merged_output)
    merged_output = layers.Dense(output_days * output_features)(merged_output)
    final_output = layers.Reshape((output_days, output_features))(merged_output)
    ensemble_model = Model(inputs=models_inputs, outputs=final_output)
    ensemble_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return ensemble_model


    

In [None]:
def ensemble_weighting(models, output_days, output_features):
    """Build a learned weighted-average ensemble of saved base models.

    The forecast is sum_i(w_i * y_i), with one trainable scalar weight per
    base model shared across all days and features.

    Args:
        models: Paths of the saved base models to load, one per ensemble input.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled functional model with one input per base model and an
        output of shape (output_days, output_features).

    Raises:
        ValueError: If fewer than two base models are given.
    """
    model_paths = list(models)
    if len(model_paths) < 2:
        raise ValueError("ensemble_weighting needs at least two base models")
    n_models = len(model_paths)

    models_inputs = []
    models_outputs = []
    for model_path in model_paths:
        # The `models` argument shadows the `tensorflow.keras.models` module
        # inside this function, so the loader is reached through `tf`.
        base_model = tf.keras.models.load_model(model_path, compile=False)
        models_inputs.append(base_model.input)
        # Add a trailing axis of size 1 so the predictions can be lined up
        # side by side. Assumes each base model emits
        # output_days * output_features values per sample.
        models_outputs.append(
            layers.Reshape((output_days, output_features, 1))(base_model.output)
        )
    # (batch, output_days, output_features, n_models)
    merged_output = layers.concatenate(models_outputs, axis=-1)
    # Dense acts on the last axis only, so its kernel holds exactly one weight
    # per base model. No bias keeps it a pure weighted sum, and the constant
    # initializer starts training from a plain average.
    merged_output = layers.Dense(
        1,
        use_bias=False,
        kernel_initializer=tf.keras.initializers.Constant(1.0 / n_models),
    )(merged_output)
    final_output = layers.Reshape((output_days, output_features))(merged_output)
    ensemble_model = Model(inputs=models_inputs, outputs=final_output)
    ensemble_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return ensemble_model


In [10]:
# TODO: need more understanding of MoE.
def ensemble_MoE(models, output_days, output_features):
    """Build a mixture-of-experts ensemble of saved base models.

    Stacking is y_p = sum(w_i * y_i) with fixed learned weights w_i.
    MoE is y_p = sum(g_i * y_i), where the gate values g_i are computed per
    sample and sum to 1 (softmax).

    Other kinds of model, such as anomaly detection, could be added to the
    ensemble: since most of the data is normal, such a model could be used to
    detect the anomalous data (events).

    Args:
        models: Paths of the saved expert models to load, one per input.
        output_days: Number of future days to predict.
        output_features: Number of values predicted per day.

    Returns:
        A compiled functional model with one input per expert and an output
        of shape (output_days, output_features).

    Raises:
        ValueError: If fewer than two expert models are given.
    """
    model_paths = list(models)
    if len(model_paths) < 2:
        raise ValueError("ensemble_MoE needs at least two expert models")
    n_experts = len(model_paths)
    flat_units = output_days * output_features

    models_inputs = []
    models_outputs = []
    for model_path in model_paths:
        # The `models` argument shadows the `tensorflow.keras.models` module
        # inside this function, so the loader is reached through `tf`.
        expert = tf.keras.models.load_model(model_path, compile=False)
        models_inputs.append(expert.input)
        # One column per expert. Assumes each expert emits
        # output_days * output_features values per sample.
        models_outputs.append(layers.Reshape((flat_units, 1))(expert.output))
    # (batch, flat_units, n_experts)
    stacked_experts = layers.concatenate(models_outputs, axis=-1)

    # The gate sees every expert's prediction and emits one softmax weight per
    # expert for each sample.
    gate = layers.Dense(n_experts, activation='softmax')(layers.Flatten()(stacked_experts))
    gate = layers.Reshape((n_experts, 1))(gate)

    # Contract the expert axis: (batch, flat_units, n_experts) with
    # (batch, n_experts, 1) gives (batch, flat_units, 1) = sum_i g_i * y_i.
    expert_outputs = layers.Dot(axes=(2, 1))([stacked_experts, gate])
    final_output = layers.Reshape((output_days, output_features))(expert_outputs)
    ensemble_model = Model(inputs=models_inputs, outputs=final_output)
    ensemble_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return ensemble_model

### The following 3 cells can be skipped; they are an old version kept for reference

In [None]:
#load the model and predict and save the prediction
#for window in window_size:
#    model = tf.keras.models.load_model(f"CNN_LSTM_{window}days.h5")
#    y_pred = model.predict(X_test)
#    pd.DataFrame(y_pred).to_csv(f"y_pred_CNN_LSTM_{window}days.csv")

In [None]:
#putting all windows prediction into the dataframe as X
#import numpy as np
#X_ensemble = pd.DataFrame()
#for window in window_size:
#    X_ensemble = pd.concat([X_ensemble, pd.read_csv(f"y_pred_CNN_LSTM_{window}days.csv")], axis=1)
#    #combine the columns so that they can be used as input for the ensemble model
#   X_ensemble = X_ensemble.applymap(lambda x: np.vstack(x))

These steps can be combined into one for loop; I just split them for clarity.

In [None]:
#def train_test_split_ensemble(X,y,test_size=0.5, seed=42):    
#    #split again for the prediction model and the ensemble model
#    #Since we already shuffled the data, we can just split the data in half, which is easier to manage
#    X_train_ensemble, X_test_ensemble, y_train_ensemble, y_test_ensemble = train_test_split(X, y, test_size=test_size, shuffle=False, random_state=seed)
#    return X_train_ensemble, X_test_ensemble, y_train_ensemble, y_test_ensemble

In [None]:
#X_train_ensemble, X_test_ensemble, y_train_ensemble, y_test_ensemble = train_test_split_ensemble(X_ensemble, y_test)

### Ensemble Model (TBC)

In [3]:
#from tensorflow.keras.models import Model
#def ensemble_CNN_LSTM(window_size, num_features):
#    inputs = layers.Input(shape=(window_size, num_features))
#    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(inputs)
#    x = layers.MaxPooling1D(pool_size=2)(x)
#    x = layers.Conv1D(filters=128, kernel_size=2, activation='relu')(x)
#    x = layers.MaxPooling1D(pool_size=2)(x)
#    x = layers.LSTM(100)(x)
#    model = Model(inputs=inputs, outputs=x)
#    return model

In [None]:
#def ensemble_s_models(window_size, num_features, output_days, output_features):
#        models = []
#    for window in window_size:
#        model = ensemble_CNN_LSTM(window, num_features)
#        models.append(model)

In [None]:
#def ensemble_MoE_models(window_size, num_features, output_days, output_features):


In [4]:
#def ensemble_h_models(window_size, num_features, output_days, output_features):
#    models = []
#    for window in window_size:
#        model = ensemble_CNN_LSTM(window, num_features)
#        models.append(model)
#    models_inputs = [model.input for model in models]
#    models_outputs = [model.output for model in models]
#   merged = layers.concatenate(models_outputs, axis=-1)
#    merged_output = layers.Dense(output_days * output_features)(merged)
#    final_output = layers.Reshape((output_days, output_features))(merged_output)
#    ensemble_model = Model(inputs=models_inputs, outputs=final_output)
#    ensemble_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
#
#    return ensemble_model

In [6]:
#from sklearn.model_selection import train_test_split
#import pandas as pd
#seed = 42
#test_size = 0.8 # we need to consider the ensemble model's training and testing data, since it cannot use the same training data
#epochs = 20
#batch_size = 8
#shuffle=True
#all_X_train = []
#all_X_test = []
#all_y_train = []
#all_y_test = []
#
#y = pd.read_pickle(f"/Users/hoyinchui/Downloads/y_a.pkl")
#for window in window_size:
#    X = pd.read_pickle(f"/Users/hoyinchui/Downloads/X_{window}days_a.pkl")
#    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=shuffle, random_state=seed)
#    all_X_train.append(X_train)
#    all_X_test.append(X_test)
#    all_y_train.append(y_train)
#    all_y_test.append(y_test)
#
##ensemble_model = ensemble_h_models(window_size, num_features, output_days, output_features)
##history = ensemble_model.fit(all_X_train, all_y_train[0], epochs=epochs, batch_size=batch_size, validation_data=(all_X_test, all_y_test[0]), verbose=1)
##ensemble_model.save(f"ensemble_CNN_LSTM.h5")
#

Epoch 1/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 0.0279 - mae: 0.1027 - val_loss: 5.5140e-04 - val_mae: 0.0176
Epoch 2/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 5.2824e-04 - mae: 0.0169 - val_loss: 4.1390e-04 - val_mae: 0.0154
Epoch 3/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 5.1239e-04 - mae: 0.0167 - val_loss: 4.6971e-04 - val_mae: 0.0162
Epoch 4/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 4.3028e-04 - mae: 0.0155 - val_loss: 0.0011 - val_mae: 0.0263
Epoch 5/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 7.6607e-04 - mae: 0.0210 - val_loss: 4.8477e-04 - val_mae: 0.0170
Epoch 6/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 4.1826e-04 - mae: 0.0152 - val_loss: 3.3729e-04 - val_mae: 0.0135
Epoch 7/20
[1m126/126[0m [32m━━━━━━━━━━

