### Import Packages

In [2]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import load_model

seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

print("TensorFlow version: ", tf.__version__)
print("Number of GPUs available: ", len(tf.config.list_physical_devices('GPU')))

TensorFlow version:  2.19.0
Number of GPUs available:  1


### Data Preprocessing & Preparation

In [3]:
time_step = 6
wind_cols = ['uSq','e_uSq','n_uSq','Z2','Z3','Z4']
scaled_wind_cols = ['scaled_' + col for col in wind_cols]

# Load datasets
df1 = pd.read_csv('Data_2004_2013.csv')
df2 = pd.read_csv('Data_1973_2004.csv')

# Set the timestamp column as the index
df1['TimeStamp_1'] = pd.to_datetime(df1['TimeStamp_1'], format='mixed')
df2['TimeStamp_1'] = pd.to_datetime(df2['TimeStamp_1'], format='mixed')
df1.set_index('TimeStamp_1', inplace=True)
df2.set_index('TimeStamp_1', inplace=True)

# df_train: [2004-11-01 00:00:00 ~ 2007-10-31 23:00:00, 2008-11-01 00:00:00 ~ 2013-10-31 23:00:00]
split1 = pd.to_datetime('2007-10-31 23:59:00')
split2 = pd.to_datetime('2008-10-31 23:59:00')
df_train = df1[(df1.index < split1) | (df1.index > split2)]

# Data normalization
scaled_wave = MinMaxScaler()
scaled_wave = scaled_wave.fit(df_train[['H']])

# Data preparation
def create_sequences(data, time_step):
    X = []
    for i in range(len(data)-time_step):
        X.append(data[i:(i+time_step+1), :])  # Past hours wind features
    return np.array(X)

X = create_sequences(df2[scaled_wind_cols].values, time_step)
print(X.shape)

(271746, 7, 6)


### Model Deployment

In [4]:
best_shifts = [4.130, 4.130, 4.130, 5.000, 5.000, 5.000, 2.350, 4.500, -2.680, -2.540, 4.130, 4.130]

# Adjust the shape of Sigmoid function
def adjusted_probs(p, shift):
    p = np.clip(p, 1e-10, 1 - 1e-10)
    logit = np.log(p / (1 - p))
    return 1 / (1 + np.exp(-(logit - shift)))

# Deploy model_reg0
model_reg0 = load_model('best_model_reg0.keras')
df2['H_pred0'] = 0.0
df2.loc[df2.index[time_step:], 'H_pred0'] = model_reg0.predict(X, verbose=0)

# Deploy model_cls
model_cls = load_model('best_model_cls.keras')
df2['Spike_prob'] = 0.0
df2.loc[df2.index[time_step:], 'Spike_prob'] = model_cls.predict(X, verbose=0)

# Deploy model_reg1
model_reg1 = load_model('model_reg1.keras')
df2['H_pred1'] = 0.0
df2.loc[df2.index[time_step:], 'H_pred1'] = model_reg1.predict(X, verbose=0)

# Deploy model_stack
for i in range(12):
    month_mask = (df2.index.month == i+1)
    df2.loc[month_mask, 'Spike_prob'] = adjusted_probs(df2.loc[month_mask, 'Spike_prob'], best_shifts[i])

H_pred = (1 - df2['Spike_prob']) * df2['H_pred0'] + df2['Spike_prob'] * df2['H_pred1']
df2['H_pred'] = scaled_wave.inverse_transform(H_pred.values.reshape(-1, 1))

# Save to CSV
df2.to_csv('Data_1973_2004.csv')
df2.head(10)

I0000 00:00:1751479570.404873 1586641 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38483 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:81:00.0, compute capability: 8.0
I0000 00:00:1751479571.750314 1586991 cuda_dnn.cc:529] Loaded cuDNN version 90300


Unnamed: 0_level_0,uSq,e_uSq,n_uSq,Z2,Z3,Z4,scaled_uSq,scaled_e_uSq,scaled_n_uSq,scaled_Z2,scaled_Z3,scaled_Z4,H_pred0,Spike_prob,H_pred1,H_pred
TimeStamp_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1973-11-01 00:00:00,84.476135,57.612586,61.781934,1165.979518,16093.4,222128.707649,0.203203,0.783349,0.588009,0.256944,0.042553,0.007747,0.0,1.608288e-12,0.0,0.0
1973-11-01 01:00:00,72.305431,57.745684,43.514494,1078.721566,16093.4,240096.732733,0.173217,0.783573,0.54365,0.234647,0.042553,0.008772,0.0,1.608288e-12,0.0,0.0
1973-11-01 02:00:00,84.476135,67.465641,50.839007,1165.979518,16093.4,222128.707649,0.203203,0.79995,0.561436,0.256944,0.042553,0.007747,0.0,1.608288e-12,0.0,0.0
1973-11-01 03:00:00,77.720269,69.249267,35.284264,1118.384272,16093.4,231581.872272,0.186558,0.802955,0.523664,0.244782,0.042553,0.008286,0.0,1.608288e-12,0.0,0.0
1973-11-01 04:00:00,84.476135,75.268788,38.351363,1165.979518,16093.4,222128.707649,0.203203,0.813097,0.531112,0.256944,0.042553,0.007747,0.0,1.608288e-12,0.0,0.0
1973-11-01 05:00:00,84.476135,67.465641,50.839007,1165.979518,16093.4,222128.707649,0.203203,0.79995,0.561436,0.256944,0.042553,0.007747,0.0,1.608288e-12,0.0,0.0
1973-11-01 06:00:00,72.305431,3.784174,72.206339,1078.721566,16093.4,240096.732733,0.173217,0.692656,0.613323,0.234647,0.042553,0.008772,0.313025,0.1111145,0.438039,3.324931
1973-11-01 07:00:00,48.163739,-5.869683,47.804734,880.408038,16093.4,294178.962794,0.113738,0.676391,0.554068,0.183973,0.042553,0.011857,0.237922,0.06775903,0.430285,2.552374
1973-11-01 08:00:00,90.321074,4.72704,90.197292,1205.642224,16093.4,214821.211816,0.217604,0.694245,0.657011,0.267079,0.042553,0.00733,0.221435,0.107804,0.43617,2.487567
1973-11-01 09:00:00,96.361513,5.043172,96.229453,1245.304929,16093.4,207979.200458,0.232486,0.694777,0.671659,0.277214,0.042553,0.00694,0.258804,0.0951355,0.429174,2.797037
