In [1]:
%load_ext autoreload

In [2]:
import itertools

import numpy as np
import pandas as pd
pd.set_option("max_columns", None)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from scipy.spatial.distance import jensenshannon
from scipy import signal
from scipy.special import rel_entr

from time_delay_embedding import time_delay_embedding
from feature_extraction import covariance, co_integration, covariance

## Baseline

### data

In [3]:
buoy = pd.read_csv(
    "data/smart_buoy.csv",
    skiprows = [1],
    parse_dates = ["time"],
)
buoy.set_index("time", inplace = True)
buoy = buoy.resample("H").mean()
buoy.columns = [
    "PeakP",
    "PeakD",
    "Upcross",
    "SWH",
    "SeaTemp",
    "Hmax",
    "THmax",
    "MCurDir",
    "MCurSpd",
]
buoy.head()

Unnamed: 0_level_0,PeakP,PeakD,Upcross,SWH,SeaTemp,Hmax,THmax,MCurDir,MCurSpd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-01-01 00:00:00+00:00,4.01,162.15385,3.89,146.0,10.130833,267.5,4.675,178.051281,0.137
2022-01-01 01:00:00+00:00,4.13,161.362635,3.925,142.5,10.135,206.0,4.08,179.882784,0.126
2022-01-01 02:00:00+00:00,4.085,158.46154,3.855,143.0,10.118333,257.5,4.535,295.648351,0.178667
2022-01-01 03:00:00+00:00,5.985,191.428575,4.075,153.5,10.143333,260.0,8.17,339.413908,0.224333
2022-01-01 04:00:00+00:00,5.97,221.186815,4.305,171.0,10.143333,292.0,4.995,288.586082,0.211667


In [4]:
buoy.shape

(7835, 9)

### AR-Auto-Regressive Distributed Lags

In [5]:
# lagged features using time delay embedding
buoy_ds = []
for col in buoy:
    col_df = time_delay_embedding(buoy[col], n_lags = 24, horizon = 12)
    buoy_ds.append(col_df)
buoy_df = pd.concat(buoy_ds, axis = 1).dropna()
buoy_df.head()

Unnamed: 0_level_0,PeakP(t-23),PeakP(t-22),PeakP(t-21),PeakP(t-20),PeakP(t-19),PeakP(t-18),PeakP(t-17),PeakP(t-16),PeakP(t-15),PeakP(t-14),PeakP(t-13),PeakP(t-12),PeakP(t-11),PeakP(t-10),PeakP(t-9),PeakP(t-8),PeakP(t-7),PeakP(t-6),PeakP(t-5),PeakP(t-4),PeakP(t-3),PeakP(t-2),PeakP(t-1),PeakP(t-0),PeakP(t+1),PeakP(t+2),PeakP(t+3),PeakP(t+4),PeakP(t+5),PeakP(t+6),PeakP(t+7),PeakP(t+8),PeakP(t+9),PeakP(t+10),PeakP(t+11),PeakP(t+12),PeakD(t-23),PeakD(t-22),PeakD(t-21),PeakD(t-20),PeakD(t-19),PeakD(t-18),PeakD(t-17),PeakD(t-16),PeakD(t-15),PeakD(t-14),PeakD(t-13),PeakD(t-12),PeakD(t-11),PeakD(t-10),PeakD(t-9),PeakD(t-8),PeakD(t-7),PeakD(t-6),PeakD(t-5),PeakD(t-4),PeakD(t-3),PeakD(t-2),PeakD(t-1),PeakD(t-0),PeakD(t+1),PeakD(t+2),PeakD(t+3),PeakD(t+4),PeakD(t+5),PeakD(t+6),PeakD(t+7),PeakD(t+8),PeakD(t+9),PeakD(t+10),PeakD(t+11),PeakD(t+12),Upcross(t-23),Upcross(t-22),Upcross(t-21),Upcross(t-20),Upcross(t-19),Upcross(t-18),Upcross(t-17),Upcross(t-16),Upcross(t-15),Upcross(t-14),Upcross(t-13),Upcross(t-12),Upcross(t-11),Upcross(t-10),Upcross(t-9),Upcross(t-8),Upcross(t-7),Upcross(t-6),Upcross(t-5),Upcross(t-4),Upcross(t-3),Upcross(t-2),Upcross(t-1),Upcross(t-0),Upcross(t+1),Upcross(t+2),Upcross(t+3),Upcross(t+4),Upcross(t+5),Upcross(t+6),Upcross(t+7),Upcross(t+8),Upcross(t+9),Upcross(t+10),Upcross(t+11),Upcross(t+12),SWH(t-23),SWH(t-22),SWH(t-21),SWH(t-20),SWH(t-19),SWH(t-18),SWH(t-17),SWH(t-16),SWH(t-15),SWH(t-14),SWH(t-13),SWH(t-12),SWH(t-11),SWH(t-10),SWH(t-9),SWH(t-8),SWH(t-7),SWH(t-6),SWH(t-5),SWH(t-4),SWH(t-3),SWH(t-2),SWH(t-1),SWH(t-0),SWH(t+1),SWH(t+2),SWH(t+3),SWH(t+4),SWH(t+5),SWH(t+6),SWH(t+7),SWH(t+8),SWH(t+9),SWH(t+10),SWH(t+11),SWH(t+12),SeaTemp(t-23),SeaTemp(t-22),SeaTemp(t-21),SeaTemp(t-20),SeaTemp(t-19),SeaTemp(t-18),SeaTemp(t-17),SeaTemp(t-16),SeaTemp(t-15),SeaTemp(t-14),SeaTemp(t-13),SeaTemp(t-12),SeaTemp(t-11),SeaTemp(t-10),SeaTemp(t-9),SeaTemp(t-8),SeaTemp(t-7),SeaTemp(t-6),SeaTemp(t-5),SeaTemp(t-4),SeaTemp(t-3),SeaTemp(t-2),SeaTemp(t-1),SeaTemp(t-0),SeaTemp(t+1),SeaTemp(t+2),SeaTemp(t+3),SeaTemp(t+4),SeaTemp(t+5),SeaTemp(t+6),SeaTemp(t+7),SeaTemp(t+8),SeaTemp(t+9),SeaTemp(t+10),SeaTemp(t+11),SeaTemp(t+12),Hmax(t-23),Hmax(t-22),Hmax(t-21),Hmax(t-20),Hmax(t-19),Hmax(t-18),Hmax(t-17),Hmax(t-16),Hmax(t-15),Hmax(t-14),Hmax(t-13),Hmax(t-12),Hmax(t-11),Hmax(t-10),Hmax(t-9),Hmax(t-8),Hmax(t-7),Hmax(t-6),Hmax(t-5),Hmax(t-4),Hmax(t-3),Hmax(t-2),Hmax(t-1),Hmax(t-0),Hmax(t+1),Hmax(t+2),Hmax(t+3),Hmax(t+4),Hmax(t+5),Hmax(t+6),Hmax(t+7),Hmax(t+8),Hmax(t+9),Hmax(t+10),Hmax(t+11),Hmax(t+12),THmax(t-23),THmax(t-22),THmax(t-21),THmax(t-20),THmax(t-19),THmax(t-18),THmax(t-17),THmax(t-16),THmax(t-15),THmax(t-14),THmax(t-13),THmax(t-12),THmax(t-11),THmax(t-10),THmax(t-9),THmax(t-8),THmax(t-7),THmax(t-6),THmax(t-5),THmax(t-4),THmax(t-3),THmax(t-2),THmax(t-1),THmax(t-0),THmax(t+1),THmax(t+2),THmax(t+3),THmax(t+4),THmax(t+5),THmax(t+6),THmax(t+7),THmax(t+8),THmax(t+9),THmax(t+10),THmax(t+11),THmax(t+12),MCurDir(t-23),MCurDir(t-22),MCurDir(t-21),MCurDir(t-20),MCurDir(t-19),MCurDir(t-18),MCurDir(t-17),MCurDir(t-16),MCurDir(t-15),MCurDir(t-14),MCurDir(t-13),MCurDir(t-12),MCurDir(t-11),MCurDir(t-10),MCurDir(t-9),MCurDir(t-8),MCurDir(t-7),MCurDir(t-6),MCurDir(t-5),MCurDir(t-4),MCurDir(t-3),MCurDir(t-2),MCurDir(t-1),MCurDir(t-0),MCurDir(t+1),MCurDir(t+2),MCurDir(t+3),MCurDir(t+4),MCurDir(t+5),MCurDir(t+6),MCurDir(t+7),MCurDir(t+8),MCurDir(t+9),MCurDir(t+10),MCurDir(t+11),MCurDir(t+12),MCurSpd(t-23),MCurSpd(t-22),MCurSpd(t-21),MCurSpd(t-20),MCurSpd(t-19),MCurSpd(t-18),MCurSpd(t-17),MCurSpd(t-16),MCurSpd(t-15),MCurSpd(t-14),MCurSpd(t-13),MCurSpd(t-12),MCurSpd(t-11),MCurSpd(t-10),MCurSpd(t-9),MCurSpd(t-8),MCurSpd(t-7),MCurSpd(t-6),MCurSpd(t-5),MCurSpd(t-4),MCurSpd(t-3),MCurSpd(t-2),MCurSpd(t-1),MCurSpd(t-0),MCurSpd(t+1),MCurSpd(t+2),MCurSpd(t+3),MCurSpd(t+4),MCurSpd(t+5),MCurSpd(t+6),MCurSpd(t+7),MCurSpd(t+8),MCurSpd(t+9),MCurSpd(t+10),MCurSpd(t+11),MCurSpd(t+12)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1
2022-01-02 00:00:00+00:00,4.01,4.13,4.085,5.985,5.97,7.18,8.515,7.38,7.38,7.275,7.415,9.03,6.255,9.89,9.03,6.645,9.005,6.905,8.195,11.805,12.5,12.915,10.82,11.435,11.805,8.66,11.435,8.66,5.645,5.98,6.165,6.675,6.48,6.39,6.255,6.35,162.15385,161.362635,158.46154,191.428575,221.186815,222.9011,225.01099,224.52747,222.373625,223.032965,219.86813,223.34066,222.549445,225.142855,220.395605,226.549455,221.934065,227.73627,233.14286,225.93407,229.758245,228.21978,231.38461,230.06593,227.25275,225.186815,230.63737,228.30769,226.549455,222.68132,225.406595,227.648355,224.923075,227.428575,221.714285,221.84615,3.89,3.925,3.855,4.075,4.305,5.05,5.61,5.775,5.54,5.495,5.24,5.14,5.085,5.22,5.01,5.17,5.185,5.035,5.155,4.995,4.72,4.45,4.35,4.33,4.335,4.16,4.165,4.245,4.59,4.85,5.105,5.05,4.99,4.915,4.855,4.885,146.0,142.5,143.0,153.5,171.0,214.5,269.5,273.0,245.5,244.5,219.0,211.5,212.5,205.0,206.0,216.0,192.5,167.5,164.0,145.5,129.0,123.0,123.0,130.0,136.5,133.0,136.5,145.5,170.5,184.5,211.5,209.5,207.0,202.0,208.5,215.0,10.130833,10.135,10.118333,10.143333,10.143333,10.130833,10.076667,10.07,10.046667,10.06,10.046667,10.053333,10.07,10.07,10.07,10.07,10.07,10.07,10.07,10.043333,10.03,10.006667,10.003333,9.996667,9.98,9.966667,9.956667,9.976667,9.99,9.99,9.993333,9.99,9.99,9.99,9.99,9.99,267.5,206.0,257.5,260.0,292.0,318.0,473.0,421.0,400.0,370.0,383.5,325.0,307.0,326.0,295.0,365.0,314.0,281.5,236.0,214.0,194.5,181.5,175.5,189.0,209.5,190.5,227.5,226.5,269.0,303.0,348.0,342.5,331.0,301.0,323.0,349.0,4.675,4.08,4.535,8.17,4.995,6.97,6.99,6.965,6.925,8.335,6.61,8.035,6.82,6.665,6.985,6.765,6.35,6.8,10.425,8.1,7.985,7.575,9.21,8.145,7.67,7.28,6.345,6.02,6.97,5.85,5.94,5.6,6.09,6.055,5.88,5.575,178.051281,179.882784,295.648351,339.413908,288.586082,172.717947,130.461544,126.622711,43.399266,57.509158,64.043956,75.692308,79.340658,80.87912,73.201465,73.245423,68.395606,103.296706,224.249082,224.307688,170.109886,239.956039,42.124543,40.029304,54.124542,55.311355,57.934066,59.633699,46.212454,35.560439,16.263737,121.582418,121.186812,19.897436,44.102564,56.87912,0.137,0.126,0.178667,0.224333,0.211667,0.173167,0.109333,0.077,0.097,0.2005,0.165,0.227833,0.240833,0.161833,0.219333,0.185333,0.091,0.0555,0.031667,0.027667,0.033833,0.050667,0.077,0.129833,0.1705,0.161833,0.2215,0.201833,0.210833,0.1265,0.095167,0.072667,0.086167,0.082833,0.146833,0.185
2022-01-02 01:00:00+00:00,4.13,4.085,5.985,5.97,7.18,8.515,7.38,7.38,7.275,7.415,9.03,6.255,9.89,9.03,6.645,9.005,6.905,8.195,11.805,12.5,12.915,10.82,11.435,11.805,8.66,11.435,8.66,5.645,5.98,6.165,6.675,6.48,6.39,6.255,6.35,6.335,161.362635,158.46154,191.428575,221.186815,222.9011,225.01099,224.52747,222.373625,223.032965,219.86813,223.34066,222.549445,225.142855,220.395605,226.549455,221.934065,227.73627,233.14286,225.93407,229.758245,228.21978,231.38461,230.06593,227.25275,225.186815,230.63737,228.30769,226.549455,222.68132,225.406595,227.648355,224.923075,227.428575,221.714285,221.84615,227.6044,3.925,3.855,4.075,4.305,5.05,5.61,5.775,5.54,5.495,5.24,5.14,5.085,5.22,5.01,5.17,5.185,5.035,5.155,4.995,4.72,4.45,4.35,4.33,4.335,4.16,4.165,4.245,4.59,4.85,5.105,5.05,4.99,4.915,4.855,4.885,4.61,142.5,143.0,153.5,171.0,214.5,269.5,273.0,245.5,244.5,219.0,211.5,212.5,205.0,206.0,216.0,192.5,167.5,164.0,145.5,129.0,123.0,123.0,130.0,136.5,133.0,136.5,145.5,170.5,184.5,211.5,209.5,207.0,202.0,208.5,215.0,193.0,10.135,10.118333,10.143333,10.143333,10.130833,10.076667,10.07,10.046667,10.06,10.046667,10.053333,10.07,10.07,10.07,10.07,10.07,10.07,10.07,10.043333,10.03,10.006667,10.003333,9.996667,9.98,9.966667,9.956667,9.976667,9.99,9.99,9.993333,9.99,9.99,9.99,9.99,9.99,9.993333,206.0,257.5,260.0,292.0,318.0,473.0,421.0,400.0,370.0,383.5,325.0,307.0,326.0,295.0,365.0,314.0,281.5,236.0,214.0,194.5,181.5,175.5,189.0,209.5,190.5,227.5,226.5,269.0,303.0,348.0,342.5,331.0,301.0,323.0,349.0,285.5,4.08,4.535,8.17,4.995,6.97,6.99,6.965,6.925,8.335,6.61,8.035,6.82,6.665,6.985,6.765,6.35,6.8,10.425,8.1,7.985,7.575,9.21,8.145,7.67,7.28,6.345,6.02,6.97,5.85,5.94,5.6,6.09,6.055,5.88,5.575,6.075,179.882784,295.648351,339.413908,288.586082,172.717947,130.461544,126.622711,43.399266,57.509158,64.043956,75.692308,79.340658,80.87912,73.201465,73.245423,68.395606,103.296706,224.249082,224.307688,170.109886,239.956039,42.124543,40.029304,54.124542,55.311355,57.934066,59.633699,46.212454,35.560439,16.263737,121.582418,121.186812,19.897436,44.102564,56.87912,69.992673,0.126,0.178667,0.224333,0.211667,0.173167,0.109333,0.077,0.097,0.2005,0.165,0.227833,0.240833,0.161833,0.219333,0.185333,0.091,0.0555,0.031667,0.027667,0.033833,0.050667,0.077,0.129833,0.1705,0.161833,0.2215,0.201833,0.210833,0.1265,0.095167,0.072667,0.086167,0.082833,0.146833,0.185,0.237833
2022-01-02 02:00:00+00:00,4.085,5.985,5.97,7.18,8.515,7.38,7.38,7.275,7.415,9.03,6.255,9.89,9.03,6.645,9.005,6.905,8.195,11.805,12.5,12.915,10.82,11.435,11.805,8.66,11.435,8.66,5.645,5.98,6.165,6.675,6.48,6.39,6.255,6.35,6.335,7.4,158.46154,191.428575,221.186815,222.9011,225.01099,224.52747,222.373625,223.032965,219.86813,223.34066,222.549445,225.142855,220.395605,226.549455,221.934065,227.73627,233.14286,225.93407,229.758245,228.21978,231.38461,230.06593,227.25275,225.186815,230.63737,228.30769,226.549455,222.68132,225.406595,227.648355,224.923075,227.428575,221.714285,221.84615,227.6044,223.34066,3.855,4.075,4.305,5.05,5.61,5.775,5.54,5.495,5.24,5.14,5.085,5.22,5.01,5.17,5.185,5.035,5.155,4.995,4.72,4.45,4.35,4.33,4.335,4.16,4.165,4.245,4.59,4.85,5.105,5.05,4.99,4.915,4.855,4.885,4.61,4.845,143.0,153.5,171.0,214.5,269.5,273.0,245.5,244.5,219.0,211.5,212.5,205.0,206.0,216.0,192.5,167.5,164.0,145.5,129.0,123.0,123.0,130.0,136.5,133.0,136.5,145.5,170.5,184.5,211.5,209.5,207.0,202.0,208.5,215.0,193.0,210.5,10.118333,10.143333,10.143333,10.130833,10.076667,10.07,10.046667,10.06,10.046667,10.053333,10.07,10.07,10.07,10.07,10.07,10.07,10.07,10.043333,10.03,10.006667,10.003333,9.996667,9.98,9.966667,9.956667,9.976667,9.99,9.99,9.993333,9.99,9.99,9.99,9.99,9.99,9.993333,10.026667,257.5,260.0,292.0,318.0,473.0,421.0,400.0,370.0,383.5,325.0,307.0,326.0,295.0,365.0,314.0,281.5,236.0,214.0,194.5,181.5,175.5,189.0,209.5,190.5,227.5,226.5,269.0,303.0,348.0,342.5,331.0,301.0,323.0,349.0,285.5,328.0,4.535,8.17,4.995,6.97,6.99,6.965,6.925,8.335,6.61,8.035,6.82,6.665,6.985,6.765,6.35,6.8,10.425,8.1,7.985,7.575,9.21,8.145,7.67,7.28,6.345,6.02,6.97,5.85,5.94,5.6,6.09,6.055,5.88,5.575,6.075,6.51,295.648351,339.413908,288.586082,172.717947,130.461544,126.622711,43.399266,57.509158,64.043956,75.692308,79.340658,80.87912,73.201465,73.245423,68.395606,103.296706,224.249082,224.307688,170.109886,239.956039,42.124543,40.029304,54.124542,55.311355,57.934066,59.633699,46.212454,35.560439,16.263737,121.582418,121.186812,19.897436,44.102564,56.87912,69.992673,69.582418,0.178667,0.224333,0.211667,0.173167,0.109333,0.077,0.097,0.2005,0.165,0.227833,0.240833,0.161833,0.219333,0.185333,0.091,0.0555,0.031667,0.027667,0.033833,0.050667,0.077,0.129833,0.1705,0.161833,0.2215,0.201833,0.210833,0.1265,0.095167,0.072667,0.086167,0.082833,0.146833,0.185,0.237833,0.270167
2022-01-02 03:00:00+00:00,5.985,5.97,7.18,8.515,7.38,7.38,7.275,7.415,9.03,6.255,9.89,9.03,6.645,9.005,6.905,8.195,11.805,12.5,12.915,10.82,11.435,11.805,8.66,11.435,8.66,5.645,5.98,6.165,6.675,6.48,6.39,6.255,6.35,6.335,7.4,6.625,191.428575,221.186815,222.9011,225.01099,224.52747,222.373625,223.032965,219.86813,223.34066,222.549445,225.142855,220.395605,226.549455,221.934065,227.73627,233.14286,225.93407,229.758245,228.21978,231.38461,230.06593,227.25275,225.186815,230.63737,228.30769,226.549455,222.68132,225.406595,227.648355,224.923075,227.428575,221.714285,221.84615,227.6044,223.34066,224.131865,4.075,4.305,5.05,5.61,5.775,5.54,5.495,5.24,5.14,5.085,5.22,5.01,5.17,5.185,5.035,5.155,4.995,4.72,4.45,4.35,4.33,4.335,4.16,4.165,4.245,4.59,4.85,5.105,5.05,4.99,4.915,4.855,4.885,4.61,4.845,5.085,153.5,171.0,214.5,269.5,273.0,245.5,244.5,219.0,211.5,212.5,205.0,206.0,216.0,192.5,167.5,164.0,145.5,129.0,123.0,123.0,130.0,136.5,133.0,136.5,145.5,170.5,184.5,211.5,209.5,207.0,202.0,208.5,215.0,193.0,210.5,234.0,10.143333,10.143333,10.130833,10.076667,10.07,10.046667,10.06,10.046667,10.053333,10.07,10.07,10.07,10.07,10.07,10.07,10.07,10.043333,10.03,10.006667,10.003333,9.996667,9.98,9.966667,9.956667,9.976667,9.99,9.99,9.993333,9.99,9.99,9.99,9.99,9.99,9.993333,10.026667,10.03,260.0,292.0,318.0,473.0,421.0,400.0,370.0,383.5,325.0,307.0,326.0,295.0,365.0,314.0,281.5,236.0,214.0,194.5,181.5,175.5,189.0,209.5,190.5,227.5,226.5,269.0,303.0,348.0,342.5,331.0,301.0,323.0,349.0,285.5,328.0,371.0,8.17,4.995,6.97,6.99,6.965,6.925,8.335,6.61,8.035,6.82,6.665,6.985,6.765,6.35,6.8,10.425,8.1,7.985,7.575,9.21,8.145,7.67,7.28,6.345,6.02,6.97,5.85,5.94,5.6,6.09,6.055,5.88,5.575,6.075,6.51,8.24,339.413908,288.586082,172.717947,130.461544,126.622711,43.399266,57.509158,64.043956,75.692308,79.340658,80.87912,73.201465,73.245423,68.395606,103.296706,224.249082,224.307688,170.109886,239.956039,42.124543,40.029304,54.124542,55.311355,57.934066,59.633699,46.212454,35.560439,16.263737,121.582418,121.186812,19.897436,44.102564,56.87912,69.992673,69.582418,79.340662,0.224333,0.211667,0.173167,0.109333,0.077,0.097,0.2005,0.165,0.227833,0.240833,0.161833,0.219333,0.185333,0.091,0.0555,0.031667,0.027667,0.033833,0.050667,0.077,0.129833,0.1705,0.161833,0.2215,0.201833,0.210833,0.1265,0.095167,0.072667,0.086167,0.082833,0.146833,0.185,0.237833,0.270167,0.296
2022-01-02 04:00:00+00:00,5.97,7.18,8.515,7.38,7.38,7.275,7.415,9.03,6.255,9.89,9.03,6.645,9.005,6.905,8.195,11.805,12.5,12.915,10.82,11.435,11.805,8.66,11.435,8.66,5.645,5.98,6.165,6.675,6.48,6.39,6.255,6.35,6.335,7.4,6.625,6.165,221.186815,222.9011,225.01099,224.52747,222.373625,223.032965,219.86813,223.34066,222.549445,225.142855,220.395605,226.549455,221.934065,227.73627,233.14286,225.93407,229.758245,228.21978,231.38461,230.06593,227.25275,225.186815,230.63737,228.30769,226.549455,222.68132,225.406595,227.648355,224.923075,227.428575,221.714285,221.84615,227.6044,223.34066,224.131865,228.65934,4.305,5.05,5.61,5.775,5.54,5.495,5.24,5.14,5.085,5.22,5.01,5.17,5.185,5.035,5.155,4.995,4.72,4.45,4.35,4.33,4.335,4.16,4.165,4.245,4.59,4.85,5.105,5.05,4.99,4.915,4.855,4.885,4.61,4.845,5.085,5.045,171.0,214.5,269.5,273.0,245.5,244.5,219.0,211.5,212.5,205.0,206.0,216.0,192.5,167.5,164.0,145.5,129.0,123.0,123.0,130.0,136.5,133.0,136.5,145.5,170.5,184.5,211.5,209.5,207.0,202.0,208.5,215.0,193.0,210.5,234.0,224.0,10.143333,10.130833,10.076667,10.07,10.046667,10.06,10.046667,10.053333,10.07,10.07,10.07,10.07,10.07,10.07,10.07,10.043333,10.03,10.006667,10.003333,9.996667,9.98,9.966667,9.956667,9.976667,9.99,9.99,9.993333,9.99,9.99,9.99,9.99,9.99,9.993333,10.026667,10.03,10.036667,292.0,318.0,473.0,421.0,400.0,370.0,383.5,325.0,307.0,326.0,295.0,365.0,314.0,281.5,236.0,214.0,194.5,181.5,175.5,189.0,209.5,190.5,227.5,226.5,269.0,303.0,348.0,342.5,331.0,301.0,323.0,349.0,285.5,328.0,371.0,323.0,4.995,6.97,6.99,6.965,6.925,8.335,6.61,8.035,6.82,6.665,6.985,6.765,6.35,6.8,10.425,8.1,7.985,7.575,9.21,8.145,7.67,7.28,6.345,6.02,6.97,5.85,5.94,5.6,6.09,6.055,5.88,5.575,6.075,6.51,8.24,6.515,288.586082,172.717947,130.461544,126.622711,43.399266,57.509158,64.043956,75.692308,79.340658,80.87912,73.201465,73.245423,68.395606,103.296706,224.249082,224.307688,170.109886,239.956039,42.124543,40.029304,54.124542,55.311355,57.934066,59.633699,46.212454,35.560439,16.263737,121.582418,121.186812,19.897436,44.102564,56.87912,69.992673,69.582418,79.340662,84.600733,0.211667,0.173167,0.109333,0.077,0.097,0.2005,0.165,0.227833,0.240833,0.161833,0.219333,0.185333,0.091,0.0555,0.031667,0.027667,0.033833,0.050667,0.077,0.129833,0.1705,0.161833,0.2215,0.201833,0.210833,0.1265,0.095167,0.072667,0.086167,0.082833,0.146833,0.185,0.237833,0.270167,0.296,0.248167


In [6]:
buoy_df.shape

(4790, 324)

### 数据分割

In [7]:
# 预测变量
predictor_variables = buoy_df.columns.str.contains("\(t\-")
X = buoy_df.iloc[:, predictor_variables]
# print(X.head())
print(X.shape)

(4790, 216)


In [8]:
# 目标变量
target_var = "SWH"
target_variables = buoy_df.columns.str.contains(f"{target_var}\(t\+")
Y = buoy_df.iloc[:, target_variables]
# print(Y.head())
print(Y.shape)

(4790, 12)


In [9]:
# 训练集-测试集分割
X_train, X_test, Y_train, Y_test = train_test_split(
    X, 
    Y, 
    test_size = 0.3, 
    shuffle = False
)

### 模型构建

In [10]:
# 模型拟合
model_wo_fe = MultiOutputRegressor(LGBMRegressor())
model_wo_fe.fit(X_train, Y_train)

MultiOutputRegressor(estimator=LGBMRegressor())

In [11]:
# 模型预测
preds_wo_fe = model_wo_fe.predict(X_test)
print(preds_wo_fe.shape)

(1437, 12)


In [12]:
# 模型评估
mape(Y_test, preds_wo_fe)

0.23879314909803728

## Feature Engineering

### 单变量特征提取

In [13]:
SUMMARY_STATS = {
    "mean": np.mean,
    "sdev": np.std,
    "median": np.median,
}

univariate_features = {}
for col in buoy.columns:
    X_col = X.iloc[:, X.columns.str.startswith(col)]
    for feature, func in SUMMARY_STATS.items():
        univariate_features[f"{col}_{feature}"] = X_col.apply(func, axis = 1)
univariate_features_df = pd.concat(univariate_features, axis = 1)
univariate_features_df.head()

Unnamed: 0_level_0,PeakP_mean,PeakP_sdev,PeakP_median,PeakD_mean,PeakD_sdev,PeakD_median,Upcross_mean,Upcross_sdev,Upcross_median,SWH_mean,SWH_sdev,SWH_median,SeaTemp_mean,SeaTemp_sdev,SeaTemp_median,Hmax_mean,Hmax_sdev,Hmax_median,THmax_mean,THmax_sdev,THmax_median,MCurDir_mean,MCurDir_sdev,MCurDir_median,MCurSpd_mean,MCurSpd_sdev,MCurSpd_median
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2022-01-02 00:00:00+00:00,8.073125,2.485878,7.3975,215.769231,22.155645,223.186813,4.858542,0.560603,5.0425,185.3125,45.580243,181.75,10.071875,0.041977,10.07,293.854167,79.016412,293.5,7.04625,1.435537,6.9675,140.465201,87.495178,114.959709,0.134667,0.067496,0.133417
2022-01-02 01:00:00+00:00,8.397917,2.442645,7.805,218.481685,19.215475,223.934065,4.877083,0.535039,5.0425,184.916667,45.959599,181.75,10.06559,0.043925,10.07,291.4375,80.655429,293.5,7.171042,1.35171,6.9775,135.301587,88.772141,92.087913,0.136063,0.067875,0.145833
2022-01-02 02:00:00+00:00,8.586667,2.274816,8.355,221.141026,15.1028,224.76923,4.886875,0.519448,5.0425,184.520833,46.362359,181.75,10.058576,0.045686,10.07,290.791667,81.395628,293.5,7.304375,1.188163,6.9875,130.11111,89.651223,80.109889,0.137556,0.068031,0.161833
2022-01-02 03:00:00+00:00,8.892917,2.138847,8.5875,224.148352,7.688401,225.076922,4.899792,0.496998,5.0425,184.25,46.622375,181.75,10.05184,0.048227,10.065,289.541667,82.124387,293.5,7.379792,1.060582,6.9875,120.206349,83.752773,77.516483,0.13934,0.06963,0.161833
2022-01-02 04:00:00+00:00,9.004375,2.052357,8.66,225.684982,3.586566,225.164835,4.906875,0.486289,5.0425,183.916667,46.868981,181.75,10.044896,0.046522,10.056667,288.145833,82.895702,293.5,7.290208,1.080664,6.9775,108.54884,70.917851,74.468866,0.138403,0.068623,0.161833


In [14]:
univariate_features_df.shape

(4790, 27)

### 二元特征提取

In [16]:
BIVARIATE_SUMMARY_STATS = {
    "covariance": covariance,
    "co_integration": co_integration,
    "js_div": jensenshannon,
}
BIVARIATE_TRANSFORMATIONS = {
    "corr": signal.correlate,
    "conv": signal.convolve,
    "rel_entr": rel_entr,
}

# pairs of variables
colnames = buoy.columns.tolist()
col_combs = list(itertools.combinations(colnames, 2))

bivariate_features = []
# ---------------------------------------
# for each row
# ---------------------------------------
for i, _ in X.iterrows():
    # ---------------------------------------
    # feature set in the i-th time-step
    # ---------------------------------------
    feature_set_i = {}
    for col1, col2 in col_combs:
        # ---------------------------------------
        # features for pair of columns col1, col2
        # ---------------------------------------
        # getting the i-th instance for each column
        x1 = X.loc[i, X.columns.str.startswith(col1)]
        x2 = X.loc[i, X.columns.str.startswith(col2)]
        # compute each summary stat
        for feat, func in BIVARIATE_SUMMARY_STATS.items():
            feature_set_i[f"{col1}|{col2}_{feat}"] = func(x1, x2)
        # for each transformation
        for trans_feat, trans_func in BIVARIATE_TRANSFORMATIONS.items():
            # apply transformation
            x_trans = trans_func(x1, x2)
            # compute summary stat
            for feat, summary_func in SUMMARY_STATS.items():
                feature_set_i[f"{col1}|{col2}_{trans_func}_{feat}"] = summary_func(x_trans)
    bivariate_features.append(feature_set_i)
bivariate_features_df = pd.DataFrame(bivariate_features, index = X.index)

In [17]:
X_with_features = pd.concat([
    X, 
    univariate_features_df, 
    bivariate_features_df
], axis = 1)

### 数据分割

In [18]:
X_train, X_test, Y_train, Y_test = train_test_split(
    X_with_features,
    Y,
    test_size = 0.3,
    shuffle = False,
)

### 模型构建

In [19]:
model_w_fe = MultiOutputRegressor(LGBMRegressor())
model_w_fe.fit(X_train, Y_train)

MultiOutputRegressor(estimator=LGBMRegressor())

In [20]:
preds_w_fe = model_w_fe.predict(X_test)
mape(Y_test, preds_w_fe)

0.23209346352885196

## Feature Selection

In [22]:
# importance of features
avg_imp = pd.DataFrame([
    x.feature_importances_ for x in model_w_fe.estimators_
]).mean()

# top 100 features
n_top_features = 100
importance_scores = pd.Series(dict(zip(X_train.columns, avg_imp)))
top_features = importance_scores.sort_values(ascending = False)[:n_top_features]
top_features_nm = top_features.index

# 特征选择
X_train_top = X_train[top_features_nm]
X_test_top = X_test[top_features_nm]

# 模型拟合
model_top_features = MultiOutputRegressor(LGBMRegressor())
model_top_features.fit(X_train_top, Y_train)

# 模型预测
preds_top_features = model_top_features.predict(X_test_top)

# 模型评估
mape(Y_test, preds_top_features)

0.23549452747597632