In [None]:
# --- Run configuration -------------------------------------------------
# Year held out as the test set; earlier years are used for training.
YEAR = 2021
# Number of consecutive rows fed to the LSTM per sample (window length).
LOOK_BACK = 36
# pandas frequency alias used to put the time index on a regular grid.
FREQ = "h"
# Input CSV; read with a parsed 'timestamp' column.
DATA_PATH = "data/no_exogenous/data.csv"
# Destination CSV for the predictions of the held-out year.
OUTPUT_PATH = f"data/predictions_lstm_{YEAR}.csv"

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Load the series, index it by timestamp, and put it on a regular FREQ grid.
# asfreq() inserts NaN rows for missing timestamps; dropna() then removes
# every row that contains a NaN. The 'year' column is (re)derived from the
# index so the train/test split below does not depend on the CSV's own value.
data = (
    pd.read_csv(DATA_PATH, parse_dates=['timestamp'])
    .set_index('timestamp')
    .asfreq(FREQ)
    .dropna()
    .assign(year=lambda frame: frame.index.year)
)

# Train on the four calendar years preceding YEAR; test on YEAR itself.
in_train_window = data['year'].gt(YEAR - 5) & data['year'].lt(YEAR)
in_test_year = data['year'].eq(YEAR)
train = data[in_train_window]
test = data[in_test_year]

# Model inputs are calendar features only; the target is 'value'.
X = train[['hour','day_of_week','month','year','is_weekend','quarter','is_holiday']]
y = train['value']
y_column = y.values.reshape(-1, 1)  # scalers expect a 2-D (n_samples, 1) array

# Both scalers are fitted on the training rows only.
sf = MinMaxScaler()
sf.fit(X)
st = MinMaxScaler()
st.fit(y_column)

X_s = sf.transform(X)
y_s = st.transform(y_column).ravel()

def create_seq(arr, target, lb):
    """Build sliding look-back windows and their aligned targets.

    For every position ``i`` in ``range(lb, len(arr))`` the window
    ``arr[i - lb:i]`` (the ``lb`` rows strictly before ``i``) is paired with
    ``target[i]``.

    Parameters
    ----------
    arr : array-like, shape (n_samples, ...)
        Per-step inputs; windows are cut along the first axis.
    target : array-like, shape (n_samples, ...)
        Per-step targets, indexed positionally.
    lb : int
        Look-back window length.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)`` with shapes ``(n_samples - lb, lb, ...)`` and
        ``(n_samples - lb, ...)``. When ``n_samples <= lb`` both arrays are
        empty but keep the trailing dimensions, so callers still see the
        expected rank instead of a bare ``(0,)`` array.
    """
    # Coerce up front so list inputs and pandas objects are sliced/indexed
    # positionally, exactly like the ndarray case.
    steps = np.asarray(arr)
    labels = np.asarray(target)
    n_steps = len(steps)

    if n_steps <= lb:
        # np.array([]) would collapse to shape (0,); build shaped empties.
        empty_windows = np.empty((0, lb) + steps.shape[1:], dtype=steps.dtype)
        empty_targets = np.empty((0,) + labels.shape[1:], dtype=labels.dtype)
        return empty_windows, empty_targets

    positions = range(lb, n_steps)
    windows = np.array([steps[i - lb:i] for i in positions])
    targets = np.array([labels[i] for i in positions])
    return windows, targets

# Training samples: LOOK_BACK consecutive scaled feature rows -> next scaled target.
X_seq, y_seq = create_seq(arr=X_s, target=y_s, lb=LOOK_BACK)


In [None]:
# Stacked-LSTM regressor: three recurrent layers (200 -> 100 -> 50 units),
# dropout, and a single linear output for the scaled target.
n_feat = X.shape[1]
model = Sequential()
model.add(LSTM(200, return_sequences=True, input_shape=(LOOK_BACK, n_feat)))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# EarlyStopping monitors 'val_loss' by default. Without validation data that
# metric never exists, so the callback can never trigger and training always
# runs the full 200 epochs. validation_split holds out the LAST 10% of the
# samples (taken before shuffling), i.e. the most recent windows, which is a
# chronological hold-out for this time-ordered data.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,  # keep the best epoch, not the last one
)

model.fit(
    X_seq,
    y_seq,
    epochs=200,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1,
)


In [None]:
# Build look-back windows over the held-out year and predict on them.
X_test = test[['hour','day_of_week','month','year','is_weekend','quarter','is_holiday']]

# Each prediction needs LOOK_BACK preceding rows, so a shorter test set yields
# no windows at all; fail with a clear message instead of a shape error later.
if len(X_test) <= LOOK_BACK:
    raise ValueError(
        f"Test set has {len(X_test)} rows; need more than LOOK_BACK={LOOK_BACK} "
        "rows to build at least one prediction window."
    )

# Reuse the scaler fitted on the training features (no refit on test data).
X_ts = sf.transform(X_test)

# create_seq requires a target argument, but the targets it returns are
# discarded here, so the raw values are passed without scaling them first.
X_tseq, _ = create_seq(X_ts, test['value'].to_numpy(), LOOK_BACK)

y_pred_s = model.predict(X_tseq)
# Map predictions from the scaled range back to the original units of 'value'.
y_pred = st.inverse_transform(y_pred_s.reshape(-1, 1)).flatten()

In [None]:
# The first LOOK_BACK test rows have no full window behind them and therefore
# no prediction; keep only the rows that line up with y_pred.
out = test.iloc[LOOK_BACK:].copy()
# Replace the observed values with the model's predictions.
out['value'] = y_pred
# The timestamps live in the DataFrame index (set via set_index('timestamp')),
# so the index must be written; otherwise the rows cannot be aligned to time.
out.to_csv(OUTPUT_PATH, index=True)