In [2]:
import os
import random
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [10]:
# Location of the Austria time-series CSV. The default is the original local
# path; set the AUSTRIA_CSV environment variable to point the notebook at the
# file on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "AUSTRIA_CSV",
    "D:\\FON Cetvrta godina\\Neuronske mreže\\Projekat\\Austria.csv",
)
df = pd.read_csv(DATA_PATH)

In [12]:
# Summarize the freshly loaded frame: column dtypes, non-null counts and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201604 entries, 0 to 201603
Data columns (total 8 columns):
 #   Column                                Non-Null Count   Dtype  
---  ------                                --------------   -----  
 0   Unnamed: 0                            201604 non-null  int64  
 1   utc_timestamp                         201604 non-null  object 
 2   cet_cest_timestamp                    201604 non-null  object 
 3   AT_price_day_ahead                    131378 non-null  float64
 4   AT_solar_generation_actual            201352 non-null  float64
 5   AT_wind_onshore_generation_actual     201406 non-null  float64
 6   AT_load_forecast_entsoe_transparency  201598 non-null  float64
 7   AT_load_actual_entsoe_transparency    201598 non-null  float64
dtypes: float64(5), int64(1), object(2)
memory usage: 12.3+ MB


In [14]:
# Parse the UTC timestamp strings into datetimes. utc=True pins the result to
# tz-aware UTC regardless of how the offsets are written in the file, matching
# how the cet_cest_timestamp column is parsed.
df["utc_timestamp"] = pd.to_datetime(df["utc_timestamp"], utc=True)

In [16]:
# Use the UTC timestamp as the row index, then put the rows in chronological order.
df = df.set_index("utc_timestamp")
df = df.sort_index()

In [18]:
# Name of the column to predict.
target = "AT_price_day_ahead"

In [22]:
# Parse the local timestamp as UTC and express it in the CET zone (CET/CEST),
# then derive the calendar features from that local wall-clock time.
local_time = pd.to_datetime(df["cet_cest_timestamp"], utc=True).dt.tz_convert("CET")
df["cet_cest_timestamp"] = local_time

local_clock = local_time.dt
df["hour"] = local_clock.hour
df["dayofweek"] = local_clock.dayofweek  # 0=Monday
df["month"] = local_clock.month

In [24]:
# Re-check the schema after indexing by UTC time and adding the calendar columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 201604 entries, 2014-12-31 23:00:00+00:00 to 2020-09-30 23:45:00+00:00
Data columns (total 10 columns):
 #   Column                                Non-Null Count   Dtype              
---  ------                                --------------   -----              
 0   Unnamed: 0                            201604 non-null  int64              
 1   cet_cest_timestamp                    201604 non-null  datetime64[ns, CET]
 2   AT_price_day_ahead                    131378 non-null  float64            
 3   AT_solar_generation_actual            201352 non-null  float64            
 4   AT_wind_onshore_generation_actual     201406 non-null  float64            
 5   AT_load_forecast_entsoe_transparency  201598 non-null  float64            
 6   AT_load_actual_entsoe_transparency    201598 non-null  float64            
 7   hour                                  201604 non-null  int32              
 8   dayofweek                             

In [26]:
# Positional hold-out split: the first 70% of rows become the training set and
# the remaining rows the test set. Copies keep later edits off the source frame.
split_index = int(len(df) * 0.7)
train, test = (
    part.copy() for part in (df.iloc[:split_index], df.iloc[split_index:])
)

In [28]:
# Fill gaps in the training target: interpolate along the time index first,
# then forward- and backward-fill whatever is still missing afterwards.
train[target] = (
    train[target]
    .interpolate(method="time")
    .ffill()
    .bfill()
)

In [34]:
# Fill the test target using past information only: carry the previous value
# forward, and seed any gap at the very start with the last training value.
last_train_value = train[target].iloc[-1]
test[target] = test[target].ffill().fillna(last_train_value)

In [36]:
# Two independent min-max scalers: scaler_y for the target column and
# scaler_X for the feature columns.
scaler_y, scaler_X = MinMaxScaler(), MinMaxScaler()

In [38]:
# Fit the target scaler on the training split, then apply the already-fitted
# scaler to the test split (the tuple is evaluated left to right, so the fit
# happens before the test transform).
y_train_scaled, y_test_scaled = (
    scaler_y.fit_transform(train[[target]]),
    scaler_y.transform(test[[target]]),
)

In [40]:
# Model input columns: the calendar fields derived from the local timestamp
# plus the generation and load columns read from the CSV.
# NOTE(review): the "*_actual" columns look like measured values; confirm they
# would be known at prediction time before reading results as a true forecast.
features = ["hour", "dayofweek", "month",
            "AT_solar_generation_actual", 
            "AT_wind_onshore_generation_actual",
            "AT_load_forecast_entsoe_transparency",
            "AT_load_actual_entsoe_transparency"]

In [42]:
# Fill missing feature values in both splits.
for col in features:
    # Training split: forward-fill, then backward-fill any gap at the start.
    train[col] = train[col].ffill().bfill()
    # Test split: forward-fill only, so no later test value is pulled backwards.
    # A gap at the very start of the test split has nothing to carry forward,
    # so it is seeded with the last (already filled) training value, the same
    # rule the target column uses. Without this those rows would stay NaN.
    test[col] = test[col].ffill().fillna(train[col].iloc[-1])

In [44]:
# Fit the feature scaler on the training split, then apply the already-fitted
# scaler to the test split (the tuple is evaluated left to right, so the fit
# happens before the test transform).
X_train_scaled, X_test_scaled = (
    scaler_X.fit_transform(train[features]),
    scaler_X.transform(test[features]),
)

In [46]:
# Count remaining missing values per column in the training split.
train.isna().sum()

Unnamed: 0                              0
cet_cest_timestamp                      0
AT_price_day_ahead                      0
AT_solar_generation_actual              0
AT_wind_onshore_generation_actual       0
AT_load_forecast_entsoe_transparency    0
AT_load_actual_entsoe_transparency      0
hour                                    0
dayofweek                               0
month                                   0
dtype: int64

In [48]:
# Count remaining missing values per column in the test split.
test.isna().sum()

Unnamed: 0                              0
cet_cest_timestamp                      0
AT_price_day_ahead                      0
AT_solar_generation_actual              0
AT_wind_onshore_generation_actual       0
AT_load_forecast_entsoe_transparency    0
AT_load_actual_entsoe_transparency      0
hour                                    0
dayofweek                               0
month                                   0
dtype: int64

In [50]:
def create_sequences(y, X, look_back=24):
    """Build supervised samples from a target series and row-aligned features.

    Sample ``i`` is the ``look_back`` consecutive target values
    ``y[i:i + look_back]`` (flattened) followed by the feature row
    ``X[i + look_back]``; its label is ``y[i + look_back]``.

    NOTE(review): the feature row is taken at the same step as the label, so
    every feature must be known at prediction time -- confirm with the caller.

    Parameters
    ----------
    y : array-like, shape (n,) or (n, k)
        Target values in time order.
    X : array-like, shape (n,) or (n, f)
        Feature rows; row ``j`` belongs to the same step as ``y[j]``.
    look_back : int, default 24
        Number of past target steps placed in each sample.

    Returns
    -------
    X_seq : numpy.ndarray, shape (max(n - look_back, 0), look_back * k + f)
        Lagged targets followed by the features of the predicted step.
    y_seq : numpy.ndarray, shape (max(n - look_back, 0),) + y.shape[1:]
        Labels aligned with the rows of ``X_seq``.

    Raises
    ------
    ValueError
        If ``look_back`` is negative.
    IndexError
        If samples can be built but ``X`` has fewer rows than ``y``.
    """
    # Accept lists / pandas objects as well as ndarrays.
    y = np.asarray(y)
    X = np.asarray(X)

    if look_back < 0:
        # A negative window would silently wrap around via negative indexing.
        raise ValueError("look_back must be non-negative")

    n_samples = max(len(y) - look_back, 0)
    y_width = int(np.prod(y.shape[1:], dtype=int))  # 1 for a 1-D target
    x_width = int(np.prod(X.shape[1:], dtype=int))  # 1 for a 1-D feature array

    if n_samples and len(X) < len(y):
        # Same failure type as indexing past the end of X, with a clearer message.
        raise IndexError("X has fewer rows than y")

    # Pre-sizing the output keeps a well-defined 2-D shape even when the input
    # is too short to produce any sample.
    X_seq = np.empty(
        (n_samples, look_back * y_width + x_width),
        dtype=np.result_type(y.dtype, X.dtype),
    )

    # Column block `lag` holds y[i + lag] for every sample i; filling one lag
    # at a time avoids a Python-level loop over all samples.
    for lag in range(look_back):
        start = lag * y_width
        X_seq[:, start:start + y_width] = (
            y[lag:lag + n_samples].reshape(n_samples, y_width)
        )

    # Trailing columns: features of the step being predicted.
    X_seq[:, look_back * y_width:] = (
        X[look_back:look_back + n_samples].reshape(n_samples, x_width)
    )

    # Copy so the labels do not alias the caller's array.
    y_seq = y[look_back:look_back + n_samples].copy()
    return X_seq, y_seq