In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
import sys
import csv
from BDDData import *
import torch
import torch.utils.data as data
import importlib
import torch.nn.functional as F
import torch.nn as nn
from joblib import Parallel, delayed
from statsmodels.tsa.vector_ar.var_model import VAR

In [None]:
# Reload BDDData so edits to the module are picked up without restarting the kernel.
importlib.reload(sys.modules['BDDData'])
# reload() does not rebind names that were pulled in with `from BDDData import *`,
# so repeat the star import; otherwise BDD_dataset below is still the stale
# pre-reload class and the reload has no visible effect.
from BDDData import *

# Build the dataset wrapper from the raw data directory.
bdd_data = BDD_dataset("raw_data/")
# NOTE(review): the methods below are defined in BDDData and are not visible here;
# judging by their names they tag, clean and scale the power signal - confirm there.
bdd_data.add_timestep_id()
bdd_data.tag_chaotic(replace=True)

bdd_data.interpolate_power()
bdd_data.cap_power_to_zero()
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")
# The three splits are later indexed as [turbine, time step] by CustomBDD_Dataset.
train, val, test = bdd_data.split_df()

In [None]:
# Only take a fraction of the splits
# Axis 1 is the time axis (CustomBDD_Dataset slices it with window indices), so the
# sampled column indices are sorted: np.random.choice returns them in random order,
# which would otherwise shuffle the time steps before sliding windows are built.
# NOTE(review): the kept time steps are still non-contiguous; if evenly spaced data
# is required, a contiguous slice may be the better way to shrink the splits - confirm.
np.random.seed(42)
p = 0.1
train_mask = np.sort(np.random.choice(train.shape[1], int(train.shape[1] * p), replace=False))
val_mask = np.sort(np.random.choice(val.shape[1], int(val.shape[1] * p), replace=False))
test_mask = np.sort(np.random.choice(test.shape[1], int(test.shape[1] * p), replace=False))

# Caution: this overwrites the splits in place, so re-running the cell shrinks them again.
train = train[:, train_mask]
val = val[:, val_mask]
test = test[:, test_mask]

In [None]:
class CustomBDD_Dataset(data.Dataset):
    """Sliding-window dataset over one of the notebook's global splits.

    Each item is a ``(features, labels)`` pair of float tensors cut from the
    split named by ``dataset``. Rows ``starting_turbine`` to ``ending_turbine``
    (inclusive) are kept; columns ``window[0]:window[1]`` form the features and
    columns ``window[1]:window[2]`` form the labels.

    Args:
        dataset: Name of the split to read: ``"train"``, ``"val"`` or ``"test"``.
        observation_window: Number of observed steps passed to ``bdd_data``.
        forecast_window: Number of forecast steps passed to ``bdd_data``.
        starting_turbine: First row index to include.
        ending_turbine: Last row index to include (inclusive).

    Raises:
        NotImplementedError: If ``dataset`` is not one of the known split names.
    """

    def __init__(self, dataset, observation_window=12, forecast_window=12, starting_turbine = 0,  ending_turbine=133):
        self.observation_window = observation_window
        self.forecast_window = forecast_window
        # Resolve the split by name instead of eval()-ing the string: eval would run
        # arbitrary expressions and only fail later for an unknown split name.
        length = len(self._split_array(dataset)[0])
        bdd_data.get_observation_forecasting_window(time_series_len=length, observation_steps=self.observation_window, forecast_steps=self.forecast_window)
        # The window table is keyed by "<observation>,<forecast>".
        self.window_of_interest = bdd_data.sliding_indices[f"{self.observation_window},{self.forecast_window}"]
        self.starting_turbine = starting_turbine
        self.ending_turbine = ending_turbine
        self.dataset = dataset

    @staticmethod
    def _split_array(name):
        """Return the global array for split ``name`` (looked up at call time)."""
        if name == "train":
            return train
        if name == "val":
            return val
        if name == "test":
            return test
        raise NotImplementedError(f"Unknown dataset split: {name!r}")

    def __len__(self):
        """Number of sliding windows available for this configuration."""
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` float tensors for window ``idx``."""
        window = self.window_of_interest[idx]
        split = self._split_array(self.dataset)
        # ending_turbine is inclusive, hence the +1 on the slice stop.
        rows = slice(self.starting_turbine, self.ending_turbine + 1)
        features = split[rows, window[0]:window[1]]
        labels = split[rows, window[1]:window[2]]
        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()
    
obs_window = 12
forecast_window = 12
batch_size = 1

# Both loaders shuffle. Each gets its own seeded generator so that the batch order
# (and therefore any evaluation limited to the first few batches) is reproducible
# after a kernel restart instead of depending on the global torch RNG state.
train_dataset = CustomBDD_Dataset("train",observation_window=obs_window,forecast_window=forecast_window)
train_loader = data.DataLoader(train_dataset, shuffle=True, batch_size = batch_size, generator=torch.Generator().manual_seed(42))
val_dataset = CustomBDD_Dataset("val",observation_window=obs_window,forecast_window=forecast_window)
val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size = batch_size, generator=torch.Generator().manual_seed(42))

In [None]:
# Helper functions
class SDELoss(nn.Module):
    """Standard deviation of the error between prediction and target.

    The error is centred on its own mean over all elements, then the root of
    the mean squared centred error is returned as a scalar tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, target):
        """Return ``sqrt(mean((e - mean(e)) ** 2))`` with ``e = input - target``."""
        residual = input - target
        centred = residual - residual.mean()
        return centred.pow(2).mean().sqrt()
    
class MaskedLoss(nn.Module):
    """Wrap a loss so that positions where the target is NaN are ignored.

    Args:
        criterion: Callable taking ``(prediction, target)``; it receives only
            the elements whose target is not NaN, flattened to 1-D.
    """

    def __init__(self, criterion):
        super().__init__()
        self.criterion = criterion

    def forward(self, prediction, target):
        """Apply the wrapped criterion to the non-NaN target positions."""
        # True wherever the target holds a real value.
        valid = torch.isnan(target).logical_not()
        # Boolean indexing keeps only those positions in both tensors.
        return self.criterion(prediction[valid], target[valid])

In [None]:
# Functions to run ARIMA
def fit_arima_and_forecast(series, order, steps=12):
    """Fit an ARIMA model to a single series and forecast ahead.

    Args:
        series: Observed values, passed straight to ``ARIMA``.
        order: The ``(p, d, q)`` order passed to ``ARIMA``.
        steps: Number of steps to forecast. Defaults to 12, the horizon that
            was previously hard-coded.

    Returns:
        Whatever ``forecast(steps=...)`` of the fitted model returns.
    """
    fitted = ARIMA(series, order=order).fit()
    return fitted.forecast(steps=steps)

def arima(order, limit=len(train_loader)):
    """Evaluate per-series ARIMA forecasts on batches from ``val_loader``.

    For every batch, one ARIMA model is fitted per series (in parallel) and its
    forecast is scored against the labels with NaN-masked RMSE, MAE and SDE.
    The per-batch scores are averaged over the batches actually processed.

    Args:
        order: The ``(p, d, q)`` order passed to ``fit_arima_and_forecast``.
        limit: Stop after this many batches. The default is taken from
            ``len(train_loader)`` when the function is defined, although the
            loop iterates ``val_loader``; if the loader runs out first, the
            average still covers only the batches that were seen.

    Returns:
        Tuple ``(rmse, mae, sde)`` of mean per-batch scores.
    """
    total_rmse_loss = 0
    total_mae_loss = 0
    total_sde_loss = 0
    counter = 0
    n_jobs = -1

    mse_loss = MaskedLoss(nn.MSELoss())
    mae_loss = MaskedLoss(nn.L1Loss())
    sde_loss = MaskedLoss(SDELoss())

    for x, y in val_loader:
        counter += 1
        print(counter)
        # Batches are (batch, series, observed steps); take the sizes from the
        # tensor instead of hard-coding 134 series and 12 steps.
        batch_size, n_series, obs_steps = x.shape[0], x.shape[1], x.shape[-1]

        # One row per individual series so each can be fitted independently.
        flat_x = x.reshape(-1, obs_steps).tolist()
        results = Parallel(n_jobs=n_jobs)(delayed(fit_arima_and_forecast)(series, order) for series in flat_x)

        # Back to (batch, series, forecast steps) to line up with the labels.
        results = np.array(results).reshape(batch_size, n_series, -1)
        results = torch.from_numpy(results).float()

        # Call the modules rather than .forward() so nn.Module hooks still run.
        mse = mse_loss(results, y)
        mae = mae_loss(results, y)
        sde = sde_loss(results, y)

        rmse = torch.sqrt(mse)

        total_rmse_loss += rmse.item()
        total_mae_loss += mae.item()
        total_sde_loss += sde.item()

        if counter == limit:
            break

    # Average over the batches actually processed: dividing by `limit` understates
    # the scores whenever the loader holds fewer than `limit` batches.
    n_batches = max(counter, 1)
    mean_rmse = total_rmse_loss / n_batches
    mean_mae = total_mae_loss / n_batches
    mean_sde = total_sde_loss / n_batches
    print(f'RMSE: {mean_rmse}, MAE: {mean_mae}, SDE: {mean_sde}')
    return mean_rmse, mean_mae, mean_sde

In [None]:
# Run ARIMA with order (0, 0, 0) (no AR, differencing or MA terms) on the first 10
# batches; arima() draws its batches from val_loader, not from the train set.
# NOTE(review): this comment previously described order (1, 0, 0) on the train set -
# confirm which order is intended.
arima((0, 0, 0), 10)