In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
import sys
import csv
from BDDData import *
import torch
import torch.utils.data as data
import importlib
import torch.nn.functional as F
from joblib import Parallel, delayed
from statsmodels.tsa.vector_ar.var_model import VAR

In [2]:
# Re-execute the already-imported BDDData module so that edits to its source
# are picked up without restarting the kernel.
# NOTE(review): importlib.reload() does not rebind names that were pulled in
# with `from BDDData import *`; the imports cell has to be re-run after this
# for `BDD_dataset` below to refer to the reloaded definition.
importlib.reload(sys.modules['BDDData'])

# Construct the dataset object with the "raw_data/" path and run its
# preprocessing methods in this order. What each method does is defined in
# BDDData (not visible here) — presumably they modify bdd_data in place,
# since their return values are discarded; TODO confirm.
bdd_data = BDD_dataset("raw_data/")
bdd_data.add_timestep_id()
bdd_data.tag_chaotic(replace=True)

bdd_data.interpolate_power()
bdd_data.cap_power_to_zero()
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")
# The three splits become module-level globals; CustomBDD_Dataset reads them
# by name. They are indexed as 2-D arrays ([rows, columns]) further down.
train, val, test = bdd_data.split_df()

In [34]:
class CustomBDD_Dataset(data.Dataset):
    """Sliding-window dataset over one of the notebook's data splits.

    Each item is a ``(features, labels)`` pair of float tensors cut from the
    module-level array named by ``dataset`` (``train``, ``val`` or ``test``).
    For a window ``w`` taken from ``bdd_data.sliding_indices``, ``features``
    covers columns ``w[0]:w[1]`` and ``labels`` covers ``w[1]:w[2]``, both
    restricted to rows ``starting_turbine`` .. ``ending_turbine`` (inclusive).

    The output layout depends on the split:

    * ``"train"``: the 2-D slices are returned as they are (rows x columns).
    * ``"val"`` / ``"test"``: the slices are transposed and flattened to
      shape ``(-1, 1)``.

    The class depends on module-level state: the ``bdd_data`` object (used to
    generate and look up the window indices) and the split arrays themselves,
    which are resolved by name on every access so that re-running the
    preprocessing cell is picked up by existing instances.

    Args:
        dataset: Name of the split to read: ``"train"``, ``"val"`` or ``"test"``.
        observation_window: Number of time steps in each feature window.
        forecast_window: Number of time steps in each label window.
        starting_turbine: First row (inclusive) to include.
        ending_turbine: Last row (inclusive) to include.

    Raises:
        NotImplementedError: If ``dataset`` is not one of the known split names.
    """

    # Names of the module-level arrays this dataset is allowed to read.
    _SPLIT_NAMES = ("train", "val", "test")

    def __init__(self, dataset, observation_window=12, forecast_window=12, starting_turbine = 0,  ending_turbine=133):
        self.dataset = dataset
        self.observation_window = observation_window
        self.forecast_window = forecast_window
        self.starting_turbine = starting_turbine
        self.ending_turbine = ending_turbine

        # Length of the first row of the split == number of time steps.
        # Resolved through a validated name lookup instead of eval(), so an
        # unknown split name fails here rather than executing arbitrary text.
        series_length = len(self._split_array()[0])

        # Ask bdd_data to generate the windows for this configuration, then
        # fetch them under the "<observation>,<forecast>" key it stores them by.
        bdd_data.get_observation_forecasting_window(
            time_series_len=series_length,
            observation_steps=self.observation_window,
            forecast_steps=self.forecast_window,
        )
        window_key = str(self.observation_window) + "," + str(self.forecast_window)
        self.window_of_interest = bdd_data.sliding_indices[window_key]

    def _split_array(self):
        """Return the module-level array that ``self.dataset`` names."""
        if self.dataset not in self._SPLIT_NAMES:
            raise NotImplementedError(
                f"unknown dataset split {self.dataset!r}; expected one of {self._SPLIT_NAMES}"
            )
        try:
            return globals()[self.dataset]
        except KeyError:
            # Same failure a direct reference to the missing global would give.
            raise NameError(f"name '{self.dataset}' is not defined") from None

    def __len__(self):
        """Number of sliding windows available."""
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` float tensors for window ``idx``."""
        window = self.window_of_interest[idx]
        source = self._split_array()
        turbines = slice(self.starting_turbine, self.ending_turbine + 1)

        features = source[turbines, window[0]:window[1]]
        labels = source[turbines, window[1]:window[2]]

        # Only the train split keeps the 2-D layout; val/test are flattened
        # time-major into a single column.
        if self.dataset != "train":
            features = features.transpose().reshape(-1, 1)
            labels = labels.transpose().reshape(-1, 1)

        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()
    
# Window lengths (in time steps) and batch size used to build the loader below.
obs_window = 12
forecast_window = 12
batch_size = 1

# Wrap the training split in sliding windows and expose it through a DataLoader.
# NOTE(review): shuffle=True with no fixed seed means every pass over
# train_loader yields windows in a different random order, so a loss computed
# on only the first few batches is not comparable between runs.
train_dataset = CustomBDD_Dataset("train",observation_window=obs_window,forecast_window=forecast_window)
train_loader = data.DataLoader(train_dataset, shuffle=True, batch_size = batch_size)
# Validation loader is currently disabled.
# val_dataset = CustomBDD_Dataset("val",observation_window=obs_window,forecast_window=forecast_window)
# val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size = batch_size)

In [93]:
def fit_arima_and_forecast(series, order, steps=12):
    """Fit an ARIMA model to a single series and forecast past its end.

    Args:
        series: The observed values, passed straight to ``ARIMA``.
        order: The ``(p, d, q)`` order passed to ``ARIMA``.
        steps: Number of future steps to forecast. Defaults to 12, the
            horizon this function previously had hard-coded.

    Returns:
        Whatever the fitted model's ``forecast`` returns for ``steps`` steps.
    """
    fitted = ARIMA(series, order=order).fit()
    return fitted.forecast(steps=steps)

def var_forecast(train_data, steps):
    """Fit a VAR model on ``train_data`` and forecast ``steps`` steps ahead.

    The forecast is seeded with the trailing ``k_ar`` observations of
    ``train_data``, where ``k_ar`` is read from the fitted model.

    Args:
        train_data: Observations handed to ``VAR`` and sliced for the seed.
        steps: Number of steps to forecast.

    Returns:
        The value returned by the fitted model's ``forecast``.
    """
    fitted = VAR(train_data).fit()
    lag_order = fitted.k_ar
    recent_observations = train_data[-lag_order:]
    return fitted.forecast(recent_observations, steps=steps)

def arima(order, limit):
    """Score per-series ARIMA forecasts against the targets in ``train_loader``.

    For every batch ``(x, y)`` drawn from the module-level ``train_loader``,
    each series along the last axis of ``x`` is fitted independently with
    ``fit_arima_and_forecast`` (fanned out over all cores with joblib). The
    forecasts are stacked back into the leading shape of ``x`` and compared
    with ``y`` using mean squared error.

    Args:
        order: ARIMA ``(p, d, q)`` tuple forwarded to ``fit_arima_and_forecast``.
        limit: Stop after this many batches. If the loader runs out first,
            only the batches actually seen are scored.

    Returns:
        The mean of the per-batch MSE values over the batches evaluated
        (``nan`` if no batch was evaluated). The same value is printed.
    """
    total_loss = 0.0
    batches_seen = 0
    n_jobs = -1  # joblib: use every available core

    for x, y in train_loader:
        # Collapse all leading axes so each row is one series to forecast.
        # This covers every sample in the batch, not just the first one.
        series_rows = x.reshape(-1, x.shape[-1])
        forecasts = Parallel(n_jobs=n_jobs)(
            delayed(fit_arima_and_forecast)(row.tolist(), order) for row in series_rows
        )

        # Restore the leading (batch, series) shape of x; the last axis is
        # whatever horizon the forecaster produced, so no sizes are hard-coded.
        forecasts = np.asarray(forecasts).reshape(*x.shape[:-1], -1)
        forecasts = torch.from_numpy(forecasts).float()

        total_loss += F.mse_loss(forecasts, y).item()
        batches_seen += 1

        if batches_seen == limit:
            break

    # Average over the batches actually evaluated rather than over `limit`,
    # which overstated the denominator whenever the loader was shorter.
    mean_loss = total_loss / batches_seen if batches_seen else float("nan")
    print(mean_loss)
    return mean_loss

In [94]:
# Score ARIMA order (p, d, q) = (1, 0, 3), stopping after 10 batches of train_loader.
arima((1, 0, 3), 10)

0.013861831766553223


In [None]:
# Grid search over ARIMA orders: p in 0..3, d in 0..1, q in 0..3, each scored
# on 10 batches. The outer loop previously used range(4, 4), which is empty,
# so the search never ran.
for p in range(4):
    for d in range(2):
        for q in range(4):
            print(p, d, q)
            try:
                arima((p, d, q), 10)
            except Exception as e:
                # Keep the search going when one order fails, but report the
                # failure instead of silently dropping that configuration.
                print(f"  skipped ({type(e).__name__}: {e})")