In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
import sys
import csv
from BDDData import *
import torch
import torch.utils.data as data
import importlib
import torch.nn.functional as F
from joblib import Parallel, delayed
from statsmodels.tsa.vector_ar.var_model import VAR

In [2]:
# Reload BDDData so edits to the module are picked up without restarting the kernel.
# `from BDDData import *` bound its names at first import and importlib.reload does
# not rebind them, so the class is taken from the freshly reloaded module object.
BDDData = importlib.reload(sys.modules['BDDData'])

bdd_data = BDDData.BDD_dataset("raw_data/")
bdd_data.add_timestep_id()
bdd_data.tag_chaotic(replace=True)

# Power preprocessing followed by the split; what each step does exactly is
# defined in BDDData, not in this notebook.
bdd_data.interpolate_power()
bdd_data.cap_power_to_zero()
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")
train, val, test = bdd_data.split_df()

In [121]:
class CustomBDD_Dataset(data.Dataset):
    """Sliding-window dataset over the notebook-level `train` / `val` / `test` arrays.

    Each item is a `(features, labels)` pair of float tensors cut from the chosen
    array: rows `starting_turbine..ending_turbine` (inclusive) and, along the second
    axis, the observation span for `features` and the forecast span for `labels`.

    Args:
        dataset: Name of the split to read: "train", "val" or "test".
        observation_window: Number of observation steps requested from `bdd_data`.
        forecast_window: Number of forecast steps requested from `bdd_data`.
        starting_turbine: First row index (inclusive) to include.
        ending_turbine: Last row index (inclusive) to include.

    Raises:
        NotImplementedError: If `dataset` is not one of the three known split names.

    Note:
        Items from "train" keep the 2-D (rows, steps) layout, while "val" and "test"
        items are transposed and flattened to a (rows * steps, 1) column.
        NOTE(review): confirm this asymmetry is intended.
    """

    def __init__(self, dataset, observation_window=12, forecast_window=12, starting_turbine = 0,  ending_turbine=133):
        self.observation_window = observation_window
        self.forecast_window = forecast_window
        self.starting_turbine = starting_turbine
        self.ending_turbine = ending_turbine
        self.dataset = dataset

        # Length of the first row of the selected split (explicit lookup instead of
        # eval on a user-supplied string); also rejects unknown split names early.
        length = len(self._split_array()[0])
        # Ask bdd_data to generate the window indices, then fetch them by their
        # "<observation>,<forecast>" key.
        bdd_data.get_observation_forecasting_window(
            time_series_len=length,
            observation_steps=self.observation_window,
            forecast_steps=self.forecast_window,
        )
        self.window_of_interest = bdd_data.sliding_indices[
            str(self.observation_window) + "," + str(self.forecast_window)
        ]

    def _split_array(self):
        """Return the notebook-level array named by `self.dataset`."""
        # Looked up lazily so only the split that is actually used has to exist.
        if self.dataset == "train":
            return train
        if self.dataset == "val":
            return val
        if self.dataset == "test":
            return test
        raise NotImplementedError(f"unknown dataset split: {self.dataset!r}")

    def __len__(self):
        """Number of sliding windows available."""
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        """Return the `(features, labels)` float tensors for window number `idx`."""
        window = self.window_of_interest[idx]
        source = self._split_array()
        rows = slice(self.starting_turbine, self.ending_turbine + 1)

        # window[0]:window[1] is the observation span, window[1]:window[2] the forecast span.
        features = source[rows, window[0]:window[1]]
        labels = source[rows, window[1]:window[2]]

        if self.dataset != "train":
            # val/test items are flattened to a single column; train stays 2-D.
            features = features.transpose().reshape(-1, 1)
            labels = labels.transpose().reshape(-1, 1)

        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()
    
# Window sizes and batch size shared by the dataset and its loader.
obs_window, forecast_window = 12, 12
batch_size = 100

train_dataset = CustomBDD_Dataset(
    "train",
    observation_window=obs_window,
    forecast_window=forecast_window,
)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation counterpart, currently disabled:
# val_dataset = CustomBDD_Dataset("val",observation_window=obs_window,forecast_window=forecast_window)
# val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size = batch_size)

In [129]:
def fit_arima_and_forecast(series, order, steps=12):
    """Fit an ARIMA model to one series and forecast past its end.

    Args:
        series: Observations handed to `ARIMA` as the series to model.
        order: ARIMA order, passed through unchanged as `order=`.
        steps: Number of steps to forecast. Defaults to 12, the horizon that
            used to be hard-coded here.

    Returns:
        Whatever the fitted model's `forecast(steps=steps)` returns.
    """
    model_fit = ARIMA(series, order=order).fit()
    return model_fit.forecast(steps=steps)

def fit_var_and_forecast(train_data, steps):
    """Fit a VAR model on `train_data` and return its forecast, transposed.

    The forecast is seeded with the last `k_ar` rows of `train_data`, where
    `k_ar` is read from the fitted model, and runs for `steps` steps.
    """
    fitted = VAR(train_data).fit()
    seed_rows = train_data[-fitted.k_ar:]
    prediction = fitted.forecast(seed_rows, steps=steps)
    return prediction.T

def arima(order, limit=None):
    """Score per-series ARIMA forecasts on `train_loader` and report the mean batch MSE.

    For each batch `(x, y)`, every row along the last axis of `x` is fitted
    independently with `fit_arima_and_forecast(series, order)`; the forecasts are
    put back into `x`'s leading shape and compared with `y` via `F.mse_loss`.

    Args:
        order: ARIMA order, passed unchanged to `fit_arima_and_forecast`.
        limit: Maximum number of batches to evaluate. `None` (the default) means
            every batch in `train_loader` at call time.

    Returns:
        The mean of the per-batch MSE values over the batches actually
        evaluated, or NaN when no batch was evaluated. The value is also printed.
    """
    if limit is None:
        # Resolved at call time so a rebuilt train_loader is not measured
        # against a stale length captured when the function was defined.
        limit = len(train_loader)

    n_jobs = -1  # joblib setting: -1 requests all available workers
    total_loss = 0.0
    batches_done = 0

    for x, y in train_loader:
        if batches_done >= limit:
            break

        # One independent series per row; the series length is taken from x
        # instead of being hard-coded.
        flat_x = x.view(-1, x.shape[-1]).tolist()
        results = Parallel(n_jobs=n_jobs)(
            delayed(fit_arima_and_forecast)(series, order) for series in flat_x
        )

        # Restore x's leading dimensions; the last axis is the forecast horizon.
        results = np.array(results).reshape(*x.shape[:-1], -1)
        results = torch.from_numpy(results).float()

        total_loss += F.mse_loss(results, y).item()
        batches_done += 1

    # Average over what was really evaluated, not over the requested limit.
    mean_loss = total_loss / batches_done if batches_done else float("nan")
    print(mean_loss)
    return mean_loss

In [None]:
# Run ARIMA over the whole train loader with order (0, 0, 1), i.e. p=0 (no AR terms),
# d=0 (no differencing), q=1 (one MA term). Per the author's earlier experiments this
# was the best-performing order.
arima((0, 0, 1))