In [6]:
import sys
import csv
sys.path.append('..')
from BDDData import *
import numpy as np
import torch
import torch.utils.data as data
import importlib

In [8]:
# Reload BDDData *before* binding BDD_dataset. importlib.reload() re-executes the
# module in place but does not rebind names that were previously pulled in with
# `from BDDData import ...`, so importing first would leave BDD_dataset pointing
# at the stale, pre-reload class.
importlib.reload(sys.modules['BDDData'])

# Import the BDD_dataset class from the freshly reloaded BDDData module
from BDDData import BDD_dataset

# Load dataframes
bdd_data = BDD_dataset("raw_data/")
# Add column with the timestep
bdd_data.add_timestep_id()
# Add flags for chaotic values
bdd_data.tag_chaotic(replace=True)

# Interpolate the missing values
bdd_data.interpolate_power()
# Values smaller than 0 are set to 0
bdd_data.cap_power_to_zero()
# Normalize Patv feature to [0,1]
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")
# Convert df to matrix form, where only Patv is included, then split into train,
# validation and test. Each matrix contains a subset of the time series for ALL nodes.
# NOTE(review): the original comment called this a (TxN) matrix, but the dataset class
# in this notebook slices rows as turbines and columns as time steps — confirm orientation.
train, val, test = bdd_data.split_df()

In [16]:
# Reload BDDData *before* binding BDD_dataset. importlib.reload() re-executes the
# module in place but does not rebind names that were previously pulled in with
# `from BDDData import ...`, so importing first would leave BDD_dataset pointing
# at the stale, pre-reload class.
importlib.reload(sys.modules['BDDData'])

# Import the BDD_dataset class from the freshly reloaded BDDData module
from BDDData import BDD_dataset

# Load dataframes
bdd_data = BDD_dataset("raw_data/")
# Add column with the timestep
bdd_data.add_timestep_id()
# Add flags for chaotic values
bdd_data.tag_chaotic(replace=True)

# No interpolate_power() call in this cell: the split below is done with
# split_df_no_missing_values instead.
# Values smaller than 0 are set to 0
bdd_data.cap_power_to_zero()
# Normalize Patv feature to [0,1]
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")

# TODO: import the real values (nothing is implemented for this step yet)

# Then split into train, validation and test.
# Each matrix contains a subset of the time series for ALL nodes.
# NOTE(review): the meaning of 34346 and 34375 is not visible in this notebook —
# presumably time-step bounds of a window without missing values; confirm against
# BDD_dataset.split_df_no_missing_values and consider naming them as constants.
train, val, test = bdd_data.split_df_no_missing_values(34346, 34375)
print(f'Train set:\n{train}')
print(f'val set:\n {val}')
print(f'test set: \n{test}')

start to train_end 20
train_end to val_end 2
val_end to end 7
Matrix shape: (134, 34528)
Train set:
[[0.44391903 0.43567408 0.55502163 ... 0.11154931 0.1148039  0.08510421]
 [0.47078531 0.43481896 0.47455042 ... 0.12580567 0.1155633  0.114906  ]
 [0.42435961 0.51452438 0.6461564  ... 0.12581843 0.09498283 0.08590828]
 ...
 [0.43311508 0.38046738 0.51222065 ... 0.19530063 0.17979987 0.16298452]
 [0.41768452 0.37882733 0.44973261 ... 0.23380684 0.17262064 0.1660668 ]
 [0.41160292 0.39431532 0.4498411  ... 0.18841495 0.19860627 0.21129277]]
val set:
 [[0.09604855]
 [0.0930237 ]
 [0.07308777]
 [0.08557644]
 [0.07775906]
 [0.07622749]
 [0.07646361]
 [0.08084134]
 [0.0860742 ]
 [0.07906727]
 [0.1021493 ]
 [0.10602928]
 [0.09859479]
 [0.11613764]
 [0.11965387]
 [0.13788592]
 [0.14599686]
 [0.14359102]
 [0.1620911 ]
 [0.17320774]
 [0.13260201]
 [0.08912458]
 [0.06561499]
 [0.09798854]
 [0.07144133]
 [0.07614453]
 [0.06213705]
 [0.0765338 ]
 [0.06967365]
 [0.0576253 ]
 [0.08393001]
 [0.09883728

In [7]:
class CustomBDD_Dataset(data.Dataset):
    """Sliding-window dataset over one of the notebook-level split matrices.

    Each item is a ``(features, labels)`` pair of float tensors cut from the
    selected split: ``features`` covers the observation part of a window and
    ``labels`` the forecast part that immediately follows it. Rows of the split
    matrix are sliced as turbines and columns as time steps; both outputs are
    transposed, so they are laid out as (time steps, turbines).

    The class relies on notebook globals: ``bdd_data`` (to generate and look up
    the window indices) and the ``train`` / ``val`` / ``test`` matrices.

    Args:
        dataset: Name of the split to read; one of ``"train"``, ``"val"``, ``"test"``.
        observation_window: Number of observation steps passed to
            ``bdd_data.get_observation_forecasting_window``.
        forecast_window: Number of forecast steps passed to the same call.
        starting_turbine: First row (inclusive) of the split matrix to use.
        ending_turbine: Last row (inclusive) of the split matrix to use.

    Raises:
        NotImplementedError: If ``dataset`` is not a known split name.
        NameError: If the requested split has not been defined in the notebook yet.
    """

    # Split names that may be looked up in the notebook namespace.
    _SPLIT_NAMES = ("train", "val", "test")

    def __init__(self, dataset, observation_window=12, forecast_window=12, starting_turbine = 0,  ending_turbine=133):
        self.observation_window = observation_window
        self.forecast_window = forecast_window
        self.starting_turbine = starting_turbine
        self.ending_turbine = ending_turbine
        self.dataset = dataset

        # Resolve the split once, up front, without eval(): an unknown name fails
        # immediately, and the windows generated below always stay paired with the
        # matrix whose length they were computed from, even if the global is
        # rebound by re-running a cell later.
        self._matrix = self._resolve_split(dataset)

        # Length of the first row of the split matrix, i.e. the number of time steps.
        length = len(self._matrix[0])
        # Generate the sliding windows on the shared bdd_data object ...
        bdd_data.get_observation_forecasting_window(
            time_series_len=length,
            observation_steps=self.observation_window,
            forecast_steps=self.forecast_window,
        )
        # ... and fetch them back under the "<observation>,<forecast>" key.
        self.window_of_interest = bdd_data.sliding_indices[
            str(self.observation_window) + "," + str(self.forecast_window)
        ]

    @classmethod
    def _resolve_split(cls, name):
        """Return the notebook-level matrix bound to the split called ``name``."""
        if name not in cls._SPLIT_NAMES:
            raise NotImplementedError(
                f"Unknown dataset split {name!r}; expected one of {cls._SPLIT_NAMES}"
            )
        try:
            return globals()[name]
        except KeyError:
            # Keep the exception type the original eval() lookup produced.
            raise NameError(
                f"name '{name}' is not defined; run the cell that creates the splits first"
            ) from None

    def __len__(self):
        """Number of sliding windows available for this split."""
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` float tensors for window ``idx``."""
        window = self.window_of_interest[idx]
        # window[0]:window[1] is the observation span, window[1]:window[2] the forecast span.
        obs_start, obs_end, forecast_end = window[0], window[1], window[2]
        # ending_turbine is inclusive, hence the +1.
        turbines = slice(self.starting_turbine, self.ending_turbine + 1)
        features = self._matrix[turbines, obs_start:obs_end].transpose()
        labels = self._matrix[turbines, obs_end:forecast_end].transpose()
        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()
    
# Window lengths (in steps) shared by the training and validation datasets.
obs_window, forecast_window = 12, 12

# Training split: windowed dataset wrapped in a shuffling loader, 100 windows per batch.
train_dataset = CustomBDD_Dataset(
    dataset="train",
    observation_window=obs_window,
    forecast_window=forecast_window,
)
train_loader = data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)

# Validation split: built exactly like the training one.
# NOTE(review): this loader shuffles as well; shuffling is usually not needed for
# validation — confirm this is intentional.
val_dataset = CustomBDD_Dataset(
    dataset="val",
    observation_window=obs_window,
    forecast_window=forecast_window,
)
val_loader = data.DataLoader(dataset=val_dataset, batch_size=100, shuffle=True)

In [9]:
# Sanity check: pull a single batch from the training loader and print the shapes
# of the feature and label tensors (batch, window steps, turbines).
x,y = next(iter(train_loader))
print(f"{x.shape=}\n{y.shape=}")

x.shape=torch.Size([100, 12, 134])
y.shape=torch.Size([100, 12, 134])
