In [1]:
import sys
import csv
sys.path.append('..')
from BDDData import *
import numpy as np
import torch
import torch.utils.data as data
import importlib

In [2]:
# Reload BDDData *before* binding BDD_dataset. `from ... import` copies the class
# object at import time, so importing first and reloading afterwards would leave
# BDD_dataset pointing at the stale, pre-reload class.
importlib.reload(sys.modules['BDDData'])
from BDDData import BDD_dataset

# Load dataframes
bdd_data = BDD_dataset("raw_data/")
# Add column with the timestep
bdd_data.add_timestep_id()
# Add flags for chaotic values
bdd_data.tag_chaotic(replace=True)

# Interpolate the missing values
bdd_data.interpolate_power()
# Values smaller than 0 are set to 0
bdd_data.cap_power_to_zero()
# Normalize Patv feature to [0,1]
bdd_data.normalize_power(min=0, max=1, method="MinMaxScaler")
# Convert df to matrix form, where only Patv is included, then split into train,
# validation and test. Each matrix holds a subset of the time series for ALL nodes
# (described by the author as a T x N matrix).
train, val, test = bdd_data.split_df()

In [3]:
# Reload BDDData *before* binding BDD_dataset. `from ... import` copies the class
# object at import time, so importing first and reloading afterwards would leave
# BDD_dataset pointing at the stale, pre-reload class.
importlib.reload(sys.modules['BDDData'])
from BDDData import BDD_dataset

# Load dataframes
bdd_data = BDD_dataset("raw_data/")
# Add flags for chaotic values
bdd_data.tag_chaotic(replace=False)

# Values smaller than 0 are set to 0
bdd_data.cap_power_to_zero()
# Normalize Patv feature to [0,1]
bdd_data.normalize_power(min=0, max=1, method="MinMaxScaler")
# Add column with the timestep
bdd_data.add_timestep_id()

# Import the real values (nothing is imported in this cell yet)

# Then split into train, validation and test.
# The matrix contains the subset of the time series for ALL nodes
# (described by the author as a T x N matrix).
# train, val, test = bdd_data.split_df_no_missing_values(34346, 34375)
# print(f'Train set:\n{train}')
# print(f'val set:\n {val}')
# print(f'test set: \n{test}')

In [4]:
# Timestep ids returned by BDD_dataset.find_complete_window(); presumably the
# timesteps for which every turbine has a valid reading -- TODO confirm in BDDData.
time_steps_array = bdd_data.find_complete_window()
print(time_steps_array)

[  275   276   277 ... 35276 35278 35279]


In [85]:
class CustomBDD_Dataset(data.Dataset):
    """Sliding-window dataset over a (turbines x timesteps) matrix.

    Sample ``idx`` pairs the ``observation_window`` columns starting at column
    ``idx`` (features) with the ``forecast_window`` columns that immediately
    follow (labels), restricted to rows ``starting_turbine..ending_turbine``
    inclusive. Both tensors are transposed so that time is the first axis.

    Args:
        dataset: Split name; must be "train", "val" or "test".
        data_array: 2-D array for this split (rows = turbines, columns = timesteps).
            Every split reads from this array, never from notebook globals.
        observation_window: Number of timesteps in each feature window.
        forecast_window: Number of timesteps in each label window.
        starting_turbine: First turbine row to include.
        ending_turbine: Last turbine row to include (inclusive).

    Raises:
        ValueError: If ``data_array`` is not 2-D.
    """

    _SPLITS = ("train", "val", "test")

    def __init__(self, dataset, data_array, observation_window=12, forecast_window=12, starting_turbine=0, ending_turbine=133):
        self.observation_window = int(observation_window)
        self.forecast_window = int(forecast_window)
        self.starting_turbine = int(starting_turbine)
        self.ending_turbine = int(ending_turbine)
        self.dataset = dataset

        self.dataset_values = np.asarray(data_array)
        if self.dataset_values.ndim != 2:
            raise ValueError("data_array must be a 2-D (turbines x timesteps) array")

        # One (start, split, stop) column triple per sample: features are
        # columns [start, split) and labels are columns [split, stop).
        span = self.observation_window + self.forecast_window
        n_windows = max(0, self.dataset_values.shape[1] - span + 1)
        self.window_of_interest = np.array(
            [(s, s + self.observation_window, s + span) for s in range(n_windows)],
            dtype=int,
        ).reshape(-1, 3)

    def __len__(self):
        """Return the number of complete feature/label windows."""
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        """Return ``(features, labels)`` float tensors for window ``idx``.

        Shapes are ``(observation_window, n_turbines)`` and
        ``(forecast_window, n_turbines)``.

        Raises:
            NotImplementedError: If the split name is not "train", "val" or "test".
            IndexError: If ``idx`` is out of range.
        """
        if self.dataset not in self._SPLITS:
            raise NotImplementedError

        start, split, stop = self.window_of_interest[idx]
        turbines = slice(self.starting_turbine, self.ending_turbine + 1)
        features = self.dataset_values[turbines, start:split].transpose()
        labels = self.dataset_values[turbines, split:stop].transpose()

        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()

    
# Window lengths, counted in timesteps.
# val_window is presumably the number of validation timesteps -- TODO confirm.
obs_window = 12
forecast_window = 12
val_window = 3

# NOTE(review): the disabled example below does not match the constructor of
# CustomBDD_Dataset, which also requires a `data_array` argument right after
# the split name; it must be updated before being re-enabled.
# train_dataset = CustomBDD_Dataset("train",observation_window=obs_window,forecast_window=forecast_window)
# train_loader = data.DataLoader(train_dataset, shuffle=True, batch_size = 100)
# val_dataset = CustomBDD_Dataset("val",observation_window=obs_window,forecast_window=forecast_window)
# val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size = 100)

In [6]:
# Window lengths in timesteps; their sum is the minimum run length passed to
# find_chains at the end of this cell.
obs_window = 12
forecast_window = 12
val_window = 3
def find_chains(array, window_size):
    """Group consecutive integers of ``array`` into runs and keep the long ones.

    A run is a maximal stretch in which every value is exactly one greater than
    the value before it. Only runs with strictly more than ``window_size``
    elements are returned.

    Args:
        array: Sequence (list or 1-D NumPy array) of integers, scanned in order.
        window_size: A run must contain more than this many elements to be kept.

    Returns:
        A list of runs, each a list of the original elements. Empty when
        ``array`` is empty or no run is long enough.
    """
    # Guard: an empty input has no first element to seed a run with.
    # len() is used because truth-testing a NumPy array is ambiguous.
    if len(array) == 0:
        return []

    chains = []
    current_chain = [array[0]]
    for previous, value in zip(array[:-1], array[1:]):
        if value - previous == 1:
            current_chain.append(value)
            continue
        # Gap found: close the current run and start a new one at `value`.
        if len(current_chain) > window_size:
            chains.append(current_chain)
        current_chain = [value]

    # The final run is never closed by a gap, so check it explicitly.
    if len(current_chain) > window_size:
        chains.append(current_chain)
    return chains

# Keep only runs of consecutive timesteps that are strictly longer than
# obs_window + forecast_window + val_window (12 + 12 + 3 = 27 with the values above)
complete_windows = find_chains(time_steps_array,obs_window + forecast_window + val_window)
print(complete_windows)

[[30772, 30773, 30774, 30775, 30776, 30777, 30778, 30779, 30780, 30781, 30782, 30783, 30784, 30785, 30786, 30787, 30788, 30789, 30790, 30791, 30792, 30793, 30794, 30795, 30796, 30797, 30798, 30799], [31113, 31114, 31115, 31116, 31117, 31118, 31119, 31120, 31121, 31122, 31123, 31124, 31125, 31126, 31127, 31128, 31129, 31130, 31131, 31132, 31133, 31134, 31135, 31136, 31137, 31138, 31139, 31140], [34345, 34346, 34347, 34348, 34349, 34350, 34351, 34352, 34353, 34354, 34355, 34356, 34357, 34358, 34359, 34360, 34361, 34362, 34363, 34364, 34365, 34366, 34367, 34368, 34369, 34370, 34371, 34372, 34373, 34374]]


In [12]:
# Split one of the complete runs into train/val/test. The window lengths come
# from the same constants that were used to filter `complete_windows`, so the
# chain filter and the split cannot drift apart.
train, val, test = bdd_data.split_df_custom(
    complete_windows,
    chain_index=1,  # second run in complete_windows
    obs_window=obs_window,
    forecast_window=forecast_window,
    val_window=val_window,
)

print(train)
print(f"Train shape: {train.shape}")

# The raw matrices are handed to DataLoader directly, so each sample is one row
# of the matrix and a batch stacks up to 10 rows.
train_loader = data.DataLoader(train, shuffle=True, batch_size=10)
val_loader = data.DataLoader(val, shuffle=True, batch_size=10)

print(train_loader)

[[0.38890378 0.47100228 0.48918967 ... 0.76162397 0.74722084 0.69575372]
 [0.4252275  0.4546464  0.58075838 ... 0.80498653 0.75194318 0.84254189]
 [0.5239563  0.42562954 0.3499381  ... 0.90115633 0.87420709 0.42026905]
 ...
 [0.20440071 0.2219308  0.25437455 ... 0.3094217  0.34692601 0.31369095]
 [0.1887085  0.18923817 0.25257495 ... 0.32695818 0.3418846  0.32743041]
 [0.30767954 0.30679889 0.35344795 ... 0.33667088 0.31983638 0.29808171]]
[[0.38890378 0.47100228 0.48918967 ... 0.76162397 0.74722084 0.69575372]
 [0.4252275  0.4546464  0.58075838 ... 0.80498653 0.75194318 0.84254189]
 [0.5239563  0.42562954 0.3499381  ... 0.90115633 0.87420709 0.42026905]
 ...
 [0.20440071 0.2219308  0.25437455 ... 0.3094217  0.34692601 0.31369095]
 [0.1887085  0.18923817 0.25257495 ... 0.32695818 0.3418846  0.32743041]
 [0.30767954 0.30679889 0.35344795 ... 0.33667088 0.31983638 0.29808171]]
Train shape: (134, 12)
<torch.utils.data.dataloader.DataLoader object at 0x13d6105d0>


In [13]:


# Sanity-check the training loader by printing its first three batches
for batch_number, batch in enumerate(train_loader, start=1):
    print(f"Batch {batch_number}:")
    print(batch)
    print(f"Batch shape: {batch.shape}")

    # Three batches are enough for a visual check; stop before fetching a fourth
    if batch_number == 3:
        break


Batch 1:
tensor([[0.2097, 0.2188, 0.2551, 0.2123, 0.1716, 0.2777, 0.2702, 0.1920, 0.2172,
         0.3043, 0.3098, 0.3266],
        [0.3729, 0.3645, 0.4804, 0.3739, 0.3280, 0.6268, 0.5986, 0.5789, 0.5256,
         0.2263, 0.3033, 0.3491],
        [0.2300, 0.2253, 0.2698, 0.2475, 0.2131, 0.2250, 0.2072, 0.1865, 0.2189,
         0.2522, 0.2132, 0.2584],
        [0.3009, 0.3395, 0.3631, 0.3141, 0.2986, 0.2916, 0.2116, 0.2324, 0.2996,
         0.3341, 0.3352, 0.3447],
        [0.1887, 0.1892, 0.2526, 0.2777, 0.2801, 0.2611, 0.2682, 0.2912, 0.3061,
         0.3270, 0.3419, 0.3274],
        [0.3208, 0.4518, 0.4545, 0.3942, 0.5770, 0.5558, 0.7255, 0.7638, 0.7693,
         0.8171, 0.8205, 0.9060],
        [0.2542, 0.2912, 0.3413, 0.3191, 0.2880, 0.2563, 0.2525, 0.2220, 0.2607,
         0.2467, 0.1992, 0.1912],
        [0.2804, 0.2770, 0.3022, 0.3299, 0.3077, 0.3035, 0.2915, 0.2662, 0.2517,
         0.2796, 0.2698, 0.2617],
        [0.3467, 0.3815, 0.4555, 0.4634, 0.4184, 0.4560, 0.4521, 0.5532