In [1]:
import os

import torch
from torch.utils.data import Dataset, DataLoader
from darts import TimeSeries
from darts.dataprocessing import Pipeline
from darts.dataprocessing.transformers.boxcox import BoxCox
from darts.dataprocessing.transformers.scaler import Scaler
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# The notebook is expected to run with the git repo root as working directory.
# Only step up one level when we are not already there, so that re-running this
# cell does not keep climbing the directory tree and trip the assertion below.
REPO_NAME = "WattCast"
if not os.getcwd().endswith(REPO_NAME):
    os.chdir(r"..") # should be the git repo root directory
cwd = os.getcwd()
print("Current working directory: " + cwd)
assert cwd.endswith(REPO_NAME), "Current working directory is not the git repo root directory"

# Root folder for all data files, resolved relative to the repo root.
DIR_DATA = os.path.join(cwd, 'data')

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset yielding ``(history, target)`` pairs.

    Window ``i`` consists of ``history_length`` consecutive steps starting at
    position ``i`` followed immediately by ``forecast_length`` target steps.

    Args:
        data: Object supporting ``len()``, integer slicing, and a ``.values()``
            method on the resulting slices (presumably a darts ``TimeSeries``
            -- confirm against callers).
        history_length: Number of steps in each history window.
        forecast_length: Number of steps in each target window.
    """

    def __init__(self, data, history_length=5, forecast_length=2):
        self.data = data
        self.history_length = history_length
        self.forecast_length = forecast_length

    def __getitem__(self, index):
        """Return the ``(history, target)`` values for window ``index``.

        Negative indices count from the last window, as with Python sequences.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Without this check an out-of-range index would yield truncated
                windows, and a negative index would slice the wrong region.
        """
        n_windows = len(self)
        if index < 0:
            index += n_windows
        if not 0 <= index < n_windows:
            raise IndexError(
                f"window index out of range for dataset with {n_windows} windows"
            )

        # The target window starts right where the history window ends.
        split = index + self.history_length
        history = self.data[index:split].values()
        target = self.data[split:split + self.forecast_length].values()

        return history, target

    def __len__(self):
        """Return the number of complete windows (0 if the data is too short)."""
        # Clamp at zero: len() raises ValueError when __len__ returns a negative.
        return max(0, len(self.data) - self.history_length - self.forecast_length + 1)



Current working directory: e:\GitHub\Forked_Repos\WattCast


In [2]:
import pandas as pd
import os
import darts
from darts.utils.missing_values import extract_subseries

# --- Configuration -----------------------------------------------------------
# Resolve the cleaned-data folder from DIR_DATA (set in the first cell) instead
# of a machine-specific absolute path, so the notebook runs on any checkout.
dir_path = os.path.join(DIR_DATA, 'clean_data')
spatial_scale = '1_county'
location = 'Los_Angeles'
temp_resolution = '60'
boxcox = True

# --- Load target and covariate frames ----------------------------------------
# All splits live in one HDF5 file per spatial scale, under keys of the form
# '<location>/<resolution>min/<split>_<target|cov>'.
h5_path = os.path.join(dir_path, f'{spatial_scale}.h5')
key_prefix = f'{location}/{temp_resolution}min'

df_train = pd.read_hdf(h5_path, key=f'{key_prefix}/train_target')
df_val = pd.read_hdf(h5_path, key=f'{key_prefix}/val_target')
df_test = pd.read_hdf(h5_path, key=f'{key_prefix}/test_target')

df_cov_train = pd.read_hdf(h5_path, key=f'{key_prefix}/train_cov')
df_cov_val = pd.read_hdf(h5_path, key=f'{key_prefix}/val_cov')
df_cov_test = pd.read_hdf(h5_path, key=f'{key_prefix}/test_cov')


In [3]:
# Add a boolean 'heat_wave' column to every covariate frame: True where the
# frame's first column exceeds that same frame's 95th percentile.
# NOTE(review): the threshold is computed per split, not taken from the training
# split; the first column is presumably temperature -- confirm both are intended.
for cov_frame in (df_cov_train, df_cov_val, df_cov_test):
    first_covariate = cov_frame[cov_frame.columns[0]]
    cov_frame['heat_wave'] = first_covariate > first_covariate.quantile(0.95)


# Datetime attributes fed to the "cyclic" encoder.
# NOTE(review): `timestep_encoding` was referenced here without being defined in
# any cell above, so a fresh-kernel run raised NameError. The choice below (hour
# only for whole-hour resolutions, hour + minute otherwise) is an assumption --
# confirm against the project's convention.
timestep_encoding = ["hour"] if int(temp_resolution) % 60 == 0 else ["hour", "minute"]

datetime_encoders = {
    "cyclic": {"future": timestep_encoding},
    # The dict literal previously listed "position" twice; the later entry
    # silently replaced a future-only one. Only the effective entry is kept.
    "position": {"past": ["relative"], "future": ["relative"]},
    "datetime_attribute": {"future": ["dayofweek", "week"]},
}


# Convert each target frame to a darts TimeSeries at the configured resolution,
# then pass it through extract_subseries (imported from darts.utils.missing_values).
series_freq = f'{temp_resolution}min'

ts_train = extract_subseries(
    darts.TimeSeries.from_dataframe(df_train, freq=series_freq)
)
ts_val = extract_subseries(
    darts.TimeSeries.from_dataframe(df_val, freq=series_freq)
)
ts_test = extract_subseries(
    darts.TimeSeries.from_dataframe(df_test, freq=series_freq)
)


# Preprocessing pipeline (missing values have been filled in 'data_prep.ipynb').
# First step: Box-Cox when enabled, otherwise a min-max scaler -- which means the
# data is min-max scaled twice when boxcox is turned off.
first_step = BoxCox() if boxcox else Scaler(MinMaxScaler())
pipeline = Pipeline([first_step, Scaler(MinMaxScaler())])

# Fit on the training split only; validation and test reuse the fitted steps.
# NOTE(review): ts_train / ts_val / ts_test come from extract_subseries and may
# be lists of sub-series; confirm how the installed darts version matches fitted
# parameters to series when the lists differ between fit and transform.
ts_train_piped = pipeline.fit_transform(ts_train)
ts_val_piped = pipeline.transform(ts_val)
ts_test_piped = pipeline.transform(ts_test)


In [4]:
# Show the first raw training sub-series next to its transformed counterpart.
# Only the last bare expression of a cell is rendered, so the raw series was
# previously evaluated and discarded; display() renders both.
display(ts_train[0], ts_train_piped[0])