In [2]:
import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from workalendar.usa import Texas


mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
DAYS_IN_YEAR = 365
HOURS_IN_DAY = 24
DAYS_OF_WEEK = ['week_1','week_2','week_3','week_4','week_5','week_6','week_7']
MINUTES_IN_HOUR = 60
SECONDS_IN_MINUTE = 60
MINUTES_IN_DAY = MINUTES_IN_HOUR * HOURS_IN_DAY

ModuleNotFoundError: No module named 'tensorflow'

In [30]:
csv_path = '../data/ERCOT/ercot_data.csv'
df = pd.read_csv(csv_path, index_col=0)

In [31]:
def get_fractional_hour_from_series(series: pd.Series) -> pd.Series:
    """
    Return fractional hour in range 0-24, e.g. 12h30m --> 12.5.
    Accurate to 1 minute.
    """
    hour = series.hour
    minute = series.minute
    return hour + minute / MINUTES_IN_HOUR

def get_fractional_day_from_series(series: pd.Series) -> pd.Series:
    """
    Return fractional day in range 0-1, e.g. 12h30m --> 0.521.
    Accurate to 1 minute
    """
    fractional_hours = get_fractional_hour_from_series(series)
    return fractional_hours / HOURS_IN_DAY

def get_fractional_year_from_series(series: pd.Series) -> pd.Series:
    """
    Return fractional year in range 0-1.
    Accurate to 1 day
    """
    return (series.dayofyear - 1) / DAYS_IN_YEAR

def normalize(self, tensor):
    self.scaler = MinMaxScaler(feature_range=(0, 1))
    tensor = self.scaler.fit_transform(tensor)
    return tensor

def preprocess(dataframe):
    dataframe.index = pd.to_datetime(dataframe.index)
    # Removing duplicates
    dataframe = dataframe[~dataframe.index.duplicated()]

    #Filling NaN values
    dataframe = dataframe.interpolate()

    # Setting the calendar holidats
    cal = Texas()
    years = list(range(2014, 2025))
    holidays = []
    for year in years:
        holidays.extend(cal.holidays(year))

    dataframe = dataframe.sort_index()

    #print('preprocess:')
    #print(dataframe.columns)
    # Rename the target column to 'Valeur' for convenience
    dataframe.rename(columns={dataframe.columns[0]: 'value'}, inplace=True)

    # Logarithmic transform add 1 for non-negative data (zeros in the series)
    #dataframe[self.Y_var] = log(dataframe[self.Y_var] + 1)

    #working day {0,1}
    dataframe['working day'] = dataframe.index.map(cal.is_working_day).astype(np.float32)

    #fractional hour [0,1]
    dataframe['fractional hour'] = get_fractional_day_from_series(dataframe.index)

    # day of week one-hot encoded
    dataframe['day of week'] = dataframe.index.dayofweek + 1
    dataframe['day of week'] = pd.Categorical(dataframe['day of week'], categories=[1,2,3,4,5,6,7], ordered=True)
    dataframe = pd.get_dummies(dataframe,prefix=['week'], columns = ['day of week'], drop_first=False)
    #dataframe = pd.concat([dataframe, pd.DataFrame(columns=DAYS_OF_WEEK)]).fillna(0)


    # fractional day of year
    dataframe['day of year'] = get_fractional_year_from_series(dataframe.index)

    # we encode cynical data into two dimensions using a sine and cosine transformations
    def encode(data, col, max_val):
        data[col + '_sin'] = np.sin(2 * np.pi * data[col]/max_val)
        data[col + '_cos'] = np.cos(2 * np.pi * data[col]/max_val)
        return data
    dataframe = encode(dataframe, 'fractional hour', HOURS_IN_DAY)
    dataframe = encode(dataframe, 'day of year', DAYS_IN_YEAR)
    # dropping original columns
    dataframe = dataframe.drop(['fractional hour','day of year'], axis=1)
    return dataframe

In [53]:
df = preprocess(df)

In [54]:
df

Unnamed: 0,value,Temperatures (°C),working day,week_1,week_2,week_3,week_4,week_5,week_6,week_7,...,fractional hour_cos,day of year_sin,day of year_cos,week_1.1,week_2.1,week_3.1,week_4.1,week_5.1,week_6.1,week_7.1
2012-01-01 01:00:00,849.000892,16.906305,0.0,0,0,0,0,0,0,1,...,0.999941,0.000000,1.000000,0,0,0,0,0,0,1
2012-01-01 02:00:00,845.097363,15.528070,0.0,0,0,0,0,0,0,1,...,0.999762,0.000000,1.000000,0,0,0,0,0,0,1
2012-01-01 03:00:00,840.902848,14.686365,0.0,0,0,0,0,0,0,1,...,0.999465,0.000000,1.000000,0,0,0,0,0,0,1
2012-01-01 04:00:00,845.452257,14.500238,0.0,0,0,0,0,0,0,1,...,0.999048,0.000000,1.000000,0,0,0,0,0,0,1
2012-01-01 05:00:00,862.369386,12.071802,0.0,0,0,0,0,0,0,1,...,0.998513,0.000000,1.000000,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-31 20:00:00,1252.891804,3.971521,1.0,0,0,0,1,0,0,0,...,0.976296,0.017166,0.999853,0,0,0,1,0,0,0
2015-12-31 21:00:00,1234.948091,4.235040,1.0,0,0,0,1,0,0,0,...,0.973877,0.017166,0.999853,0,0,0,1,0,0,0
2015-12-31 22:00:00,1215.899839,5.156763,1.0,0,0,0,1,0,0,0,...,0.971342,0.017166,0.999853,0,0,0,1,0,0,0
2015-12-31 23:00:00,1184.392095,5.301019,1.0,0,0,0,1,0,0,0,...,0.968692,0.017166,0.999853,0,0,0,1,0,0,0


In [55]:
column_indices = {name: i for i, name in enumerate(df.columns)}

n = len(df)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7):int(n*0.9)]
test_df = df[int(n*0.9):]

num_features = df.shape[1]

In [56]:
column_indices

{'value': 0,
 'Temperatures (°C)': 1,
 'working day': 2,
 'week_1': 14,
 'week_2': 15,
 'week_3': 16,
 'week_4': 17,
 'week_5': 18,
 'week_6': 19,
 'week_7': 20,
 'fractional hour_sin': 10,
 'fractional hour_cos': 11,
 'day of year_sin': 12,
 'day of year_cos': 13}

In [76]:
class WindowGenerator():
    def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in
                                        enumerate(label_columns)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])
    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack([labels[:, :, self.column_indices[name]] for name in self.label_columns],axis=-1)

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels
    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
          data=data,
          targets=None,
          sequence_length=self.total_window_size,
          sequence_stride=1,
          shuffle=True,
          batch_size=32,)

        ds = ds.map(self.split_window)
        return ds
    @property
    def train(self):
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        return self.make_dataset(self.test_df)

    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, '_example', None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result

    WindowGenerator.train = train
    WindowGenerator.val = val
    WindowGenerator.test = test
    WindowGenerator.example = example
    WindowGenerator.make_dataset = make_dataset
    

In [77]:
w2 = WindowGenerator(input_width=24, label_width=24, shift=24,
                     label_columns=['value'])
w2

Total window size: 48
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
Label column name(s): ['value']

In [78]:
# Stack three slices, the length of the total window:
example_window = tf.stack([np.array(train_df[:w2.total_window_size]),
                           np.array(train_df[100:100+w2.total_window_size]),
                           np.array(train_df[200:200+w2.total_window_size])])

In [79]:
example_inputs, example_labels = w2.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'labels shape: {example_labels.shape}')


All shapes are: (batch, time, features)
Window shape: (3, 48, 21)
Inputs shape: (3, 24, 21)
labels shape: (3, 24, 1)


In [80]:
w2.train.element_spec

AttributeError: module 'tensorflow_core.keras.preprocessing' has no attribute 'timeseries_dataset_from_array'

In [81]:
tf.__version__

'2.0.0'