In [1]:
import datetime
import sys
import os

import IPython
import IPython.display
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
from sklearn.preprocessing import MinMaxScaler

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_seq_items = 2000

In [2]:
def train_test_split(df, n_test, overlap=71):
    """Split a frame positionally into a training head and a test tail.

    The test frame is the last ``n_test + overlap`` rows; the training frame
    is everything except the last ``len(test_df) - overlap`` rows, so the two
    frames share ``overlap`` rows whenever the frame is long enough.

    Args:
        df: DataFrame to split; rows are taken in their existing order.
        n_test: Requested number of hold-out rows. Ignored when ``df`` has
            fewer than 8760 rows, in which case 20% of the rows are used.
        overlap: Number of rows shared between the end of the training frame
            and the start of the test frame (default 71, the previously
            hard-coded value).

    Returns:
        Tuple ``(train_df, test_df)`` of independent copies.
    """
    # 8760 is presumably one year of hourly samples — TODO confirm.
    if len(df) < 8760:
        n_test = round(len(df) * 0.2)
    # Copy only the slices (not the whole frame) so later in-place scaling
    # never writes into a view of the caller's data.
    test_df = df.iloc[-(n_test + overlap):].copy()
    n_holdout = len(test_df) - overlap
    # `[:-0]` would select nothing, so a zero-sized hold-out keeps every row.
    train_df = df.iloc[:-n_holdout].copy() if n_holdout else df.copy()
    return train_df, test_df

In [3]:
def MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72):
    """Turn one raw series frame into scaled train/test tensor inputs.

    Steps: merge the frame with its `series_to_supervised` expansion, run
    `preprocess`, split with `train_test_split`, min-max scale, and wrap both
    splits in `TimeSeriesTensor`.

    Args:
        df: Raw frame for one series; must contain a 'value' column.
        n_test: Requested hold-out size, forwarded to `train_test_split`.
        T: Number of past steps fed as the 'X' input (lags -T+1 .. 0).
        HORIZON: Forecast length passed to `TimeSeriesTensor`.

    Returns:
        Tuple ``(train_inputs, test_inputs, y_scaler)`` where ``y_scaler`` is
        a MinMaxScaler fitted on the training 'value' column only.
    """
    df = df.merge(series_to_supervised(df), how='right', left_index=True, right_index=True)
    # NOTE(review): 'Belgium' is passed although this notebook loads the
    # Columbia data set — confirm this is the intended preprocess() argument.
    df = preprocess(df, 'Belgium')
    train_df, test_df = train_test_split(df, n_test)
    y_scaler = MinMaxScaler()
    y_scaler.fit(train_df[['value']])
    # Fit the feature scaler on the training split ONLY and reuse it for the
    # test split. Re-fitting on the test split (as before) scaled test targets
    # with test min/max, which is inconsistent with the train-fitted y_scaler
    # returned to the caller and leaks test statistics.
    long_scaler = MinMaxScaler()
    train_df[train_df.columns] = long_scaler.fit_transform(train_df)
    test_df[test_df.columns] = long_scaler.transform(test_df)
    # 'X'  : first column over the past T steps.
    # 'X2' : remaining columns over steps 1..72 (presumably the forward lags
    #        covering the 72-step horizon — TODO confirm before changing HORIZON).
    tensor_structure = {'X':(range(-T+1, 1), train_df.columns[:1]), 'X2':(range(1, 73), train_df.columns[1:])}
    train_inputs = TimeSeriesTensor(train_df, 'value', HORIZON, tensor_structure)
    test_inputs = TimeSeriesTensor(test_df, 'value', HORIZON, tensor_structure)
    return train_inputs, test_inputs, y_scaler

In [4]:
# Train one global LSTM on all 28 residential series, then score it per series.
MAX_EPOCHS = 100
BATCHSIZE = 32  # NOTE(review): unused — fit() below hard-codes batch_size=1500
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])


# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in pbar(range(1,29)):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value and forward-feature tensors along the feature axis.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set.
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

for i in range(0,28):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # `names` was never defined (NameError after training); label the row with
    # the 0-based series position i, which corresponds to Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')

In [5]:
import datetime
import sys
import os

import IPython
import IPython.display
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
from sklearn.preprocessing import MinMaxScaler

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_seq_items = 2000

In [6]:
def train_test_split(df, n_test, overlap=71):
    """Split a frame positionally into a training head and a test tail.

    The test frame is the last ``n_test + overlap`` rows; the training frame
    is everything except the last ``len(test_df) - overlap`` rows, so the two
    frames share ``overlap`` rows whenever the frame is long enough.

    Args:
        df: DataFrame to split; rows are taken in their existing order.
        n_test: Requested number of hold-out rows. Ignored when ``df`` has
            fewer than 8760 rows, in which case 20% of the rows are used.
        overlap: Number of rows shared between the end of the training frame
            and the start of the test frame (default 71, the previously
            hard-coded value).

    Returns:
        Tuple ``(train_df, test_df)`` of independent copies.
    """
    # 8760 is presumably one year of hourly samples — TODO confirm.
    if len(df) < 8760:
        n_test = round(len(df) * 0.2)
    # Copy only the slices (not the whole frame) so later in-place scaling
    # never writes into a view of the caller's data.
    test_df = df.iloc[-(n_test + overlap):].copy()
    n_holdout = len(test_df) - overlap
    # `[:-0]` would select nothing, so a zero-sized hold-out keeps every row.
    train_df = df.iloc[:-n_holdout].copy() if n_holdout else df.copy()
    return train_df, test_df

In [7]:
def MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72):
    """Turn one raw series frame into scaled train/test tensor inputs.

    Steps: merge the frame with its `series_to_supervised` expansion, run
    `preprocess`, split with `train_test_split`, min-max scale, and wrap both
    splits in `TimeSeriesTensor`.

    Args:
        df: Raw frame for one series; must contain a 'value' column.
        n_test: Requested hold-out size, forwarded to `train_test_split`.
        T: Number of past steps fed as the 'X' input (lags -T+1 .. 0).
        HORIZON: Forecast length passed to `TimeSeriesTensor`.

    Returns:
        Tuple ``(train_inputs, test_inputs, y_scaler)`` where ``y_scaler`` is
        a MinMaxScaler fitted on the training 'value' column only.
    """
    df = df.merge(series_to_supervised(df), how='right', left_index=True, right_index=True)
    # NOTE(review): 'Belgium' is passed although this notebook loads the
    # Columbia data set — confirm this is the intended preprocess() argument.
    df = preprocess(df, 'Belgium')
    train_df, test_df = train_test_split(df, n_test)
    y_scaler = MinMaxScaler()
    y_scaler.fit(train_df[['value']])
    # Fit the feature scaler on the training split ONLY and reuse it for the
    # test split. Re-fitting on the test split (as before) scaled test targets
    # with test min/max, which is inconsistent with the train-fitted y_scaler
    # returned to the caller and leaks test statistics.
    long_scaler = MinMaxScaler()
    train_df[train_df.columns] = long_scaler.fit_transform(train_df)
    test_df[test_df.columns] = long_scaler.transform(test_df)
    # 'X'  : first column over the past T steps.
    # 'X2' : remaining columns over steps 1..72 (presumably the forward lags
    #        covering the 72-step horizon — TODO confirm before changing HORIZON).
    tensor_structure = {'X':(range(-T+1, 1), train_df.columns[:1]), 'X2':(range(1, 73), train_df.columns[1:])}
    train_inputs = TimeSeriesTensor(train_df, 'value', HORIZON, tensor_structure)
    test_inputs = TimeSeriesTensor(test_df, 'value', HORIZON, tensor_structure)
    return train_inputs, test_inputs, y_scaler

In [8]:
# Train one global LSTM on all 28 residential series, then score it per series.
MAX_EPOCHS = 100
BATCHSIZE = 32  # NOTE(review): unused — fit() below hard-codes batch_size=1500
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])


# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in pbar(range(1,29)):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value and forward-feature tensors along the feature axis.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set.
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

for i in range(0,28):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # `names` was never defined (NameError after training); label the row with
    # the 0-based series position i, which corresponds to Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')

In [9]:
# Train one global LSTM on all 28 residential series, then score it per series.
MAX_EPOCHS = 100
BATCHSIZE = 32  # NOTE(review): unused — fit() below hard-codes batch_size=1500
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])


# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in range(1,29):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value and forward-feature tensors along the feature axis.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set.
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

for i in pbar(range(0,28)):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # `names` was never defined (NameError after training); label the row with
    # the 0-based series position i, which corresponds to Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')

In [10]:
# Train one global LSTM on all 28 residential series, then score it per series.
MAX_EPOCHS = 100
# NOTE(review): BATCHSIZE is not used below — fit() hard-codes batch_size=1500.
BATCHSIZE = 32
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])




# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
# Per-series accumulators, filled in file order (Residential_1 .. Residential_28).
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in range(1,29):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value ('X') and forward-feature ('X2') tensors along axis 2.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    # Keep the whole test object and the target scaler for per-series evaluation.
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set (concatenate on axis 0).
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

# Stop once val_loss has not improved for `patience` consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')
        
history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

# Score the shared model on each series' own test windows.
for i in pbar(range(0,28)):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    # The series' own y_scaler is handed to create_evaluation_df
    # (presumably to map values back to original units — TODO confirm).
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # 'B' holds the 0-based position i, i.e. the series read from Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')

In [11]:
import datetime
import sys
import os

import IPython
import IPython.display
import numpy as np
import pandas as pd

import wandb
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
from sklearn.preprocessing import MinMaxScaler

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_seq_items = 2000

In [12]:
import datetime
import sys
import os

import IPython
import IPython.display
import numpy as np
import pandas as pd

import wandb
from wandb.keras import WandbCallback
wandb.login()

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input, Model
from sklearn.preprocessing import MinMaxScaler

import src.preprocessing_3days
from src.preprocessing_3days import series_to_supervised, preprocess
from src.functions import load_data, TimeSeriesTensor, create_evaluation_df, plot_train_history, validation, save_model, load_model

np.set_printoptions(threshold=sys.maxsize)
pd.options.display.max_seq_items = 2000

In [13]:
# Train one global LSTM on all 28 residential series, track the run in
# Weights & Biases, then score the model per series.
MAX_EPOCHS = 100
BATCHSIZE = 32  # NOTE(review): unused — fit() below hard-codes batch_size=1500
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])


# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in range(1,29):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value and forward-feature tensors along the feature axis.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set.
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# 1️⃣ Start a new run, tracking config metadata
wandb.init(project="3days_forcast", config={
    "batch_size": 1500,
    "architecture": "RNN with forward lags for temporal",
    "dataset": "Columbia",
    "epochs": 100,
    'patience': 10
})
config = wandb.config

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

# Per-epoch metrics are streamed to W&B by the Keras callback. The previous
# `wandb.tensorflow.log(tf.summary.merge_all())` relied on the TF1 summary API,
# which the TF2 `tf.summary` module does not provide. save_model=False because
# the model is persisted explicitly right after training.
history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping, WandbCallback(save_model=False)], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

for i in pbar(range(0,28)):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # 'B' holds the 0-based position i, i.e. the series read from Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
# `matrics` was a typo for `metrics` (NameError). The columns are object dtype,
# so cast before averaging, and log all three means in a single step.
wandb.log({
    "mape": metrics['mape'].astype(float).mean(),
    "rmse": metrics['rmse'].astype(float).mean(),
    "mae": metrics['mae'].astype(float).mean(),
})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')

In [14]:
# `tf.summary` is a module, not a callable, so `tf.summary()` raised TypeError.
# List its public names instead to see which summary functions this build offers.
sorted(name for name in dir(tf.summary) if not name.startswith('_'))

In [15]:
# Display the summary module object itself (no call) to see which
# implementation `tf.summary` resolves to in this environment.
tf.summary

<module 'tensorboard.summary._tf.summary' from '/home/ubuntu/anaconda3/envs/evgeny/lib/python3.8/site-packages/tensorboard/summary/_tf/summary/__init__.py'>

In [16]:
# Train one global LSTM on all 28 residential series, track the run in
# Weights & Biases, then score the model per series.
MAX_EPOCHS = 100
BATCHSIZE = 32  # NOTE(review): unused — fit() below hard-codes batch_size=1500
patience = 10
HORIZON = 72


FULL_LSTMIMO = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units]
    # (return_sequences is not set, so only the last step's state is emitted)
    tf.keras.layers.LSTM(32, input_shape=(HORIZON, 14)),
    # Shape => [batch, HORIZON]
    tf.keras.layers.Dense(HORIZON)
])


# One row of error metrics per series; 'B' identifies the series.
metrics = pd.DataFrame(columns=['mae','mape', 'rmse', 'B'], index=range(28))
from progressbar import ProgressBar
pbar = ProgressBar()
dX_train = []
dT_train = []
dX_test = []
dX_scaler = []
for i in range(1,29):
    filename = '../data/Columbia_clean/Residential_'+str(i)+'.csv'
    df = pd.read_csv(filename, index_col=0)
    train_inputs, test_inputs, y_scaler = MIMO_fulldata_preparation(df, n_test=4380, T=72, HORIZON=72)
    # Stack past-value and forward-feature tensors along the feature axis.
    dX_train.append(tf.concat([train_inputs['X'],train_inputs['X2']], axis=2))
    dT_train.append(train_inputs['target'])
    dX_test.append(test_inputs)
    dX_scaler.append(y_scaler)
# Pool every series' training windows into a single data set.
global_inputs_X = tf.concat(dX_train, 0)
global_inputs_T = tf.concat(dT_train, 0)

# 1️⃣ Start a new run, tracking config metadata
wandb.init(project="3days_forcast", config={
    "batch_size": 1500,
    "architecture": "RNN with forward lags for temporal",
    "dataset": "Columbia",
    "epochs": 100,
    'patience': 10
})
config = wandb.config

# full data LSTM MIMO compilation and fit
FULL_LSTMIMO.compile(optimizer=tf.optimizers.Adam(), loss='mse', metrics=[tf.metrics.MeanSquaredError()])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, mode='min')

# Keras `Model.fit` has no `hooks` argument (passing one raises TypeError), and
# `WandbHook` targets TF1 session/estimator training. Per-epoch logging goes
# through the Keras callback instead. save_model=False because the model is
# persisted explicitly right after training.
history = FULL_LSTMIMO.fit(global_inputs_X, global_inputs_T, batch_size=1500, epochs=MAX_EPOCHS,
                      validation_split=0.15,
                      callbacks=[early_stopping, WandbCallback(save_model=False)], verbose=1)
save_model(FULL_LSTMIMO, 'Columbia_model')

for i in pbar(range(0,28)):
    concat_input = tf.concat([dX_test[i]['X'],dX_test[i]['X2']], axis=2)
    FD_predictions = FULL_LSTMIMO.predict(concat_input)
    FD_eval_df = create_evaluation_df(FD_predictions, dX_test[i], HORIZON, dX_scaler[i])
    mae = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAE')
    mape = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'MAPE')
    rmse = validation(FD_eval_df['prediction'], FD_eval_df['actual'], 'RMSE')
    # 'B' holds the 0-based position i, i.e. the series read from Residential_<i+1>.csv.
    metrics.loc[i] = pd.Series({'mae':mae, 'mape':mape, 'rmse':rmse, 'B': i})
# `matrics` was a typo for `metrics` (NameError). The columns are object dtype,
# so cast before averaging, and log all three means in a single step.
wandb.log({
    "mape": metrics['mape'].astype(float).mean(),
    "rmse": metrics['rmse'].astype(float).mean(),
    "mae": metrics['mae'].astype(float).mean(),
})
metrics.to_csv('./results/Columbia/global/3days/revised2_LSTM.csv')