In [None]:
import os
import gc
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "0"

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.figure as fig
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
# print("statsmodels version: "+sm.__version__)
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, LeakyReLU
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import initializers
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import losses
import sys


In [None]:
# Drop any Keras graph/session state left over from an earlier run of this cell.
tf.keras.backend.clear_session()
# NOTE(review): the return value is discarded, so this line has no visible effect.
tf.executing_eagerly()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to the first GPU it can see and let its memory
    # allocation grow on demand.
    # NOTE(review): CUDA_VISIBLE_DEVICES is set to "1" in the import cell, so
    # gpus[0] is presumably physical device 1 — confirm on the target machine.
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        print("Memory growth set to: "+str(tf.config.experimental.get_memory_growth(gpus[0])))
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)

## Helper Functions

In [None]:
#Generate lagged Y to use as input matrix(X) and future lagged Y as output matrix(Y) for the NeuralNet
def ts2IF(ts, s, h):
    """Turn a univariate series into sliding-window input/output frames.

    Row ``i`` of the input frame holds the ``s`` consecutive observations
    ``ts[i] .. ts[i+s-1]``; row ``i`` of the output frame holds the ``h``
    observations that follow them, ``ts[i+s] .. ts[i+s+h-1]``.

    Parameters
    ----------
    ts : array-like
        The series, either 1-D or a single-column 2-D array of shape ``(n, 1)``.
    s : int
        Window length, i.e. the number of columns of the input frame.
    h : int
        Number of horizons, i.e. the number of columns of the output frame.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``x`` with shape ``(m, s)`` and columns ``lag1 .. lag<s>``, and ``y``
        with shape ``(m, h)`` and default integer columns, where
        ``m = n - s - h + 1``.

    Raises
    ------
    ValueError
        If ``ts`` is not one-dimensional (or a single column), or if it is too
        short to hold even an empty set of windows (``m < 0``).
    """
    series = np.asarray(ts)
    if series.ndim == 2 and series.shape[1] == 1:
        # Callers pass column vectors (n, 1); work on the flat series.
        series = series[:, 0]
    if series.ndim != 1:
        raise ValueError("ts2IF expects a 1-D series or a single-column 2-D array, got shape "
                         + str(np.shape(ts)))

    m = series.shape[0] - s - h + 1
    if m < 0:
        raise ValueError("series of length " + str(series.shape[0])
                         + " is too short for s=" + str(s) + " and h=" + str(h))

    # Index matrices: row i selects positions i..i+s-1 (inputs) and
    # i+s..i+s+h-1 (targets). Fancy indexing copies the values in one step and
    # keeps the dtype of the series, with no random placeholder frame needed.
    starts = np.arange(m)[:, np.newaxis]
    x = pd.DataFrame(series[starts + np.arange(s)],
                     columns=["lag" + str(k) for k in range(1, s + 1)])
    y = pd.DataFrame(series[starts + s + np.arange(h)])
    return (x, y)

def chopr(x,y,te,te_size,tr_size):
    """Cut matching test and training windows out of ``x`` and ``y``.

    The test window is ``[te, te + te_size)`` and the training window is the
    ``tr_size`` positions immediately before it, ``[te - tr_size, te)``.

    Returns the 4-tuple ``(x_e, y_e, x_r, y_r)``: test inputs, test targets,
    training inputs, training targets.
    """
    test_window = slice(te, te + te_size)
    train_window = slice(te - tr_size, te)
    return (x[test_window], y[test_window], x[train_window], y[train_window])

def shift_rm(xy1,xy2, x_test):
    """Slide the training window forward by the size of a second (x, y) block.

    ``xy1`` and ``xy2`` are ``(x, y)`` pairs of DataFrames. With ``d1`` and
    ``d2`` the row counts of ``xy1[1]`` and ``xy2[1]``, the result has ``d1``
    rows: the first ``d1 - d2`` rows are rows ``d2 ..`` of ``xy1`` and the
    remaining rows are the rows of ``xy2``. ``new_x_test`` is ``x_test``
    without its first ``d2`` rows.

    Both row counts are printed as a side effect.

    Returns ``(x, y, new_x_test)``; column labels are copied from ``xy1[0]``,
    ``xy1[1]`` and ``x_test`` respectively.
    """
    d1 = xy1[1].shape[0]
    d2 = xy2[1].shape[0]
    print(d1)
    print(d2)
    gap = d1 - d2
    # The frames are allocated with random integer placeholders; the loops
    # below overwrite the cells with real values.
    x = pd.DataFrame(np.random.randint(10, size=(d1, xy1[0].shape[1])))
    y = pd.DataFrame(np.random.randint(10, size=(d1, xy1[1].shape[1])))
    new_x_test = pd.DataFrame(np.random.randint(10, size=(x_test.shape[0] - d2, x_test.shape[1])))
    for i in range(y.shape[0]):
        if i<gap:
            # Rows kept from the old training block, shifted up by d2.
            for j in range(x.shape[1]): x.iloc[i,j] = xy1[0].iloc[i+d2,j].copy()
            # NOTE(review): whole-row assignment of a Series aligns on labels;
            # this presumably expects xy1[1] to use default integer columns — confirm.
            y.iloc[i,:] = xy1[1].iloc[i+d2].copy()
        else:
            # Rows taken from the second block.
            for j in range(x.shape[1]): x.iloc[i,j] = xy2[0].iloc[i-gap,j].copy()
            y.iloc[i,:] = xy2[1].iloc[i-gap].copy()
    # Drop the first d2 rows of the test inputs.
    for i in range(new_x_test.shape[0]):
        for j in range(new_x_test.shape[1]): 
            new_x_test.iloc[i,j] = x_test.iloc[i+d2,j].copy()
#     print("old x_r="+str(xy1[0].shape))
#     print("old y_r="+str(xy1[1].shape))
#     print("x_r="+str(x.shape))
#     print("y_r="+str(y.shape))
#     print("old x_e="+str(xy2[0].shape))
#     print("old y_e="+str(xy2[1].shape))
#     print("x_e="+str(xy2[0].shape))
#     print("y_e="+str(xy2[1].shape))
    x.columns = xy1[0].columns
    y.columns = xy1[1].columns
    new_x_test.columns = x_test.columns
    return(x,y, new_x_test)

def new_shift_rm(x_r, y_r, x_e, y_e):
    """Advance a rolling window by one step.

    The oldest training row is dropped, the first test row is appended to the
    training set, and that row is removed from the test set. Inputs are not
    modified; new objects are returned.

    Parameters
    ----------
    x_r, x_e : numpy.ndarray or pandas.DataFrame
        Training / test inputs. The type of ``x_r`` selects the code path for
        both.
    y_r, y_e : pandas.DataFrame
        Training / test targets.

    Returns
    -------
    tuple
        ``(x_r, y_r, x_e, y_e)`` after the shift. DataFrames come back with a
        fresh ``0..n-1`` index.
    """
    def _roll_frames(train, test):
        # One-row frame holding the first test row, labelled like the test frame.
        moved = pd.DataFrame([test.iloc[0].to_numpy()], columns=test.columns)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        train = pd.concat([train.iloc[1:, :], moved], ignore_index=True)
        # Non-inplace reset avoids mutating a slice of the caller's frame.
        test = test.iloc[1:, :].reset_index(drop=True)
        return train, test

    if isinstance(x_r, np.ndarray):
        x_r = np.delete(x_r, [0], axis=0)
        x_r = np.concatenate([x_r, [x_e[0]]])
        x_e = np.delete(x_e, [0], axis=0)
    else:
        x_r, x_e = _roll_frames(x_r, x_e)

    y_r, y_e = _roll_frames(y_r, y_e)
    return (x_r, y_r, x_e, y_e)

#Generic Rescale Method
from sklearn.preprocessing import minmax_scale, scale
def rescale(inp,act_func='tanh'):
    """Scale ``inp`` into the output range of the given activation function.

    ``'tanh'`` maps to [-1, 1] and ``'sigmoid'`` to [0, 1] via
    ``minmax_scale``; any other value (including ``'linear'`` and ``'elu'``)
    returns ``inp`` untouched.
    """
    bounded_ranges = (('tanh', (-1, 1)), ('sigmoid', (0, 1)))
    for name, bounds in bounded_ranges:
        if act_func == name:
            return minmax_scale(inp, feature_range=bounds)
    # Unbounded or unknown activation: no rescaling.
    return inp

# Reverse scaling
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler((-1,1))
# scaler.fit(lagY)
# yp_unscaled = scaler.inverse_transform(yp)
# yp_unscaled.shape

def eval(y,yp):
    """Compute goodness-of-fit measures for one forecast vector.

    NOTE: this function shadows the built-in ``eval`` in this notebook; the
    name is kept because other cells call it.

    Parameters
    ----------
    y : array-like, 1-D
        Actual values.
    yp : array-like, 1-D
        Forecast values, same length as ``y``. Both are converted to float
        arrays and compared position by position.

    Returns
    -------
    tuple of str
        ``(rSq, mape, smape, mse, rmse, sse, sst, mae)``, each formatted as a
        string (R² with 4 decimals, the others with 2).

    Notes
    -----
    * sMAPE terms where actual and forecast are both zero count as 0 (a
      perfect forecast) instead of turning the whole score into NaN.
    * MAPE is undefined when ``y`` contains zeros; it is left as ``inf``/``nan``
      in that case.
    """
    import math
    roundTo = 5
    # Coerce to float so object-dtype inputs (e.g. values taken from a frame
    # built by row-wise appends) work with the numpy ufuncs below.
    y = np.asarray(y, dtype=float)
    yp = np.asarray(yp, dtype=float)
    m = y.shape[0]
    e = y - yp
    abs_e = np.absolute(e)
    yt = y - y.mean()
    sse = e.dot(e)
    sst = yt.dot(yt)
    rSq = round((1 - ((sse)/(sst))),roundTo)
    mape = round(((abs_e/np.absolute(y)).sum())*100/m,roundTo)
    mae = round((abs_e.sum())/m,roundTo)
    # sMAPE = 200/m * sum(|e| / (|y| + |yp|)); terms with a zero denominator
    # are left at 0.
    denom = np.absolute(y)+np.absolute(yp)
    ratio = np.divide(abs_e, denom, out=np.zeros_like(abs_e), where=denom != 0)
    smape = round((ratio.sum())*200/m,roundTo)
    mse = round((sse/m),roundTo)
    rmse = round(math.sqrt(mse),roundTo)
    return(format(rSq,'.4f'),format(mape,'.2f'),format(smape,'.2f'),format(mse,'.2f'),format(rmse,'.2f'),format(sse,'.2f'),format(sst,'.2f'), format(mae,'.2f'))

def plotHistory(fit_history,ls="MAPE"):
    """Plot the training-loss curve of a fit and report its minimum.

    ``fit_history`` is the object returned by ``model.fit``; only its
    ``history['loss']`` series is read. ``ls`` is the loss name used for the
    legend and the y-axis label. The lowest loss and its 1-based epoch number
    are printed after the figure is shown.
    """
    train_loss = fit_history.history['loss']
    # Epochs are numbered from 1 on the x-axis.
    epochs = range(1, len(train_loss) + 1)
    loss_name = str(ls)

    plt.figure(num=0,figsize=(6,3))
    plt.plot(epochs, train_loss)
    # Only the training curve is drawn; the second legend label has no line.
    plt.legend(['Training Loss - '+loss_name, 'Validation Loss - '+loss_name])
    plt.xlabel('Epochs')
    plt.ylabel(ls)
    plt.title("Loss vs Epochs")
    plt.show()

    best_epoch = np.argmin(train_loss) + 1
    print("Min Training Loss = " + str(np.min(train_loss)) + " " + "at Epoch " + str(best_epoch))

#Generic Model Build
def buildModel(modelName,nfeat,nhidden,act_func='tanh',optimzer=None,loss_func='mean_squared_error',dropout=None,horizons=14):
    """Build and compile one of the predefined dense networks.

    Parameters
    ----------
    modelName : str
        One of ``'perceptron'``, ``'NeuralNet_3L'``, ``'NeuralNet_3L_MH'``,
        ``'NeuralNet_4L'``, ``'NeuralNet_4L_MH'``.
    nfeat : int
        Number of input features.
    nhidden : int
        Units in each hidden layer.
    act_func : str or sequence of str
        Activation per layer, in layer order. A single string is applied to
        every layer.
    optimzer : optimizer or None
        Keras optimizer; ``None`` creates a fresh ``Adam()`` for this model.
        (Parameter name kept as-is for existing callers.)
    loss_func : str
        Loss passed to ``compile``.
    dropout : float or None
        Dropout rate; only used by ``'NeuralNet_4L'``.
    horizons : int
        Output width of ``'NeuralNet_3L_MH'``.

    Returns
    -------
    The compiled ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``modelName`` is not one of the known architectures.
    """
    # A default evaluated at definition time would share one optimizer
    # instance (and its state) between every model built with the default.
    if optimzer is None:
        optimzer = optimizers.Adam()
    # Indexing a plain string would yield single characters ('t' for 'tanh').
    if isinstance(act_func, str):
        act_func = [act_func] * 3

    model = Sequential()
    if modelName == 'perceptron':
        model.add(Dense(1,input_dim=nfeat,activation=act_func[0]))               #Single neuron
    elif modelName == 'NeuralNet_3L':
        model.add(Dense(nhidden,input_dim=nfeat,activation=act_func[0]))         #1st Hidden Layer
        model.add(Dense(1,activation=act_func[1]))                               #Output Layer
    elif modelName == 'NeuralNet_3L_MH':
        model.add(Dense(nhidden,input_dim=nfeat,activation=act_func[0]))         #1st Hidden Layer
        model.add(Dropout(0.1))
        model.add(Dense(horizons,activation=act_func[1]))                        #Output Layer - one unit per horizon
    elif modelName == 'NeuralNet_4L':
        model.add(Dense(nhidden,input_dim=nfeat,activation=act_func[0]))         #1st Hidden Layer
        model.add(Dense(nhidden,activation=act_func[1]))                         #2nd Hidden Layer
        if dropout:
            model.add(Dropout(dropout))
        model.add(Dense(1,activation=act_func[2]))                               #Output Layer
    elif modelName == 'NeuralNet_4L_MH':
        model.add(Dense(nhidden,input_dim=nfeat,activation=act_func[0]))         #1st Hidden Layer
        model.add(Dense(nhidden,activation=act_func[1]))                         #2nd Hidden Layer
        # NOTE(review): the output width comes from the notebook-level frame
        # `Y`, not from `horizons`; kept so existing calls build the same
        # network. Consider passing horizons=Y.shape[1] and using it here.
        model.add(Dense(Y.shape[1],activation=act_func[2]))                      #Output Layer - multi-horizon
    else:
        raise ValueError("Unknown modelName: " + str(modelName))
    model.compile(loss=loss_func, optimizer=optimzer, metrics=['mean_absolute_percentage_error'])
    return model
#     model.compile(loss='mean_squared_error', optimizer=optimzer, metrics=['mean_absolute_percentage_error'])
#model.add(Dense(nhidden,activation=act_func[0]))   
#model.add(LeakyReLU())
#model.add(Dropout(0.18))
def rollingValidate(model,train,x,y,kt,horizons,callbk,nEpoch,nBatch,plot,p1=False,p2=False,shuffle=False, tr_ratio=0.40808823529):
    """Rolling-window validation of a multi-horizon forecaster.

    The first ``int(m * tr_ratio)`` rows form the initial training window and
    the rest the test set. The model is fitted once, then for every test row:
    optionally re-fitted (every ``kt`` rows), asked for a forecast of that
    row, and the window is moved forward by one row with ``new_shift_rm``.

    Parameters
    ----------
    model : compiled Keras model
    train : bool
        Re-fit the model every ``kt`` test rows when true.
    x : pandas.DataFrame or numpy.ndarray
        Inputs, one row per sample.
    y : pandas.DataFrame
        Targets, one column per horizon.
    kt : int
        Re-training interval in test rows; a negative value means "size of
        the test set" (i.e. only at the first row).
    horizons : int
        Only used in the progress message.
    callbk : int
        ``1`` enables early stopping on the training loss during re-fits.
    nEpoch, nBatch : int
        Epochs and batch size for every ``fit`` call.
    plot : bool
        Plot the latest loss history at each re-training point.
    p1 : bool
        Print the sMAPE of each horizon.
    p2 : bool
        Unused; kept for call compatibility.
    shuffle : bool
        Passed to ``fit``.
    tr_ratio : float
        Fraction of rows in the initial training window.

    Returns
    -------
    tuple
        ``(rSq, mape, smape, mse, rmse, sse, sst, mae, beats, prediction,
        forecast_matrix, y_e)``. The first eight are lists with one formatted
        string per horizon, computed from the rolling forecasts;
        ``beats`` is always an empty list; ``prediction`` is a single
        ``model.predict`` over the whole test set with the final model;
        ``forecast_matrix`` holds the rolling forecasts (one row per test
        row); ``y_e`` is the test-target frame.
    """
    m = y.shape[0]
    tr_size = int(m * tr_ratio)
    te_size = int(m - tr_size)
    te = int(tr_size)
    if kt < 0: kt = te_size
    print("m = "+str(m)+", tr_size = "+str(tr_size)+", te_size = "+str(te_size)+", kt = "+str(kt)+", horizons = "+str(horizons))

    (x_e, y_e, x_r, y_r) = chopr (x, y, te, te_size, tr_size)
    print("Training shape"+str(x_r.shape))
    print("Training shape"+str(y_r.shape))
    print("Testing shape"+str(x_e.shape))
    print("Testing shape"+str(y_e.shape))

    # Working copies of the test set that shrink by one row per step.
    new_x_e = x_e.copy()
    new_y_e = y_e.copy()

    # Initial fit. NOTE(review): when `train` is true the model is fitted
    # again at the first test row, so it trains twice on the initial window.
    fit_history = model.fit(x_r, y_r,validation_split=0.0,shuffle=shuffle,epochs=nEpoch, \
                                        batch_size=nBatch,verbose=0, callbacks=None)

    # Forecast rows are collected in a list and turned into a frame once;
    # DataFrame.append no longer exists in pandas 2.0 and was quadratic.
    forecast_rows = []
    for i in range(y_e.shape[0]):
        print(f"i: {i} of {y_e.shape[0]}")
        if i%kt == 0:
            if train:
                print("training set: "+str(x_r.shape))
                if callbk==1:
                    callb = callbacks.EarlyStopping(monitor='loss', restore_best_weights=True, patience=40,verbose=1)
                else: callb = None
                fit_history = model.fit(x_r, y_r,validation_split=0.0,shuffle=shuffle,epochs=nEpoch, \
                                        batch_size=nBatch,verbose=0, callbacks=callb)
            if plot: plotHistory(fit_history)

        # new_shift_rm accepts ndarray inputs, so the next test sample must be
        # read accordingly rather than always through `.iloc`.
        if isinstance(new_x_e, np.ndarray):
            next_sample = new_x_e[0]
        else:
            next_sample = new_x_e.iloc[0].to_numpy()
        prediction = model.predict(np.array([next_sample]))
        forecast_rows.append(prediction[0])
        x_r, y_r, new_x_e, new_y_e = new_shift_rm(x_r, y_r, new_x_e, new_y_e)

    forecast_matrix = pd.DataFrame(forecast_rows, columns=y_e.columns)

    beats = []
    # One-shot forecast of the whole test set with the final model (returned
    # for plotting; the metrics below use the rolling forecasts).
    prediction = model.predict(x_e,batch_size=x_e.shape[0])

    rSq, mape, smape, mse, rmse, sse, sst, mae = ([] for _ in range(8))
    buckets = (rSq, mape, smape, mse, rmse, sse, sst, mae)
    actual = y_e.values
    forecast = forecast_matrix.values
    for h in range(y.shape[1]):
        scores = eval(actual[:,h], forecast[:,h])
        for bucket, score in zip(buckets, scores):
            bucket.append(score)
        if p1:
            print(str(smape[h]))

    return (rSq, mape, smape, mse, rmse, sse, sst, mae, beats, prediction, forecast_matrix, y_e)

# Data

In [None]:
# Load the raw CSV and keep the 'new_deaths' column as a one-column frame.
all_data = pd.read_csv("2021-08-26-21-42-06-OWID.csv")
data = all_data[['new_deaths']]

## Data Augmentation

In [None]:
# Forecasts exported earlier; judging by the file name they come from a SARIMAX
# model and cover in- and out-of-sample periods — confirm against the producer.
sarima_forecasts = pd.read_csv('SARIMAX_IN+OUT_FORECASTS_new_dataset.csv')

In [None]:
# Keep only the "h1" column (presumably the one-step-ahead forecast — confirm).
sarima_forecasts = sarima_forecasts[["h1"]]

In [None]:
# Observed series without its first 38 rows and its last row.
# NOTE(review): the offsets 38 and -1 are presumably chosen to line up with the
# forecast file used below — confirm.
orig = data.iloc[38:-1]

In [None]:
# Element-wise mean of the observed series (same slice as `orig`) and the
# forecasts from row 1 onwards; both operands must have the same length.
half = pd.DataFrame((data.iloc[38:-1].values.squeeze() + sarima_forecasts.iloc[1:].values.squeeze())/2)
# half

In [None]:
# Mean of `orig` and `half`, i.e. 3/4 observed + 1/4 forecast.
quater = pd.DataFrame((orig.values.squeeze() + half.values.squeeze())/2)
# quater

In [None]:
# Mean of `half` and the forecasts, i.e. 1/4 observed + 3/4 forecast.
third_quater = pd.DataFrame((half.values.squeeze() + sarima_forecasts.iloc[1:].values.squeeze())/2)
# third_quater

In [None]:
# Interleave the observed series with the `half` series: even positions hold
# the observations, odd positions the matching `half` values.
augmented_ts = np.arange(2*half.shape[0]).tolist()
for i in range(0,half.shape[0]):
    # Explicit positional access. `orig.iloc[i][0]` used an integer key on a
    # row Series labelled 'new_deaths', which relies on the deprecated
    # positional fallback of Series.__getitem__.
    augmented_ts[2*i] = orig.iloc[i, 0]
    augmented_ts[2*i+1] = half.iloc[i, 0]

## Data Visualization

In [None]:
# Raw values of the "date" column, used as x-axis labels below.
dates = all_data["date"].values

In [None]:
def reformat_date(date):
    """Swap the first two dash-separated fields of a date string.

    ``"A-B-C"`` becomes ``"B-A-C"``; fields after the third are dropped.
    NOTE(review): for ``YYYY-MM-DD`` input this yields ``MM-YYYY-DD`` —
    confirm that this is the intended output format.
    """
    parts = date.split("-")
    return "-".join((parts[1], parts[0], parts[2]))

In [None]:
# Reformat every date label. Re-running this cell applies the swap again to the
# already reformatted strings, so run it once per kernel session.
dates = [reformat_date(date) for date in dates]

In [None]:
# Centred 7-row rolling mean of new_deaths (NaN at the first and last 3 rows).
ma = all_data.new_deaths.rolling(window=7, center=True).mean()

In [None]:
# Plot the full daily-deaths series together with its 7-day moving average.
# Tick positions are hard-coded: every 14th position up to 582 on the x-axis.
xtick = np.arange(0, 582, 14)
ytick = np.arange(0, 5000, 500)
plt.figure(num=0, figsize=(27,7),dpi=100)
# plt.plot(dat.values.squeeze())
plt.plot(dates, data.values)
# Plotted against its positional index, on the same axes as the date labels.
plt.plot(ma.values, "r")
plt.legend(['Original Time Series', '7-day Moving Average'])
plt.xlabel('Date')
plt.ylabel('Deaths')
# x1,x2,y1,y2 = plt.axis()
# plt.axis((x1,x2,0,101))
plt.yticks(ytick)
plt.xticks(xtick, rotation=25)
plt.title("Time Series Plot - Daily Deaths")
plt.show()

In [None]:
# Plot the interleaved (observed / midpoint) series against its position.
plt.figure(num=0, figsize=(27,7),dpi=100)
plt.plot(augmented_ts)
plt.legend(['augmented time series'])
plt.xlabel('Time')
plt.ylabel('Deaths/Hosp')
# x1,x2,y1,y2 = plt.axis()
# plt.axis((x1,x2,0,101))
# plt.yticks(np.arange(0, 101, 10.0))
plt.title("Time Series Plot - Response variable")
plt.show()

## ACF and PACF Plots

In [None]:
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
# Autocorrelation function of the observed daily-deaths series.
fig = sm.graphics.tsa.plot_acf(orig.values.squeeze())
axes = fig.axes
axes[0].set_title("Auto Correlation Function Plot - New Deaths")
axes[0].set_xlabel("Lags")
axes[0].set_ylabel("Correlation Coefficient")
axes[0].set_xticks(np.arange(0, 31, 3))
# Public setter instead of the private Figure._set_dpi.
fig.set_dpi(120)
fig.set_size_inches(10,5)
fig.show()
axes[0].set_yticks(np.arange(0, 1.1, 0.1))
# Make sure the output directory exists; savefig does not create it.
os.makedirs("./COVID", exist_ok=True)
fig.savefig("./COVID/ACFPlot.png")

In [None]:
import statsmodels
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
# Partial autocorrelation function of the observed daily-deaths series.
fig = sm.graphics.tsa.plot_pacf(orig.values.squeeze())
axes = fig.axes
axes[0].set_title("Partial Auto Correlation Function - New Deaths")
axes[0].set_xlabel("Lags")
axes[0].set_ylabel("Correlation Coefficient")
axes[0].set_xticks(np.arange(0, 31, 3))
# Public setter instead of the private Figure._set_dpi.
fig.set_dpi(120)
fig.set_size_inches(10,5)
fig.show()
axes[0].set_yticks(np.arange(-0.4, 1.1, 0.1))
# Make sure the output directory exists; savefig does not create it.
os.makedirs("./COVID", exist_ok=True)
fig.savefig("./COVID/PACFPlot.png")


In [None]:
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
# Autocorrelation function of the augmented (interleaved) series.
fig = sm.graphics.tsa.plot_acf(np.array(augmented_ts))
axes = fig.axes
axes[0].set_title("Autocorrelation Plot - Augmented Series")
axes[0].set_xlabel("Lags")
axes[0].set_ylabel("Correlation Coefficient")
axes[0].set_xticks(np.arange(0, 31, 3))
# Public setter instead of the private Figure._set_dpi.
fig.set_dpi(120)
fig.set_size_inches(10,5)
fig.show()
# axes[0].set_yticks(np.arange(0, 1, 0.1))
# fig.savefig("./COVID/Test.png")

In [None]:
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
# Partial autocorrelation function of the augmented (interleaved) series.
fig = sm.graphics.tsa.plot_pacf(np.array(augmented_ts))
axes = fig.axes
# The title previously read "Autocorrelation Plot" although this is the PACF.
axes[0].set_title("Partial Autocorrelation Plot - Augmented Series")
axes[0].set_xlabel("Lags")
axes[0].set_ylabel("Correlation Coefficient")
axes[0].set_xticks(np.arange(0, 31, 3))
# Public setter instead of the private Figure._set_dpi.
fig.set_dpi(120)
fig.set_size_inches(10,5)
fig.show()
# axes[0].set_yticks(np.arange(0, 1, 0.1))
# fig.savefig("./COVID/Test.png")

<hr style="border:5px solid white"> </hr>

# Neural Nets

In [None]:
# Convert the interleaved list into a column vector of shape (n, 1).
augmented_ts = np.array(augmented_ts)
# The intermediate (-1, 2) reshape only succeeds for an even number of values;
# the following reshape flattens it back into a single column.
augmented_ts = np.reshape(augmented_ts,(-1,2))
augmented_ts = np.reshape(augmented_ts,(-1,1))
augmented_ts.shape

### Generating X and Y matrices

In [None]:
# Windowing configuration for ts2IF: `lags` input columns and `horizons`
# output columns per sample. `elim_samples` is not used in the cells shown here.
elim_samples = 0
lags = 52
horizons = 28

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# Standardising scaler fitted on the augmented series.
# NOTE(review): neither `scaler` nor `sc` is applied in the cells shown here.
scaler = StandardScaler()
scaler.fit(augmented_ts)
sc = MinMaxScaler(feature_range = (-1, 1))

In [None]:
# Lagged inputs / future targets from the observed series.
# NOTE: the next cell overwrites lagX and lagY with the augmented series.
lagX, lagY = ts2IF(np.array(orig),lags,horizons)

In [None]:
# Lagged inputs / future targets from the augmented series; replaces the
# frames built from the observed series in the previous cell.
lagX, lagY = ts2IF(np.array(augmented_ts),lags,horizons)

In [None]:
# Input matrix: a new frame built from lagX with the same column labels.
X = pd.DataFrame(lagX)
X.columns = lagX.columns
# Number of lag features; computed before the 'time' column is added below.
feat = X.shape[1]
print(X.shape)

In [None]:
# Add a running sample counter (1.0, 2.0, ...) as an extra input column.
time = [float(x) for x in range(1,X.shape[0]+1)]
X['time'] = time

In [None]:
# Target matrix; with "linear" rescale() returns lagY unchanged.
Y = pd.DataFrame(rescale(lagY,"linear"))

## Network Architecture Search - RNN

In [None]:
%%time
import autokeras as ak
# Architecture search over an LSTM block followed by a dense block.
rsX = pd.DataFrame(rescale(X,"linear"))
lX = rsX.values.reshape(rsX.shape[0],rsX.shape[1],1) #(batch_size, timestep/window, no. features/lags)
lY = Y.values.reshape(Y.shape[0],Y.shape[1],1)
input_layer = ak.Input()
rnn_layer = ak.RNNBlock(layer_type="lstm")(input_layer)
dense_layer = ak.DenseBlock()(rnn_layer)
output_layer = ak.RegressionHead()(dense_layer)

automodel = ak.AutoModel(input_layer, output_layer, max_trials=10, loss='mean_absolute_percentage_error',metrics='mean_absolute_percentage_error',overwrite=True,objective='val_loss', project_name='NAS')
automodel.fit(lX, Y.values, validation_split=0.4, epochs=500, batch_size=32, verbose=0)
model = automodel.export_model()
print(model.summary())
# rollingValidate returns 12 values (including mae and the rolling forecast
# matrix); unpacking into 10 names raised a ValueError.
(rSq1, mape1, smape1, mse1, rmse1, sse1, sst1, mae1, beats, prediction, fm, y_e) = rollingValidate(
    model,True,lX,Y,-1,horizons,callbk=1,nEpoch=1,nBatch=32,plot=True,p1=True,p2=True)

In [None]:
# Keep every second sample (odd positions) of output column 1 for both the
# forecasts and the actual values.
# NOTE(review): presumably this selects the points of the augmented series that
# fall on whole days; the title says h=1 while column index 1 is read — confirm.
true_pred = [x for ind,x in enumerate(prediction[:,1].squeeze().tolist()) if (ind+1)%2==0]
true_data = [x for ind,x in enumerate(y_e.values.squeeze()[:,1].tolist()) if (ind+1)%2==0]
# Rebinds `dates` (previously the list of date labels) to a 1-based counter.
dates = range(1, len(true_data) + 1)
# ytick = np.arange(0, 5000, 500)
# xtick = np.arange(0, 133, 7)
plt.figure(num=0, figsize=(20,8))
plt.plot(dates,true_data)
plt.plot(dates,true_pred)
plt.legend(['True Value','Forecasted Value'])
plt.xlabel('Days')
plt.ylabel('Daily Deaths')
# plt.axis((x1,x2,0,101))
# plt.xticks(xtick)
# plt.yticks(ytick)
plt.title("Time Series Plot - Response vs Out-Of-Sample Forecasts(h=1)")
plt.show()

<hr style="border:2px solid white"> </hr>

## Network Architecture Search - Neural Nets

In [None]:
# Recompute the feature count (X now includes the 'time' column) and declare
# every input column as numerical for the structured-data regressor.
feat = X.shape[1]
l1 = X.columns.tolist()
l2 = ['numerical' for x in l1]
# print(l1)
# print(l2)
col_typ = dict(zip(l1,l2))

In [None]:
import autokeras as ak
# Architecture search over dense networks with one output per horizon.
# validation_split=0.6 holds out 60% of the rows for validation.
reg = ak.StructuredDataRegressor(max_trials=500,column_names=l1,column_types=col_typ, output_dim=Y.shape[1], loss='mean_absolute_percentage_error',metrics='mean_absolute_percentage_error',overwrite=True,objective='val_loss', project_name='NAS')
reg.fit(x=X, y=Y, epochs=200, validation_split=0.6, validation_data=None, verbose=0)
model = reg.export_model()
print(model.summary())

In [None]:
# rollingValidate returns 12 values (including mae and the rolling forecast
# matrix); unpacking into 10 names raised a ValueError.
(rSq1, mape1, smape1, mse1, rmse1, sse1, sst1, mae1, beats, prediction, fm, y_e) = rollingValidate(
    model,True,X,Y, 5,horizons,callbk=0,nEpoch=200,nBatch=32,plot=False,p1=True,p2=True, shuffle=True)

In [None]:
# Hand-built dense network: two blocks of Dense -> BatchNormalization -> ReLU
# (32 and 1024 units) followed by a linear output with one unit per horizon.
model = Sequential()
model.add(Dense(32,input_dim=feat, activation='linear', kernel_initializer='he_normal'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())
# model.add(Dropout(0.1))
model.add(Dense(1024,activation='linear', kernel_initializer='he_normal'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.ReLU())
# model.add(Dropout(0.1))
model.add(Dense(Y.shape[1],activation='linear'))
model.compile(loss='mean_absolute_percentage_error', optimizer='Adam', metrics=['mean_absolute_percentage_error'])

In [None]:
%%time
# Rolling validation of the hand-built model, re-training every 5 test rows
# for 200 epochs; `fm` receives the rolling forecast matrix.
rSq1, mape1, smape1, mse1, rmse1, sse1, sst1, mae1, beats, prediction,fm, y_e = rollingValidate(model,True,X,Y,5,horizons,callbk=0,nEpoch=200
                                                                        ,nBatch=32,plot=False,p1=True,p2=True, shuffle=True)

In [None]:
# Keep every second sample (odd positions) of output column 0 for both the
# forecasts and the actual values, then plot them against a 1-based counter.
true_pred = [x for ind,x in enumerate(prediction[:,0].squeeze().tolist()) if (ind+1)%2==0]
true_data = [x for ind,x in enumerate(y_e.values.squeeze()[:,0].tolist()) if (ind+1)%2==0]
dates = range(1, len(true_data) + 1)
# ytick = np.arange(0, 5000, 500)
# xtick = np.arange(0, 133, 7)
plt.figure(num=0, figsize=(20,8))
plt.plot(dates,true_data)
plt.plot(dates,true_pred)
plt.legend(['True Value','Forecasted Value'])
plt.xlabel('Days')
plt.ylabel('Daily Deaths')
# plt.axis((x1,x2,0,101))
# plt.xticks(xtick)
# plt.yticks(ytick)
plt.title("Time Series Plot - Response vs Out-Of-Sample Forecasts(h=1)")
plt.show()

In [None]:
# Data Pre-Processing for LSTM
# Builds a 3-D array in which every sample is its own feature row repeated once
# per feature: X of shape (m, f) becomes an array of shape (m, f, f).
import copy
X_np = copy.deepcopy(X.values.tolist())
for i in range(X.shape[0]):
    # Copy of the sample's feature row before the entries are overwritten.
    new_dim = copy.copy(X_np[i])
#     print(new_dim)
#     if i==5: break
    for j in range(X.shape[1]):
        # Every position j of sample i now holds the full feature row.
        X_np[i][j] = new_dim
#         print(X_np[i][j])
X_np1 = np.array(X_np)
print(X_np1.shape)
# With "linear" rescale() returns its input unchanged.
lX = rescale(X_np1,"linear")
# lX = rsX.values.reshape(rsX.shape[0],rsX.shape[1],1) #(batch_size, timestep/window, no. features/lags)
print(lX.shape)

In [None]:
#Build RNN - Generic
from tensorflow.keras.layers import TimeDistributed, Bidirectional, RepeatVector
from tensorflow.keras.optimizers import Adamax

def buildRNN(modelName,nUnits,act_func='linear',optimzer=None,n_outputs=28):
    """Build and compile a recurrent forecaster for the notebook-level ``lX``.

    The input shape is taken from ``lX.shape[1:]``, so ``lX`` must exist
    before this function is called.

    Parameters
    ----------
    modelName : str
        ``'GRU'`` builds a single GRU layer with ``nUnits`` units; ``'LSTM'``
        builds a fixed three-layer stack (256, 92, 128 units). Any other value
        yields a model with only the dense output layer.
    nUnits : int
        Units of the GRU layer; ignored by the ``'LSTM'`` stack.
    act_func : str
        Currently unused; kept for call compatibility.
    optimzer : optimizer, str or None
        Optimizer passed to ``compile``. ``None`` uses ``'adamax'``, which is
        what every call compiled with before this argument was honoured.
        (Parameter name kept as-is for existing callers.)
    n_outputs : int
        Width of the dense output layer (number of forecast horizons).

    Returns
    -------
    The compiled and built ``Sequential`` model; its summary is printed.
    """
    # The argument used to be ignored in favour of the string 'adamax', and
    # its default was a single Adamax instance created at definition time.
    if optimzer is None:
        optimzer = 'adamax'

    model = Sequential()
    if modelName == 'GRU':
        model.add(GRU(nUnits, input_shape=(lX.shape[1], lX.shape[2])))
    if modelName == 'LSTM':
        # NOTE(review): this branch stacks GRU layers despite its name; left
        # unchanged so results obtained with it stay reproducible — confirm
        # whether LSTM layers were intended.
        model.add(GRU(256, activation='relu', return_sequences=True, input_shape=(lX.shape[1], lX.shape[2])))
        model.add(GRU(92, activation='relu', return_sequences=True))
        model.add(GRU(128, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(loss='mean_absolute_percentage_error', optimizer=optimzer, metrics=['mean_absolute_percentage_error'])
    model.build((None,lX.shape[1], lX.shape[2]))
    print(model.summary())
    return model