In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Sequential
from keras.preprocessing.sequence import TimeseriesGenerator
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=8,6
from keras.layers import Dense
from keras.layers import LSTM
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Read the workbook, parse the Date column as datetimes and use it as the index,
# then keep only the 'Ish.L.TrafficVolume.DL.GB(GB)' column as the series to model.
df = (
    pd.read_excel(r'dataset/Data_V3.xlsx', engine='openpyxl')
    .assign(Date=lambda frame: pd.to_datetime(frame.Date))
    .set_index("Date")
)
dataset = df.filter(items=['Ish.L.TrafficVolume.DL.GB(GB)'])
print(dataset.head())

print(dataset.shape)

In [None]:
# Line chart of the selected column against the Date index.
ax = dataset.plot.line(figsize=(14, 7))

In [None]:
# Normalize the series to the [0, 1] range.
# The scaler is fitted on the column taken straight from `df` instead of on
# `dataset`: this cell rebinds `dataset` to the scaled array, so fitting on
# `dataset` again when the cell is re-run would fit the scaler to values that
# are already in [0, 1] and turn later `scaler.inverse_transform` calls into an
# identity mapping. Selecting from `df` gives the same result on a fresh
# top-to-bottom run and keeps the cell safe to re-run.
# NOTE(review): the scaler is fitted on every row, including those that are
# later split off as the test portion.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(df.filter(['Ish.L.TrafficVolume.DL.GB(GB)']))

In [None]:
# Ordered 80/20 split: the leading 80% of rows become `train`, the rest `test`.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]
print(len(train), len(test))

In [None]:
# Turn a 2-D array of values into (window, next value) training pairs.
def create_dataset(dataset, look_back=1):
    """Slice column 0 of `dataset` into sliding windows and their targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        look_back: number of consecutive rows in each input window.

    Returns:
        A pair ``(X, Y)`` of NumPy arrays. ``X[k]`` holds rows ``k`` to
        ``k + look_back - 1`` and ``Y[k]`` is row ``k + look_back``.
        ``len(dataset) - look_back - 1`` pairs are produced, so the final row
        of `dataset` is never used as a target.
    """
    n_windows = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Build supervised samples for both splits: each input is a window of
# `look_back` consecutive values and the target is the value right after it.
look_back = 30
(trainX, trainY), (testX, testY) = (
    create_dataset(part, look_back) for part in (train, test)
)
print(trainX.shape)

In [None]:
# Give the inputs the layout [samples, time steps, features] by adding a
# trailing feature axis of size 1.
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)

In [None]:
from keras.layers import Dropout
from keras.layers import Bidirectional

# Network: three stacked LSTM layers of 50 units (the first two return the full
# sequence so the next LSTM can consume it), then a 50-unit dense layer,
# 20% dropout, and a single-unit output. Trained with Adam on mean squared error.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(look_back, 1)),
    LSTM(50, activation='relu', return_sequences=True),
    LSTM(50, activation='sigmoid', return_sequences=False),
    Dense(50),
    Dropout(0.2),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Fit the model on windows built from the complete scaled series, not only the
# 80% split. `train` is rebound to the full series here, so any later cell that
# reads `train` sees every row.
# NOTE(review): because the fit includes the rows that were split off as
# `test`, scores computed later on `testX` are measured on data the model has
# already been trained on.
train = dataset

Xdata_train, Ydata_train = create_dataset(dataset, look_back)
Xdata_train = Xdata_train.reshape(Xdata_train.shape[0], Xdata_train.shape[1], 1)

# One sample per batch, 200 passes, samples kept in their original order.
history = model.fit(Xdata_train, Ydata_train, batch_size=1, epochs=200, shuffle=False)

In [None]:
# Plot the per-epoch training loss recorded in `history` by `model.fit`.
plt.figure(figsize=(8,5))
plt.plot(history.history['loss'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
# The curve is the loss history, not an accuracy metric, so the title says so.
plt.title("Model Loss")

In [None]:
# Run the fitted network over the windowed train and test inputs.
trainPredict, testPredict = model.predict(trainX), model.predict(testX)

In [None]:
# Map predictions and targets back from the scaler's [0, 1] range to the
# original units.
# The target vectors are wrapped in a list before the call, so `trainY` and
# `testY` come back as single-row 2-D arrays.
# Each name is overwritten with its un-scaled version, so running this cell a
# second time would apply the inverse transform again.
trainPredict, testPredict = (
    scaler.inverse_transform(scaled) for scaled in (trainPredict, testPredict)
)
trainY, testY = (
    scaler.inverse_transform([scaled]) for scaled in (trainY, testY)
)

In [None]:
# calculate mean absolute error and root mean squared error
def evaluate_prediction(predictions, actual):
    """Print the MAE and RMSE between predicted and observed values.

    Both inputs are flattened to 1-D before subtracting so the errors are
    computed element by element. Without the flattening, predictions of shape
    (n, 1) and targets of shape (1, n) broadcast to an (n, n) matrix of all
    pairwise differences, and the printed scores are wrong.

    Args:
        predictions: array-like of predicted values, any shape.
        actual: array-like of observed values holding the same number of
            elements (a single value is also accepted and compared against
            every prediction).

    Returns:
        None. The two scores are printed with four decimals.

    Raises:
        ValueError: if the inputs hold different numbers of elements and
            neither of them is a single value.
    """
    predictions = np.asarray(predictions).ravel()
    actual = np.asarray(actual).ravel()
    if predictions.size != actual.size and 1 not in (predictions.size, actual.size):
        raise ValueError(
            'predictions and actual must hold the same number of values, '
            'got {} and {}'.format(predictions.size, actual.size)
        )
    errors = predictions - actual
    mse = np.square(errors).mean()
    rmse = np.sqrt(mse)
    mae = np.abs(errors).mean()
    print('Mean Absolute Error: {:.4f}'.format(mae))
    print('Root Mean Square Error: {:.4f}'.format(rmse))
    
# Report the error metrics for the train split first, then the test split.
for heading, predicted, observed in (
    ('Train Score:', trainPredict, trainY),
    ('\nTest Score:', testPredict, testY),
):
    print(heading)
    evaluate_prediction(predicted, observed)

In [None]:
# Put the train predictions on a NaN-filled array shaped like the full series.
# They start at row `look_back`, because the first prediction targets the row
# right after the first window.
trainPredictPlot = np.full_like(dataset, np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

In [None]:
# Put the test predictions on a second NaN-filled array shaped like the full series.
# Start offset: len(trainPredict) + look_back + 1 is the length of the train
# split (create_dataset yields len - look_back - 1 samples), and a further
# `look_back` rows are consumed by the first test window. The slice stops one
# row before the end because create_dataset never targets the last row.
testPredictPlot = np.full_like(dataset, np.nan)
testPredictPlot[len(trainPredict) + 2 * look_back + 1:len(dataset) - 1, :] = testPredict

In [None]:
# Plot the observed series with the train and test predictions overlaid.
plt.figure(figsize=(16,8))
# The first line is the complete observed series (train and test rows alike),
# so it is labelled 'Actual' rather than 'Train'.
plt.plot(scaler.inverse_transform(dataset), label='Actual')
plt.plot(trainPredictPlot, label='Train Predict')
plt.plot(testPredictPlot, label='Test Predict')
plt.legend(loc='best')
plt.show()

In [None]:
look_back = 30
# Build the calendar for the forecast: one date per day after the last
# observation. `add_dates` starts at the last observed date itself (offset 0),
# so that first entry is dropped when the empty `future_dates` frame is indexed.
from pandas.tseries.offsets import DateOffset
add_dates = [
    df.index[-1] + DateOffset(days=step)
    for step in range(look_back + 1)
]
future_dates = pd.DataFrame(index=add_dates[1:], columns=df.columns)

In [None]:
# Recursive multi-step forecast: predict one step, append it to the input
# window, drop the oldest value, and repeat `look_back` times.
pred_list = []
# Seed with the last `look_back` rows of the full scaled series. Reading from
# `dataset` directly (instead of `train`) means the seed no longer depends on
# an earlier cell having rebound `train` to the full series; the forecast dates
# start right after the last row of `df`, so this is the window they need.
batch = dataset[-look_back:].reshape((1, look_back, 1))
for i in range(look_back):
    pred_list.append(model.predict(batch)[0])
    # Slide the window: drop the oldest time step, append the newest prediction.
    batch = np.append(batch[:,1:,:],[[pred_list[i]]],axis=1)
# Back to original units, indexed by the future dates.
df_predict = pd.DataFrame(scaler.inverse_transform(pred_list),
                          index=future_dates[-look_back:].index, columns=['Prediction'])
# Observed data and forecast side by side; each is NaN where the other has rows.
df_proj = pd.concat([df,df_predict], axis=1)
df_proj.to_csv("predicted/Ish.L.TrafficVolume.DL.GB(GB)-"+str(look_back)+"days")

In [None]:
# Observed series and the forecast on one date axis; the forecast is drawn in red.
plt.figure(figsize=(20, 10))
plt.plot(df_proj.index, df_proj['Ish.L.TrafficVolume.DL.GB(GB)'], label='Actual')
plt.plot(df_proj.index, df_proj['Prediction'], color='r', label='Predictions')
plt.legend(loc='best')
plt.xlabel('Date')
plt.ylabel('Data volume in GB')
plt.xticks(fontsize=18)
plt.yticks(fontsize=16)


In [None]:
# Save the model in two files: the architecture as JSON and the weights as HDF5.

# Architecture -> JSON text file.
model_json = model.to_json()
with open("models/Ish.L.TrafficVolume.DL.GB(GB)_trainedModel.json", "w") as json_file:
    json_file.write(model_json)

# Weights -> HDF5 file next to the JSON.
model.save_weights("models/Ish.L.TrafficVolume.DL.GB(GB)_trainedModel.h5")
print("Saved model to disk")