In [1]:
import os, inspect, sys
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib 
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

import datetime as dt
from dateutil.relativedelta import relativedelta

import plotly.plotly as py
import plotly.graph_objs as go


# Locate the project root so the project-local `reb` package can be imported.
# `__file__` exists when this code runs as a script; a notebook kernel does not
# define it, so fall back to the working directory there.
# inspect.getabsfile(inspect.currentframe()) is avoided on purpose: on kernels
# that attribute cell code to a temporary file it resolves to that temporary
# directory instead of the notebook's directory.
try:
    CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    CURRENT_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CURRENT_DIR)
sys.path.insert(0, ROOT_DIR)

from reb.src.pyts import series_to_supervised

Using TensorFlow backend.


In [2]:
def month_range(start_date, n_months):
    """Lazily yield ``n_months`` dates spaced one calendar month apart.

    The first value yielded is ``start_date`` itself (offset of zero months);
    each following value adds one more month via ``relativedelta``.
    """
    yield from (
        start_date + relativedelta(months=offset)
        for offset in range(n_months)
    )
        
def get_combinations(x):
    """Return every non-empty combination of the items in ``x`` as a list of lists.

    Combinations are ordered by size (1 up to ``len(x)``) and, within a size,
    in the order produced by ``itertools.combinations``.
    """
    return [
        list(subset)
        for size in range(1, len(x) + 1)
        for subset in itertools.combinations(x, size)
    ]

In [3]:
# Load the processed monthly data set; DATE is parsed into timestamps on read.
ffname = os.path.join(ROOT_DIR, "reb", "data", "ext", "data_monthly_processed.csv")
df_original = pd.read_csv(ffname, parse_dates=["DATE"])
df_original["DATE"] = pd.to_datetime(df_original["DATE"], format="%Y-%m")

# Work on a separate DATE-indexed frame so df_original stays as loaded.
df = df_original.set_index("DATE")
df.head()

Unnamed: 0_level_0,PAYEMS,AWHNONAG,CES9091000001,USGOVT,UNEMPLOY,LNS13023706,MVPHGFD027MNFRBDAL,MNFCTRIRSA,MORTGAGE30US,MORTGAGE15US,DGS10,INDPRO,CMRMTSPL,W875RX1
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1992-01-01,108374,34.1,3137,18688,9283,11.9,2719.3,1.66,8.4,8.0,7.0,63.3372,757479.217,6840.9
1992-02-01,108317,34.1,3121,18689,9454,9.8,2738.8,1.65,8.8,8.4,7.3,63.7864,759384.431,6879.1
1992-03-01,108369,34.1,3121,18715,9460,9.5,2760.5,1.58,8.9,8.6,7.5,64.3199,762440.896,6884.7
1992-04-01,108526,34.3,3117,18739,9415,10.4,2764.0,1.57,8.9,8.5,7.5,64.8069,767521.889,6903.6
1992-05-01,108653,34.3,3119,18753,9744,10.3,2830.7,1.55,8.7,8.3,7.4,65.0202,760844.122,6941.7


In [4]:
# Restore the scaler object that was persisted next to the intermediate data.
ffname = os.path.join(ROOT_DIR, "reb", "data", "int", "monthly.scaler.save")
scaler = joblib.load(ffname)

# Target series, given as a column position in the DATE-indexed frame.
# Candidate targets and their positions:
#   USRECM   - NBER based Recession Indicators for the United States
#              from the Peak through the Trough              -> NA
#   GDPC1    - Real Gross Domestic Product                   -> NA
#   PAYEMS   - All Employees: Total Nonfarm Payrolls         -> 0   (selected)
#   INDPRO   - Industrial Production Index                   -> 11
#   CMRMTSPL - Real Manufacturing and Trade Industries Sales -> 12
#   W875RX1  - Real personal income excluding current
#              transfer receipts                             -> 13
index_target = 0

# Feature columns, kept in ascending order.
index_features = sorted([8, 2])

In [5]:

# Model parameters. All three are encoded in the saved model's file name, so
# they must match the values the model file was written with.
n_lags = 6        # rows of history fed to the model as one input window
n_sequences = 6   # number of values the model predicts per window
n_units = 10      # presumably the LSTM layer width - TODO confirm against the training code

# Rebuild the model file name from the selected features, target and parameters,
# then load the model from the intermediate data directory.
feature_tag = '.'.join(map(str, index_features))
fname = f'f.{feature_tag}.t.{index_target}.l.{n_lags}.s.{n_sequences}.u.{n_units}.h5'
ffname = os.path.join(ROOT_DIR, "reb", "data", "int", fname)
model = load_model(ffname)

In [6]:
# Rescale the raw observations with the scaler restored from disk.
# `transform` is used instead of `fit_transform`: fitting again would replace
# the loaded scaler's stored per-column min/max with statistics of the current
# data, so inputs would no longer be scaled the way the saved scaler defines
# (presumably the scaling the model was trained with - TODO confirm the saved
# scaler was fitted on these same 14 columns).
all_values = df.values.astype("float32")
all_values_scaled = scaler.transform(all_values)
all_values_scaled.shape

(319, 14)

In [7]:
# Build the model input from the most recent n_lags rows: the feature columns
# first, followed by the target column.
lag_columns = index_features + [index_target]
latest_window = all_values_scaled[-n_lags:, lag_columns]
n_variables = latest_window.shape[1]
# A single sample shaped (1, n_lags, n_variables) for the LSTM input.
x_scaled = latest_window.reshape((1, n_lags, n_variables))
# NOTE(review): this displays the full scaled array's shape; x_scaled.shape may
# have been intended here - confirm.
all_values_scaled.shape

(319, 14)

In [8]:
# forecast: run the loaded model on the single input window x_scaled.
# The result is still in scaled units; its shape is displayed as a sanity check
# ((1, 6) in the saved output).
yhat_scaled = model.predict(x_scaled)
yhat_scaled.shape

(1, 6)

In [9]:
# invert scaling
# The scaler operates on rows containing all n_allvars columns, so the scaled
# predictions are written into the target column of a block of scaled rows and
# the whole block is inverted; only the target column of the result is kept.
# The block is an explicit copy: a plain slice (and a reshape of it) is a view
# of all_values_scaled, and assigning predictions into a view would overwrite
# the last n_sequences target values of all_values_scaled itself.
temp = all_values_scaled[-n_sequences:, :].copy()
n_allvars = temp.shape[1]
temp[:, index_target] = yhat_scaled.reshape(n_sequences)
yhat = scaler.inverse_transform(temp)[:, index_target]
# NOTE(review): this displays the scaled prediction's shape; yhat.shape may have
# been intended here - confirm.
yhat_scaled.shape

(1, 6)

In [10]:
# Assemble the forecast frame: the last observed target value (dated with the
# last index entry of df) followed by the predicted values, one month apart.
start_date = df.index[-1]
mrange = month_range(start_date, n_sequences + 1)
forecast_dates = [d.strftime('%Y-%m-%d') for d in mrange]
forecast_values = list(df.values[-1:, index_target]) + list(yhat)
df_forecast = pd.DataFrame(
    {"VALUE": forecast_values},
    index=pd.Index(forecast_dates, name="DATE"),
)
# The frame holds n_sequences + 1 rows; only the first n_sequences are displayed.
df_forecast.head(n_sequences)

Unnamed: 0_level_0,VALUE
DATE,Unnamed: 1_level_1
2018-07-01,149078.0
2018-08-01,142197.84375
2018-09-01,141208.6875
2018-10-01,144338.921875
2018-11-01,143708.84375
2018-12-01,143680.703125


In [11]:
# Collect the observed and forecast series as lists of {"t": ..., "x": ...} records.
def _as_points(timestamps, values):
    """Pair each timestamp with its value as a ``{"t": timestamp, "x": value}`` dict."""
    return [{"t": t, "x": x} for t, x in zip(timestamps, values)]


data = {
    "current": _as_points(
        list(df.index.astype(str).values),
        df.iloc[:, index_target].astype("float32"),
    ),
    "predict": _as_points(
        list(df_forecast.index.astype(str).values),
        df_forecast.VALUE.astype("float32"),
    ),
}
data

{'current': [{'t': '1992-01-01', 'x': 108374.0},
  {'t': '1992-02-01', 'x': 108317.0},
  {'t': '1992-03-01', 'x': 108369.0},
  {'t': '1992-04-01', 'x': 108526.0},
  {'t': '1992-05-01', 'x': 108653.0},
  {'t': '1992-06-01', 'x': 108718.0},
  {'t': '1992-07-01', 'x': 108792.0},
  {'t': '1992-08-01', 'x': 108927.0},
  {'t': '1992-09-01', 'x': 108959.0},
  {'t': '1992-10-01', 'x': 109145.0},
  {'t': '1992-11-01', 'x': 109277.0},
  {'t': '1992-12-01', 'x': 109494.0},
  {'t': '1993-01-01', 'x': 109804.0},
  {'t': '1993-02-01', 'x': 110051.0},
  {'t': '1993-03-01', 'x': 109997.0},
  {'t': '1993-04-01', 'x': 110304.0},
  {'t': '1993-05-01', 'x': 110573.0},
  {'t': '1993-06-01', 'x': 110749.0},
  {'t': '1993-07-01', 'x': 111055.0},
  {'t': '1993-08-01', 'x': 111210.0},
  {'t': '1993-09-01', 'x': 111455.0},
  {'t': '1993-10-01', 'x': 111741.0},
  {'t': '1993-11-01', 'x': 111991.0},
  {'t': '1993-12-01', 'x': 112314.0},
  {'t': '1994-01-01', 'x': 112590.0},
  {'t': '1994-02-01', 'x': 112788.0},
 

In [12]:
# NOTE(review): scratch expression; none of the visible cells use its result -
# candidate for removal.
"2,3".split(",")

['2', '3']