# Neural Time Series Forecasting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as  np
import seaborn as sns
# Global seaborn theme applied to the figures drawn in this notebook.
sns.set(style="whitegrid", color_codes=True)

# Directory holding the CSV files, relative to the notebook.
path = '../data/kaggle/'

# The first CSV column becomes the index; it is then parsed into datetimes so
# that date attributes (e.g. dayofweek) can be read from it later.
train_csv = path + 'train.csv'
df_train = pd.read_csv(train_csv, index_col=0)
df_train.index = pd.to_datetime(df_train.index)


Para poder utilizar redes neuronales, debemos convertir las series de tiempo en un problema de aprendizaje supervisado.

In [2]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    """Reframe a time-ordered frame as a supervised-learning table.

    Each output row places side by side: copies of ``data`` shifted to the
    ``window`` previous steps, the unshifted frame, and a copy shifted ``lag``
    steps ahead that serves as the prediction target.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations, one row per time step.
    window : int
        Number of past steps to include; their columns are named
        ``<col>(t-window)`` ... ``<col>(t-1)``.
    lag : int
        Row offset of the target step; its columns are named ``<col>(t+lag)``.
    dropnan : bool
        When true, rows containing any NaN are removed. This includes the
        edge rows that the shifts leave incomplete.

    Returns
    -------
    pandas.DataFrame
        The shifted copies concatenated column-wise, in the order
        past steps (oldest first), current step, target step.
    """
    # Past offsets in output order: oldest step first.
    past_offsets = range(window, 0, -1)

    frames = [data.shift(k) for k in past_offsets]
    labels = ['%s(t-%d)' % (name, k) for k in past_offsets for name in data.columns]

    # Current step (no shift).
    frames.append(data)
    labels.extend('%s(t)' % (name) for name in data.columns)

    # Target step: a negative shift pulls future rows up to the current row.
    frames.append(data.shift(-lag))
    labels.extend('%s(t+%d)' % (name, lag) for name in data.columns)

    table = pd.concat(frames, axis=1)
    table.columns = labels
    if dropnan:
        table = table.dropna()
    return table

# One past step as input, and a target one step ahead.
window, lag = 1, 1
# NOTE(review): the shifts are positional over the whole frame. df_train seems
# to stack several store/item series, so rows at the boundary between two
# series would pair values from different series -- confirm.
df_train = series_to_supervised(df_train, window=window, lag=lag)

In [3]:
# Preview the first rows of the reframed table.
df_train.head()

Unnamed: 0_level_0,store(t-1),item(t-1),sales(t-1),store(t),item(t),sales(t),store(t+1),item(t+1),sales(t+1)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-01-02,1.0,1.0,13.0,1,1,11,1.0,1.0,14.0
2013-01-03,1.0,1.0,11.0,1,1,14,1.0,1.0,13.0
2013-01-04,1.0,1.0,14.0,1,1,13,1.0,1.0,10.0
2013-01-05,1.0,1.0,13.0,1,1,10,1.0,1.0,12.0
2013-01-06,1.0,1.0,10.0,1,1,12,1.0,1.0,10.0


In [4]:
# print() call form: same output on Python 2, and required on Python 3.
print(df_train.shape)

(912998, 9)


In [5]:
# print() call form: same output on Python 2, and required on Python 3.
print(df_train.columns)

Index([u'store(t-1)', u'item(t-1)', u'sales(t-1)', u'store(t)', u'item(t)',
       u'sales(t)', u'store(t+1)', u'item(t+1)', u'sales(t+1)'],
      dtype='object')


In [6]:
# Collect the shifted copies of the identifier columns: the target-step copy
# first, then one pair per past step. The current-step columns are not listed.
id_cols = ['item', 'store']
columns_to_drop = ['%s(t+%d)' % (col, lag) for col in id_cols]
for i in range(window, 0, -1):
    columns_to_drop += ['%s(t-%d)' % (col, i) for col in id_cols]

# print() call form: same output on Python 2, and required on Python 3.
print(columns_to_drop)

['item(t+1)', 'store(t+1)', 'item(t-1)', 'store(t-1)']


In [7]:
# Remove the shifted identifier columns and give the current-step ones plain
# names, then inspect the last rows.
df_train = (
    df_train
    .drop(columns=columns_to_drop)
    .rename(columns={'store(t)': 'store', 'item(t)': 'item'})
)
df_train.tail()

Unnamed: 0_level_0,sales(t-1),store,item,sales(t),sales(t+1)
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-12-26,51.0,10,50,41,63.0
2017-12-27,41.0,10,50,63,59.0
2017-12-28,63.0,10,50,59,74.0
2017-12-29,59.0,10,50,74,62.0
2017-12-30,74.0,10,50,62,82.0


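Incorporamos información exógena al modelo. En este caso, a partir de la fecha derivamos un indicador de fin de semana (`weekend`, que vale 1 de viernes a domingo); las variables de mes y año están deshabilitadas por ahora.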

In [8]:
def expand_df(df):
    """Return a copy of ``df`` with a ``weekend`` indicator column added.

    ``weekend`` is 1 where the index's ``dayofweek`` is greater than 3 and 0
    otherwise, stored as int32. The input frame is not modified.
    """
    # NOTE(review): pandas numbers Monday as 0, so "> 3" also flags Fridays --
    # confirm this is the intended definition of weekend.
    late_week = df.index.dayofweek > 3
    # Month and year features are currently disabled.
    return df.assign(weekend=late_week.astype(np.int32))

# Attach the calendar feature and inspect the last rows.
df_train = expand_df(df_train)
df_train.tail()

Unnamed: 0_level_0,sales(t-1),store,item,sales(t),sales(t+1),weekend
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-12-26,51.0,10,50,41,63.0,0
2017-12-27,41.0,10,50,63,59.0,0
2017-12-28,63.0,10,50,59,74.0,0
2017-12-29,59.0,10,50,74,62.0,1
2017-12-30,74.0,10,50,62,82.0,1


A continuación, separamos la variable dependiente $y$ de las variables independientes $X$. En este caso, la etiqueta corresponde a la columna `sales(t+1)`.

In [9]:
# The target is the sales column shifted `lag` steps ahead; every other column
# is an input feature.
labels_col = 'sales(t+%d)' % lag

y = df_train.loc[:, labels_col]
X = df_train.drop(columns=[labels_col])

Transformamos las columnas `store` e `item` en variables categóricas mediante codificación one-hot.

In [10]:
from sklearn.preprocessing import OneHotEncoder

# One encoder per identifier column, each fitted independently.
store_ohe, item_ohe = OneHotEncoder(), OneHotEncoder()

# The encoders expect a 2-D input, hence the reshape to a single column.
store_column = X['store'].values.reshape(-1, 1)
item_column = X['item'].values.reshape(-1, 1)

# Densify the sparse encoder output and wrap it in frames for column naming.
X_store = pd.DataFrame(store_ohe.fit_transform(store_column).toarray())
X_items = pd.DataFrame(item_ohe.fit_transform(item_column).toarray())

# The raw identifiers are replaced by their one-hot blocks.
X = X.drop(['store', 'item'], axis=1)

# Names assume 10 store columns and 50 item columns; pandas raises on a
# length mismatch if the encoders produce a different number of columns.
X_store.columns = ['store_' + str(n) for n in range(1, 11)]
X_items.columns = ['item_' + str(n) for n in range(1, 51)]



In [11]:
# Stack the remaining columns of X with both one-hot blocks into one feature
# matrix. The concatenation is positional, so all three must share row order.
X = np.concatenate([X, X_items, X_store], axis=1)
y = y.values

# print() call form: same output on Python 2, and required on Python 3.
print(X.shape)
print(y.shape)

(912998, 63)
(912998,)


In [153]:
from sklearn.model_selection import train_test_split

# NOTE(review): train_test_split shuffles rows by default, so validation rows
# are interleaved in time with training rows -- confirm this is acceptable
# for a forecasting task.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Format the message explicitly: print('label', value) displays a tuple on
# Python 2, whereas this form prints the same text on Python 2 and 3.
print('Train set shape %s' % (X_train.shape,))
print('Validation set shape %s' % (X_test.shape,))

# Convert to a 3-D tensor (samples, 1, features): each sample becomes a
# sequence of length one, matching the input_shape given to the LSTM layer.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
print(X_train.shape)

('Train set shape', (730398, 63))
('Validation set shape', (182600, 63))
(730398, 1, 63)


# LSTM Model Training

In [154]:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras import optimizers
import keras.backend as K

def mean_pred(y_true, y_pred):
    """Return the backend mean of ``y_pred``.

    ``y_true`` is accepted but not used. Presumably intended as a custom
    Keras metric; it is not referenced by the visible cells.
    """
    average_prediction = K.mean(y_pred)
    return average_prediction

# Training hyper-parameters.
epochs = 40
batch_size = 256
lr = 0.0003

# One LSTM layer reading sequences of shape (timesteps, features), followed by
# a single linear unit that outputs the forecast.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))

# Build the optimizer explicitly so the learning rate declared above is
# applied; the string "rmsprop" silently used the library default instead.
# `lr` is the keyword used by the Keras 2 API this notebook targets.
model.compile(loss='mse', optimizer=optimizers.RMSprop(lr=lr), metrics=['mape'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 50)                22800     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 51        
Total params: 22,851
Trainable params: 22,851
Non-trainable params: 0
_________________________________________________________________


In [155]:
# Pass the batch size declared with the other hyper-parameters; without it
# fit() falls back to its default of 32 samples per batch.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=epochs, batch_size=batch_size, verbose=1)

Train on 730398 samples, validate on 182600 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [49]:
# Persist the trained model to an HDF5 file at a path relative to the working directory.
model.save('lstm_continuous_input.h5')