In [2]:
import numpy as np   
import pandas as pd  

import os 
import re
import warnings
from tqdm import tqdm
import datetime as dt

from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_squared_log_error
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


warnings.filterwarnings("ignore")

from numpy.random import seed
seed(1)

# Allows us to see more information regarding the DataFrame
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

# 1 Data pre-processing

In [9]:
train_data = pd.read_csv('sales_train_validation.csv')
calendar = pd.read_csv('calendar.csv')
sell_prices = pd.read_csv('m5-forecasting-accuracy/sell_prices.csv')
submission_file = pd.read_csv('sample_submission.csv')

## 1.1 Calendar Data

In [11]:
days = range(1, 1970)
time_series_columns = [f'd_{i}' for i in days]
transfer_cal = pd.DataFrame(calendar[['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI']].values.T, index=['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI'], columns= time_series_columns)
transfer_cal = transfer_cal.fillna(0)
event_name_1_se = transfer_cal.loc['event_name_1'].apply(lambda x: x if re.search("^\d+$", str(x)) else np.nan).fillna(10)
event_name_2_se = transfer_cal.loc['event_name_2'].apply(lambda x: x if re.search("^\d+$", str(x)) else np.nan).fillna(10)
calendar['date'] = pd.to_datetime(calendar['date'])
calendar = calendar[calendar['date']>= '2016-2-01']  # reduce memory
calendar= transform(calendar)
# Attempts to convert events into time series data.
transfer_cal = pd.DataFrame(calendar[['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI']].values.T,
                            index=['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI'])

## 1.2 Sell Price Data

In [15]:
price_fea = calendar[['wm_yr_wk','date']].merge(sell_prices, on = ['wm_yr_wk'], how = 'left')
price_fea['id'] = price_fea['item_id']+'_'+price_fea['store_id']+'_validation'
df = price_fea.pivot('id','date','sell_price')
price_df = train_data.merge(df,on=['id'],how= 'left').iloc[:,-140:] # -145: starts dataframe column at 2016-01-27 
price_df.index = train_data.id
train_data = downcast_dtypes(train_data)
train_data = train_data.iloc[:, -140:]

## 1.3 Merge and backfill na

In [21]:
time_series_col1 = train_data.columns
time_series_col2 = price_df.columns
time_series_col3 = transfer_cal.columns
price_df.columns = time_series_col1
transfer_cal.columns = time_series_col1
full_train_data = pd.concat([train_data, transfer_cal, price_df], axis=0)

In [25]:
full_train_data.fillna(method='backfill', axis=1, inplace=True)
np.sum(full_train_data.isnull().sum())

In [26]:
full_train_data_transposed = full_train_data.T
full_train_data_transposed.head()

In [27]:
object_cols = [cname for cname in full_train_data_transposed.columns 
               if full_train_data_transposed[cname].dtype == "object" 
               and cname != "date"]
num_cols = [cname for cname in full_train_data_transposed.columns 
            if full_train_data_transposed[cname].dtype in ['int64', 'float64', 'int16', 'float32']
            and cname not in ['event_name_1','event_type_1','event_name_2','event_type_2','snap_CA','snap_TX','snap_WI']]
all_cols = num_cols + object_cols


# 2 Model Training

## 2.1 Split Sequences

In [6]:
def split_sequences(sequences, timesteps, horizon):
    Sequences, Targets = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + timesteps
        out_end_ix = end_ix + horizon-1
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
        Sequences.append(seq_x)
        Targets.append(seq_y)
        show_shapes()
    return array(X), array(y)

In [30]:
timesteps = 28
horizon = 28

full_train_data_sequenced = []   

for i in tqdm(range(train_data.shape[0])):    

    full_train_data_sequenced.append([list(t) for t in zip(full_train_data_transposed['event_name_1'][-(100+14):-(14)],
                                       full_train_data_transposed['event_type_1'][-(100+14):-(14)],
                                       full_train_data_transposed['event_name_2'][-(100+14):-(14)],     
                                       full_train_data_transposed['event_type_2'][-(100+14):-(14)],
                                       full_train_data_transposed['snap_CA'][-(100+14):-(14)],
                                       full_train_data_transposed['snap_TX'][-(100+14):-(14)],
                                       full_train_data_transposed['snap_WI'][-(100+14):-(14)],
                                       price_df.iloc[i][-100:],
                                       train_data.iloc[i][-100:])]) 

full_train_data_sequenced = np.asarray(full_train_data_sequenced, dtype=np.float32)

## 2.2 Normalization 

In [None]:
def Normalize(list):
    list = np.array(list)
    low, high = np.percentile(list, [0, 100])
    delta = high - low
    if delta != 0:
        for i in range(0, len(list)):
            list[i] = (list[i]-low)/delta
    return  list,low,high

def FNoramlize(list,low,high):
    delta = high - low
    if delta != 0:
        for i in range(0, len(list)):
            list[i] = list[i]*delta + low
    return list

def Normalize2(list,low,high):
    list = np.array(list)
    delta = high - low
    if delta != 0:
        for i in range(0, len(list)):
            list[i] = (list[i]-low)/delta
    return  list

In [31]:
norm_full_train_data, train_low, train_high = Normalize(full_train_data_sequenced[:,-(timesteps*2):,:])

In [33]:
num_features = 9

X_train = norm_full_train_data[:,-28*2:-28,:]
y_train = norm_full_train_data[:,-28:,8] 

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], num_features))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))

show_shapes(X_train, y_train)

## 2.3 Model Construction

In [34]:
def encoder_decoder_model():
    
    # Use Keras sequential model
    model = Sequential()
    
    # Encoder LSTM layer with Dropout regularisation; Set return_sequences to False since we are feeding last output to decoder layer
    model.add(LSTM(units = 100, activation='relu', input_shape = (X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    
    # The fixed-length output of the encoder is repeated, once for each required time step in the output sequence with the RepeatVector wrapper
    model.add(RepeatVector(horizon))
    
    # Decoder LSTM layer with Dropout regularisation; Set return_sequences to True to feed each output time step to a Dense layer
    model.add(LSTM(units = 100, activation='relu', return_sequences=True))
    model.add(Dropout(0.2))
    
    # Same dense layer is repeated for each output timestep with the TimeDistributed wrapper
    model.add(TimeDistributed(Dense(units=1, activation = "linear")))
    
    return model

In [35]:
model = encoder_decoder_model()
model.summary()

User settings:  

   KMP_AFFINITY=granularity=fine,verbose,compact,1,0  
   KMP_BLOCKTIME=0  
   KMP_DUPLICATE_LIB_OK=True  
   KMP_INIT_AT_FORK=FALSE  
   KMP_SETTINGS=1  
   KMP_WARNINGS=0  

Effective settings:  

   KMP_ABORT_DELAY=0  
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'  
   KMP_ALIGN_ALLOC=64  
   KMP_ALL_THREADPRIVATE=128  
   KMP_ATOMIC_MODE=2  
   KMP_BLOCKTIME=0  
   KMP_CPUINFO_FILE: value is not defined  
   KMP_DETERMINISTIC_REDUCTION=false  
   KMP_DEVICE_THREAD_LIMIT=2147483647  
   KMP_DISP_NUM_BUFFERS=7  
   KMP_DUPLICATE_LIB_OK=true  
   KMP_ENABLE_TASK_THROTTLING=true  
   KMP_FORCE_REDUCTION: value is not defined  
   KMP_FOREIGN_THREADS_THREADPRIVATE=true  
   KMP_FORKJOIN_BARRIER='2,2'  
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'  
   KMP_GTID_MODE=3  
   KMP_HANDLE_SIGNALS=false  
   KMP_HOT_TEAMS_MAX_LEVEL=1  
   KMP_HOT_TEAMS_MODE=0  
   KMP_INIT_AT_FORK=true  
   KMP_LIBRARY=throughput  
   KMP_LOCK_KIND=queuing  
   KMP_MALLOC_POOL_INCR=1M  
   KMP_NUM_LOCKS_IN_BLOCK=1  
   KMP_PLAIN_BARRIER='2,2'  
   KMP_PLAIN_BARRIER_PATTERN='hyper,hyper'  
   KMP_REDUCTION_BARRIER='1,1'  
   KMP_REDUCTION_BARRIER_PATTERN='hyper,hyper'  
   KMP_SCHEDULE='static,balanced;guided,iterative'  
   KMP_SETTINGS=true  
   KMP_SPIN_BACKOFF_PARAMS='4096,100'  
   KMP_STACKOFFSET=64  
   KMP_STACKPAD=0  
   KMP_STACKSIZE=8M  
   KMP_STORAGE_MAP=false  
   KMP_TASKING=2  
   KMP_TASKLOOP_MIN_TASKS=0  
   KMP_TASK_STEALING_CONSTRAINT=1  
   KMP_TEAMS_THREAD_LIMIT=4  
   KMP_TOPOLOGY_METHOD=all  
   KMP_USE_YIELD=1  
   KMP_VERSION=false   
   KMP_WARNINGS=false  
   OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}'  
   OMP_ALLOCATOR=omp_default_mem_alloc  
   OMP_CANCELLATION=false  
   OMP_DEFAULT_DEVICE=0  
   OMP_DISPLAY_AFFINITY=false  
   OMP_DISPLAY_ENV=false  
   OMP_DYNAMIC=false  
   OMP_MAX_ACTIVE_LEVELS=1  
   OMP_MAX_TASK_PRIORITY=0  
   OMP_NESTED: deprecated; max-active-levels-var=1  
   OMP_NUM_THREADS: value is not defined  
   OMP_PLACES: value is not defined  
   OMP_PROC_BIND='intel'  
   OMP_SCHEDULE='static'  
   OMP_STACKSIZE=8M  
   OMP_TARGET_OFFLOAD=DEFAULT  
   OMP_THREAD_LIMIT=2147483647  
   OMP_WAIT_POLICY=PASSIVE  
   KMP_AFFINITY='verbose,warnings,respect,granularity=fine,compact,1,0'  

2021-12-13 12:01:11.707844: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.  
Model: "sequential"  
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm (LSTM)                  (None, 100)               44000     
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
repeat_vector (RepeatVector) (None, 28, 100)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 28, 100)           80400     
_________________________________________________________________
dropout_1 (Dropout)          (None, 28, 100)           0         
_________________________________________________________________
time_distributed (TimeDistri (None, 28, 1)             101       
=================================================================
Total params: 124,501  
Trainable params: 124,501  
Non-trainable params: 0  
_________________________________________________________________

## Model Settings  
- optimazation: Adam
- Loss: mean_squared_error

In [36]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics = ['accuracy'])

In [37]:
his=model.fit(X_train,y_train,epochs=15,batch_size=1000,verbose=2)

2021-12-12 12:01:12.227976: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR   Optimization Passes are enabled (registered 2)  
Epoch 1/15  
31/31 - 21s - loss: 1.8586e-04 - accuracy: 0.5628  
Epoch 2/15  
31/31 - 17s - loss: 1.0374e-04 - accuracy: 0.5628  
Epoch 3/15  
31/31 - 16s - loss: 8.9260e-05 - accuracy: 0.5628  
Epoch 4/15  
31/31 - 17s - loss: 8.5961e-05 - accuracy: 0.5628   
Epoch 5/15  
31/31 - 16s - loss: 8.4330e-05 - accuracy: 0.5628  
Epoch 6/15  
31/31 - 17s - loss: 8.4499e-05 - accuracy: 0.5628  
Epoch 7/15  
31/31 - 16s - loss: 8.4180e-05 - accuracy: 0.5628  
Epoch 8/15  
31/31 - 17s - loss: 8.3415e-05 - accuracy: 0.5628  
Epoch 9/15  
31/31 - 16s - loss: 8.3446e-05 - accuracy: 0.5628  
Epoch 10/15  
31/31 - 17s - loss: 8.2113e-05 - accuracy: 0.5628  
Epoch 11/15  
31/31 - 16s - loss: 8.2478e-05 - accuracy: 0.5628  
Epoch 12/15  
31/31 - 17s - loss: 8.1590e-05 - accuracy: 0.5628  
Epoch 13/15  
31/31 - 16s - loss: 8.2032e-05 - accuracy: 0.5628  
Epoch 14/15  
31/31 - 17s - loss: 8.2135e-05 - accuracy: 0.5628  
Epoch 15/15  
31/31 - 16s - loss: 8.1992e-05 - accuracy: 0.5628  

## Accuracy and loss

In [38]:
plt.plot(his.history['loss'])
plt.plot(his.history['accuracy'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['loss','accuracy'])
plt.show()

# Prediction

In [41]:
test_input = np.array(X_train[:, -timesteps:, :]) 
test_input = test_input.reshape((X_train.shape[0], timesteps, num_features)) 
y_test = model.predict(test_input[:,-timesteps:, :], verbose=2)
test_forecast= np.concatenate((test_input[:,:,8].reshape(test_input.shape[0],test_input.shape[1]), 
                           y_test.astype(np.float32).reshape(test_input.shape[0],test_input.shape[1])),axis=1).reshape((test_input.shape[0],test_input.shape[1]+28,1))
# Reverse normalization
test_forecast = FNoramlize(test_forecast,train_low,train_high)
test_forecast = np.rint(test_forecast)
forecast = pd.DataFrame(test_forecast.reshape(test_forecast.shape[0],test_forecast.shape[1])).iloc[:,-28:]
forecast.columns = [f'F{i}' for i in range(1, forecast.shape[1] + 1)]
forecast[forecast < 0] = 0

In [45]:
train_data = pd.read_csv('sales_train_validation.csv')
validation_ids = train_data['id'].values
evaluation_ids = [i.replace('validation', 'evaluation') for i in validation_ids]
ids = np.concatenate([validation_ids, evaluation_ids])
predictions = pd.DataFrame(ids, columns=['id'])
forecast = pd.concat([forecast]*2).reset_index(drop=True)
predictions = pd.concat([predictions, forecast], axis=1)

In [51]:
predictions.to_csv('submission.csv', index=False) 