In [None]:
import os
import gc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#### Feature Engineering

In [None]:
# Prepare scalers to normalize the data: a min-max scaler for the model
# inputs and a standard (zero-mean / unit-variance) scaler for the targets
input_scaler = MinMaxScaler()
output_scaler = StandardScaler()

In [None]:
# Directory holding the M5 competition CSV files. Set the M5_DATA_DIR
# environment variable to point elsewhere instead of editing four paths.
DATA_DIR = os.environ.get('M5_DATA_DIR', '/mnt/c/Users/NYCZE/Downloads/m5-forecasting-accuracy')

calendar_df = pd.read_csv(os.path.join(DATA_DIR, 'calendar.csv'))
prices_df = pd.read_csv(os.path.join(DATA_DIR, 'sell_prices.csv'))
sales_df = pd.read_csv(os.path.join(DATA_DIR, 'sales_train_validation.csv'))
submission_df = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))

In [None]:
# Preview the first rows instead of rendering the whole frame
sales_df.head()

In [None]:
def train_test(df, train_steps, test_steps):
    """Cut a wide frame into consecutive (input, target) column windows.

    Windows are taken from the right-hand side of ``df`` backwards. Each
    window spans ``train_steps + test_steps`` columns: the first
    ``train_steps`` columns form the input, the remaining ``test_steps``
    columns form the target. Columns left over on the left-hand side that do
    not fill a whole window are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per series, one column per step.
    train_steps : int
        Number of columns in each input window.
    test_steps : int
        Number of columns in each target window.

    Returns
    -------
    tuple of (list, list)
        Two lists of 1-D arrays (one entry per row per window): the inputs
        and the matching targets, in the same order.
    """
    window = train_steps + test_steps
    n_cols = df.shape[1]

    inputs, targets = [], []

    # k = 1 is the right-most window, k = 2 the one before it, and so on
    for k in range(1, n_cols // window + 1):
        start = n_cols - k * window
        split = start + train_steps
        stop = split + test_steps

        # Iterating a 2-D array yields its rows, so each row becomes one sample
        inputs += list(df.iloc[:, start:split].to_numpy())
        targets += list(df.iloc[:, split:stop].to_numpy())

    return inputs, targets

In [None]:
# Build (input, target) windows: 100 input steps followed by 28 target steps.
# iloc[:, 6:] skips the first six columns -- presumably the id/metadata
# columns that precede the daily sales columns; TODO confirm against the CSV.
X_data, y_data = train_test(sales_df.iloc[:, 6:], 100, 28)

In [None]:
# Number of input windows and of matching target windows
print('Train data shape: ', len(X_data))
print('Test data shape: ', len(y_data))

In [None]:
# Stack the input windows into one 2-D array and min-max scale it.
# NOTE(review): the scaler is fitted on all samples, i.e. before the
# train/test split further down -- confirm that this is intended.
X_tensors = input_scaler.fit_transform(np.array(X_data))
# Append a trailing axis of length 1 (a single feature per time step)
X_tensors = X_tensors.reshape(X_tensors.shape + (1,))

In [None]:
# Stack the target windows and standardize them; the fitted scaler is kept
# so predictions can be mapped back with inverse_transform later on
Y_tensors = output_scaler.fit_transform(np.array(y_data))

#### Train/Test Sets

In [None]:
# Fraction of the samples used for training; the rest is held out for testing
train_perc = 0.8
n_samples = X_tensors.shape[0]
n = int(n_samples*train_perc)
# Draw the training rows at random, without repetition
train_index = np.random.choice(n_samples, n, replace=False)
# The test rows are every remaining sample. The complement must be taken
# over all n_samples indices (not just the first n), otherwise rows with an
# index >= n could never be tested and the test set would be far too small.
test_index = np.setdiff1d(np.arange(n_samples), train_index)

In [None]:
# Sanity check: lists the indices present in both sets (expected to be empty)
np.intersect1d(train_index, test_index)

In [None]:
# Index inputs and targets with the same index arrays so rows stay paired
X_train, y_train = X_tensors[train_index], Y_tensors[train_index]
X_test, y_test = X_tensors[test_index], Y_tensors[test_index]

# Report how many samples ended up in each split
for label, split in (('Train data input samples: ', X_train),
                     ('Test data input samples: ', X_test),
                     ('Train data output samples: ', y_train),
                     ('Test data output samples: ', y_test)):
    print(label, len(split))

#### Model Building

In [None]:
from keras import backend as K

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Reshape
from keras.layers import LSTM
from tensorflow.compat.v1.keras.layers import CuDNNLSTM 
from keras.layers import Conv1D
from keras.utils import to_categorical
from keras.layers import MaxPooling1D
from keras.layers import  GlobalAveragePooling1D
from keras.utils import to_categorical

import tensorflow as tf

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Keras-backend RMSE between ``y_true`` and ``y_pred``, used as the training loss.

    The squared differences are averaged over all elements before the square
    root is taken, so the result is a single scalar tensor.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

def rmse(predictions, targets):
    """Root-mean-squared error between two numeric arrays, computed with NumPy.

    The mean is taken over every element, so a single scalar is returned.
    """
    residuals = predictions - targets
    return np.sqrt(np.mean(np.square(residuals)))

In [None]:
# Network dimensions, read off the prepared arrays:
# inputs are (samples, steps_in, n_features), targets are (samples, steps_out)
steps_in, n_features = X_train.shape[1:3]
steps_out = y_train.shape[1]

In [None]:
# Two stacked LSTM layers: the first returns the full sequence so the second
# has a sequence to consume; the second returns only its final output
model = Sequential([
    LSTM(25, return_sequences=True, input_shape=(steps_in, n_features)),
    LSTM(25),
])

In [None]:
# Output layer: one unit per forecast step
model.add(Dense(steps_out))
# Optimize with Adam against the custom RMSE loss defined above
model.compile(optimizer='adam', loss=root_mean_squared_error)

In [None]:
# Train for a single pass over the training split, showing progress output
model.fit(X_train, y_train, epochs=1, verbose=1)

#### Model Prediction

In [None]:
# Predictions for the held-out inputs; these are still in the scaled target space
raw_preds = model.predict(X_test)

In [None]:
# Undo the target scaling, then take the absolute value and round to whole numbers.
# NOTE(review): abs() flips negative predictions to positive values rather
# than clipping them at zero -- confirm that this is the intended behaviour.
all_pred = np.round(np.abs(output_scaler.inverse_transform(raw_preds)))

In [None]:
def rmse(predictions, targets):
    """Return the root-mean-squared error between ``predictions`` and ``targets``.

    NOTE(review): this re-defines the ``rmse`` helper that is already defined
    next to ``root_mean_squared_error`` earlier in the notebook, with the
    same formula.
    """
    errors = predictions - targets
    mean_squared = np.mean(errors ** 2)
    return np.sqrt(mean_squared)

In [None]:
# all_pred has been mapped back to the original target scale, whereas y_test
# is still standardized; undo the scaling of the targets so both sides of
# the comparison are in the same units.
rmse(all_pred, output_scaler.inverse_transform(y_test))

#### Predict and create submission.csv file

In [None]:
def submission_set(df, steps):
    """Return the last ``steps`` columns of ``df`` as a NumPy array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose right-most columns are wanted.
    steps : int
        Number of trailing columns to keep.

    Returns
    -------
    numpy.ndarray
        Array with one row per row of ``df`` and ``steps`` columns.
    """
    n_cols = df.shape[1]
    tail = df.iloc[:, n_cols - steps:n_cols]
    return tail.to_numpy()

In [None]:
# Set the number of steps to be the same as the training data
submission_set = submission_set(sales_df, 100)

In [None]:
# Scale the predictions
submission_tensors = input_scaler.transform(submission_set)

In [None]:
# Reshape the input tensors into the same dimensions as the training data:
# (samples, steps, 1), i.e. a trailing feature axis of length 1 is added
submission_tensors = submission_tensors.reshape((submission_tensors.shape[0], submission_tensors.shape[1], 1))

In [None]:
# Make our predictions (still in the scaled target space at this point)
submission_preds = model.predict(submission_tensors)

In [None]:
# Map the predictions back to the original target scale, then take the
# absolute value and round to whole numbers
submission_preds = np.round(np.abs(output_scaler.inverse_transform(submission_preds)))

In [None]:
# Create a dataframe for the predictions: one row per predicted series, with
# the forecast column names taken from the sample submission
n_series = len(submission_preds)
submission_data = pd.DataFrame(data=submission_preds,
                               columns=list(submission_df.columns[1:]))
# Take as many ids as there are prediction rows (instead of a hard-coded
# count); raw values are used so the ids are assigned purely by position
submission_data.insert(0, 'id', submission_df['id'].to_numpy()[:n_series])
submission_data.head()

In [None]:
# Concatenate this dataframe with the remaining rows of the sample submission
# (the evaluation observations). The cut-off is the number of predicted rows
# rather than a hard-coded row count.
validation_df = pd.concat([submission_data, submission_df.iloc[len(submission_data):]])
validation_df.head()

In [None]:
# Export the submission to a CSV file, leaving out the dataframe index
validation_df.to_csv('submission.csv', index=False)