In [23]:
# __future__ imports come first so this cell stays valid when the notebook is
# exported to a plain .py script, where a late __future__ import is a
# SyntaxError. On Python 3 the import itself is a no-op.
from __future__ import division

import pickle
import warnings

import numpy as np
import pandas as pd

# NOTE(review): this silences every warning for the rest of the session,
# including pandas chained-assignment warnings; consider narrowing the filter.
# It is set before the keras import, as in the original ordering, so keras'
# import-time warnings stay hidden.
warnings.filterwarnings("ignore")
from keras.models import load_model

# Raw sales export; later cells read the 'Unnamed: 0', 'invoice_date' and
# 'quantity' columns from it.
df_sales = pd.read_csv('out.csv')

In [24]:
# Remove the 'Unnamed: 0' column (presumably the index written by an earlier
# DataFrame.to_csv call — verify against how out.csv is produced).
# errors='ignore' makes the cell idempotent: running it again after the column
# is already gone is a no-op instead of a KeyError.
df_sales = df_sales.drop(columns=['Unnamed: 0'], errors='ignore')

In [25]:
# Collapse every invoice date to the first day of its month, then total the
# quantity sold per month.
df_sales['invoice_date'] = pd.to_datetime(df_sales['invoice_date'])
# A period round-trip truncates to the month start without building date
# strings. The string approach breaks as soon as one date is missing: year and
# month become floats, so every rebuilt string looks like '2019.0-12.0-01'.
# Here a missing date simply stays NaT.
df_sales['invoice_date'] = df_sales['invoice_date'].dt.to_period('M').dt.to_timestamp()
# groupby leaves out rows whose key is NaT, so undated rows do not contribute
# to any monthly total.
df_sales = df_sales.groupby('invoice_date')['quantity'].sum().reset_index()

In [26]:
# Inspect the monthly totals produced by the groupby: one row per month with
# the columns invoice_date and quantity.
df_sales

Unnamed: 0,invoice_date,quantity
0,2019-12-01,312280
1,2020-01-01,349159
2,2020-02-01,265638
3,2020-03-01,348544
4,2020-04-01,292225
5,2020-05-01,373685
6,2020-06-01,363699
7,2020-07-01,369434
8,2020-08-01,398938
9,2020-09-01,544899


In [27]:
# Rename by label rather than by position, so a change in column order can
# never silently swap the date and sales series. This also rebinds df_sales to
# a new frame instead of mutating the one displayed earlier.
df_sales = df_sales.rename(columns={'invoice_date': 'date', 'quantity': 'sales'})

In [28]:
# Model the month-over-month change instead of the raw sales level.
df_diff = (
    df_sales
    # previous month's sales, aligned on the current month's row
    .assign(prev_sales=lambda frame: frame['sales'].shift(1))
    # the first month has no predecessor, so its row is dropped
    .dropna()
    # change in sales relative to the previous month
    .assign(diff=lambda frame: frame['sales'] - frame['prev_sales'])
)
df_diff.head(10)

Unnamed: 0,date,sales,prev_sales,diff
1,2020-01-01,349159,312280.0,36879.0
2,2020-02-01,265638,349159.0,-83521.0
3,2020-03-01,348544,265638.0,82906.0
4,2020-04-01,292225,348544.0,-56319.0
5,2020-05-01,373685,292225.0,81460.0
6,2020-06-01,363699,373685.0,-9986.0
7,2020-07-01,369434,363699.0,5735.0
8,2020-08-01,398938,369434.0,29504.0
9,2020-09-01,544899,398938.0,145961.0
10,2020-10-01,594443,544899.0,49544.0


In [29]:
# Turn the differenced series into a supervised-learning table: each row holds
# the current diff together with the diffs of the two preceding months.
df_supervised = df_diff.drop(columns=['prev_sales'])
lag_columns = {
    'lag_' + str(step): df_supervised['diff'].shift(step)
    for step in (1, 2)
}
# Rows without a full lag history contain NaN and are removed; the index is
# renumbered from zero afterwards.
df_supervised = (
    df_supervised
    .assign(**lag_columns)
    .dropna()
    .reset_index(drop=True)
)

In [30]:
# MinMaxScaler is imported here but not referenced by name in this cell.
from sklearn.preprocessing import MinMaxScaler
# Keep only the modelling columns: the target `diff` followed by its lags.
df_model = df_supervised.drop(columns=['sales', 'date'])
# The last six rows form the evaluation set (no training split is made here);
# .values hands them over as a plain 2-D ndarray.
test_set = df_model.iloc[-6:].values

In [31]:
# Load the persisted scaler (presumably the one fitted at training time —
# confirm against the training notebook). The context manager guarantees the
# file handle is closed, which a bare open() inside pickle.load() does not.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load scaler.sav from a trusted source.
with open('scaler.sav', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
# test_set comes from DataFrame.values and is therefore already a 2-D
# (rows, columns) array, so it can be passed to the scaler without reshaping.
test_set_scaled = scaler.transform(test_set)

In [32]:
# Column 0 is the target; the remaining columns are the lag features.
lag_features = test_set_scaled[:, 1:]
n_samples, n_lags = lag_features.shape
# Insert a length-1 middle axis to get a (samples, 1, features) array.
X_test = lag_features.reshape(n_samples, 1, n_lags)

In [33]:
# Load the saved Keras model from model.h5 (presumably the trained LSTM
# mentioned in the comments above — the training code is not part of this
# notebook section).
model = load_model('model.h5')

In [34]:
# Predict on the scaled test features, feeding the samples to the model one at
# a time (batch_size=1). The result is still in the scaler's units.
y_pred = model.predict(X_test,batch_size=1)

In [35]:
# Show the scaled lag features in the (samples, 1, features) layout produced
# by the reshape.
X_test

array([[[-0.67310593,  1.76456811]],

       [[ 0.98262301, -0.54813953]],

       [[-0.11630925,  1.74054817]],

       [[ 0.07261442,  0.22151163]],

       [[ 0.35825317,  0.48265781]],

       [[ 1.75774964,  0.87749169]]])

In [36]:
# Give the predictions the same (samples, 1, features) layout as X_test.
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
# Put each prediction in front of its input features so the row has the column
# layout the scaler expects: target first, then the lags.
scaled_rows = []
for index, prediction in enumerate(y_pred):
    scaled_row = np.concatenate([prediction, X_test[index]], axis=1)
    print (scaled_row)
    scaled_rows.append(scaled_row)
# Stack the rows and drop the length-1 middle axis: (n, 1, k) -> (n, k).
stacked_rows = np.array(scaled_rows)
pred_test_set = stacked_rows.reshape(stacked_rows.shape[0], stacked_rows.shape[2])
# Map everything back to the original units; column 0 then holds the predicted
# month-over-month difference.
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)

[[ 0.08833814 -0.67310593  1.76456811]]
[[-0.1461207   0.98262301 -0.54813953]]
[[-0.11054715 -0.11630925  1.74054817]]
[[0.01179178 0.07261442 0.22151163]]
[[0.02921257 0.35825317 0.48265781]]
[[-0.00888317  1.75774964  0.87749169]]


In [37]:
# Turn the predicted differences back into sales levels:
#   predicted sales for month t = actual sales for month t-1 + predicted diff.
# The history window is one row longer than the number of predictions, because
# the first row only supplies the "previous month" for the first prediction.
# It is derived from the prediction count rather than hard-coded, so changing
# the test-set size cannot silently misalign dates and sales.
n_predictions = len(pred_test_set_inverted)
history = df_sales[-(n_predictions + 1):]
if len(history) != n_predictions + 1:
    raise ValueError(
        'df_sales needs at least %d rows to anchor %d predictions, got %d'
        % (n_predictions + 1, n_predictions, len(history))
    )
sales_dates = list(history['date'])
act_sales = list(history['sales'])
result_list = [
    {
        # int() truncates the fractional part of the reconstructed value
        'pred_value': int(pred_test_set_inverted[index][0] + act_sales[index]),
        # the prediction belongs to the month after the one it is anchored on
        'date': sales_dates[index + 1],
    }
    for index in range(n_predictions)
]
df_result = pd.DataFrame(result_list)
#for multistep prediction, replace act_sales with the predicted sales

In [38]:
# Show the reconstructed sales predictions, one row per predicted month.
df_result

Unnamed: 0,pred_value,date
0,311667,2020-05-01
1,376806,2020-06-01
2,369297,2020-07-01
3,383548,2020-08-01
4,414265,2020-09-01
5,557574,2020-10-01


In [39]:
# Show the actual monthly sales (columns date and sales).
df_sales

Unnamed: 0,date,sales
0,2019-12-01,312280
1,2020-01-01,349159
2,2020-02-01,265638
3,2020-03-01,348544
4,2020-04-01,292225
5,2020-05-01,373685
6,2020-06-01,363699
7,2020-07-01,369434
8,2020-08-01,398938
9,2020-09-01,544899


In [42]:
# Keep the actuals from row position 5 onward.
df1 = df_sales.iloc[5:]

In [43]:
# Inner-join actuals and predictions on their month, so only months present in
# both frames survive.
df = df1.merge(df_result, on='date')

In [44]:
# Prediction error per month: actual sales minus predicted sales.
df['diff'] = df['sales'].sub(df['pred_value'])

In [49]:
# Convert the date column to its string form; the other columns keep their
# dtypes.
df = df.astype({'date': str})

In [50]:
# Print the date of every merged row, one per line.
for date_label in df['date']:
    print(date_label)

2020-05-01
2020-06-01
2020-07-01
2020-08-01
2020-09-01
2020-10-01
