# Model Evaluation

In [76]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as  np
import seaborn as sns
sns.set(style="whitegrid", color_codes=True)

# Directory holding the Kaggle CSV files, relative to the notebook.
path = 'data/kaggle/'

# Test set: the first CSV column is read as the index, then the index is
# replaced by the parsed `date` column and that column is removed.
df_test = pd.read_csv(path + 'test.csv', index_col=0)
df_test = df_test.set_index(pd.to_datetime(df_test['date']))
df_test = df_test.drop(labels=['date'], axis=1)

# Training set: the first CSV column becomes the index and is parsed as datetimes.
df_train = pd.read_csv(path + 'train.csv', index_col=0)
df_train.index = pd.to_datetime(df_train.index)

In [77]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    """Frame a DataFrame as a supervised-learning table of shifted copies.

    The result places side by side, in this order:
      * `data` shifted down by `window`, `window - 1`, ..., 1 rows,
        with columns named ``<col>(t-<i>)``;
      * `data` itself, with columns named ``<col>(t)``;
      * `data` shifted up by `lag` rows, with columns named ``<col>(t+<lag>)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; every column is replicated for each shift.
    window : int
        Number of past steps to include.
    lag : int
        Number of rows the final block is shifted upward.
    dropnan : bool
        When true, rows containing any NaN (introduced by shifting or
        already present) are removed.

    Returns
    -------
    pandas.DataFrame
        The concatenated frame with the renamed columns.
    """
    source_cols = list(data.columns)
    past_steps = list(range(window, 0, -1))

    # Past observations, oldest first.
    frames = [data.shift(step) for step in past_steps]
    labels = ['%s(t-%d)' % (col, step) for step in past_steps for col in source_cols]

    # Current observation.
    frames.append(data)
    labels.extend('%s(t)' % (col) for col in source_cols)

    # Observation `lag` rows ahead.
    frames.append(data.shift(-lag))
    labels.extend('%s(t+%d)' % (col, lag) for col in source_cols)

    supervised = pd.concat(frames, axis=1)
    supervised.columns = labels
    return supervised.dropna() if dropnan else supervised

def expand_df(df):
    """Return a copy of `df` with an extra int32 `weekend` indicator column.

    The flag is 1 where the index's ``dayofweek`` is greater than 3 and 0
    otherwise. With pandas' Monday=0 numbering that covers Friday through
    Sunday, so Friday is flagged as well.
    NOTE(review): confirm that including Friday is intended.

    The input frame is not modified. Month and year features are
    currently not added.
    """
    expanded = df.copy()
    late_week = expanded.index.dayofweek > 3
    expanded['weekend'] = np.asarray(late_week).astype(np.int32)
    return expanded


# Preview the first five rows of the test frame.
df_test.head(5)

Unnamed: 0_level_0,store,item
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01,1,1
2018-01-02,1,1
2018-01-03,1,1
2018-01-04,1,1
2018-01-05,1,1


In [78]:
import numpy as np

# Framing parameters: one past step per row, zero forward shift.
window, lag = 1, 0

# Add the shifted columns, then keep only the rows whose index date is 2017-12-31.
df_train = series_to_supervised(df_train, window=window, lag=lag)
df_train = df_train.loc[df_train.index.date == np.datetime64('2017-12-31')]

In [79]:
# Shifted copies of the identifier columns: only `item(t)` / `store(t)` are kept.
columns_to_drop = [('%s(t+%d)' % (col, lag)) for col in ['item', 'store']]
for i in range(window, 0, -1):
    columns_to_drop += [('%s(t-%d)' % (col, i)) for col in ['item', 'store']]

# The forward-shifted sales column is removed as well. Its name is built from
# `lag` (rather than hard-coded as 'sales(t+0)') so the cell keeps working when
# `lag` changes. Rebinding instead of `inplace=True` avoids mutating a frame
# that was produced by boolean filtering.
df_train = df_train.drop(labels=columns_to_drop + ['sales(t+%d)' % lag], axis=1)
df_train = df_train.rename(columns={'store(t)': 'store', 'item(t)': 'item'})


In [80]:
# Feature frame: the cleaned training rows plus the `weekend` indicator.
X = expand_df(df_train)

In [81]:
from sklearn.preprocessing import OneHotEncoder

# One encoder per identifier column; each is fitted on the values present in X.
store_ohe = OneHotEncoder()
item_ohe = OneHotEncoder()

# The encoders return sparse matrices; densify them so they can be wrapped in
# DataFrames. These frames get a default integer index, not X's index.
X_store = pd.DataFrame(store_ohe.fit_transform(X.store.values.reshape(-1, 1)).toarray())
X_items = pd.DataFrame(item_ohe.fit_transform(X.item.values.reshape(-1, 1)).toarray())

# Label the one-hot columns from the category values actually present, in
# ascending order, instead of assuming exactly 10 stores and 50 items
# numbered from 1. Must run before `store` / `item` are dropped from X.
# NOTE(review): relies on the encoder emitting its columns in ascending
# category order -- confirm against the installed scikit-learn version.
X_store.columns = ['store_' + str(v) for v in sorted(X.store.unique())]
X_items.columns = ['item_' + str(v) for v in sorted(X.item.unique())]

X = X.drop(['store', 'item'], axis=1)

In [82]:
# Stack column-wise into a plain ndarray: the remaining X columns first, then
# the item one-hot columns, then the store one-hot columns.
X = np.concatenate([X, X_items, X_store], axis=1)

# Function-call form prints the same on Python 2 and is valid on Python 3.
print(X)

[[27. 23.  1. ...  0.  0.  0.]
 [24. 31.  1. ...  0.  0.  0.]
 [31. 19.  1. ...  0.  0.  0.]
 ...
 [70. 76.  1. ...  1.  0.  0.]
 [62. 65.  1. ...  0.  1.  0.]
 [62. 82.  1. ...  0.  0.  1.]]
