In [1]:
import numpy as np
import pandas as pd
import os
import time
import lightgbm as lgb
import seaborn as sns
import matplotlib.pyplot as plt
import gc
import pickle
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings("ignore")

In [6]:
# Earliest value of 'd' kept by prepare_data, and the stores that are
# handled one at a time further down.
FIRST_DAY = 0
STORES = [
    'CA_1', 'CA_2', 'CA_3', 'CA_4',
    'TX_1', 'TX_2', 'TX_3',
    'WI_1', 'WI_2', 'WI_3',
]

# Load the preprocessed frame.
# NOTE: reading a pickle can execute arbitrary code; only load files that
# come from a trusted source.
process_df = pd.read_pickle('processed.pkl')

# Columns that are never fed to the model. Work is done one store at a
# time, so store_id is useless as a feature; cat_id and dept_id are useful
# and are therefore deliberately NOT listed here.
unused_features = [
    'id',
    'state_id',
    'store_id',
    'date',
    'wm_yr_wk',
    'd',
    'sales',
    'revenue',
]

# Training features: every remaining column, in the frame's original order.
is_unused = process_df.columns.isin(unused_features)
used_features = process_df.columns[~is_unused]

def prepare_data(df, store):
    """Return the rows of ``df`` for one store, starting at ``FIRST_DAY``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'd' column (compared against the module-level
        ``FIRST_DAY``) and a 'store_id' column.
    store
        Value that 'store_id' must equal for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels, row order and
        columns. ``df`` itself is not modified.
    """
    # Build a single combined mask instead of filtering twice: the day
    # filter alone would first copy (nearly) the whole frame before the
    # store filter throws most of it away again.
    keep = (df['d'] >= FIRST_DAY) & (df['store_id'] == store)
    return df[keep]

# Directory that the per-store pickled model files are loaded from.
model_dir = './models_store/'

In [7]:
# Score every store with its own pickled model. Each partial result keeps
# the row index of process_df so the predictions can be joined back onto
# the source rows afterwards.
predictions = []
for store in STORES:
    print('starting:',store)

    # get test data: rows of this store dated after 2016-04-24
    store_df = prepare_data(process_df, store)
    x_test = store_df[store_df['date'] > '2016-04-24']

    # load model; the context manager closes the file handle right away
    # instead of leaving it open until garbage collection.
    # NOTE: unpickling can execute arbitrary code -- only load model files
    # that come from a trusted source.
    model_name = store+'_'+'.bin'
    with open(os.path.join(model_dir, model_name), 'rb') as model_file:
        model = pickle.load(model_file)

    # predict, labelling each prediction with the index of its source row
    pred = model.predict(x_test[used_features])
    predictions.append(pd.DataFrame({'pred': pred}, index=x_test.index))

    # release the per-store frames before the next store is loaded
    del store_df, x_test
pred_df = pd.concat(predictions)

starting: CA_1
starting: CA_2
starting: CA_3
starting: CA_4
starting: TX_1
starting: TX_2
starting: TX_3
starting: WI_1
starting: WI_2
starting: WI_3


In [9]:
# postprocess predictions
HORIZON = 28           # number of forecast columns (F1..F28) per output row
EVAL_FIRST_DAY = 1942  # rows with d below this value form the validation half


def _to_submission(frame):
    """Pivot long-format predictions into one row per id with F1..F28 columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows with 'id', 'd' and 'pred' columns; each (id, d) pair must be
        unique or ``pivot`` raises.

    Returns
    -------
    pandas.DataFrame
        Columns 'id', 'F1', ..., 'F28'. Relies on ``pivot`` returning the
        day columns in ascending 'd' order so that F1 is the earliest day.

    Raises
    ------
    ValueError
        If ``frame`` does not contain exactly ``HORIZON`` distinct days.
    """
    wide = frame.pivot(index='id', columns='d', values='pred').reset_index()
    n_days = wide.shape[1] - 1  # everything except the 'id' column
    if n_days != HORIZON:
        raise ValueError(
            'expected %d distinct days, found %d' % (HORIZON, n_days)
        )
    wide.columns = ['id'] + ['F' + str(i + 1) for i in range(HORIZON)]
    return wide


# attach the predictions to the test-period rows (inner join on row index)
x_test = process_df[(process_df['date'] > '2016-04-24')]
x_test = pd.merge(x_test,pred_df,left_index=True,right_index=True)

# split by day into the two halves of the submission and reshape each
validation = _to_submission(x_test[x_test['d'] < EVAL_FIRST_DAY])
evaluation = _to_submission(x_test[x_test['d'] >= EVAL_FIRST_DAY])

# The ids presumably carry an 'evaluation' tag (TODO confirm against
# processed.pkl); the validation rows need the 'validation' tag instead.
validation['id'] = validation['id'].str.replace(
    'evaluation', 'validation', regex=False
)

output = pd.concat([validation, evaluation]).reset_index(drop=True)
output.head()

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,0.858266,0.75257,0.734779,0.71681,1.057201,1.025184,1.047816,1.106663,0.899788,...,0.889225,1.229742,1.057615,1.064021,0.888159,0.888159,0.831087,0.958517,1.156251,1.075382
1,FOODS_1_001_CA_2_validation,0.780402,0.761726,0.761726,0.739971,0.809867,0.940574,0.885278,0.656037,0.662461,...,0.670691,0.767344,2.219912,1.305107,1.33136,1.308937,1.779446,1.780982,2.095231,1.501103
2,FOODS_1_001_CA_3_validation,1.558821,1.436377,1.451233,1.408782,1.360166,1.150305,1.126397,1.020402,0.987204,...,1.177929,1.212175,2.46242,1.571563,1.457014,1.451246,1.451246,1.434397,1.770405,1.178842
3,FOODS_1_001_CA_4_validation,0.520811,0.457434,0.443122,0.443122,0.447205,0.453887,0.446399,0.42761,0.387865,...,0.244673,0.258462,0.248593,0.372839,0.307953,0.297826,0.297826,0.346346,0.421307,0.45901
4,FOODS_1_001_TX_1_validation,0.549344,0.534888,0.514551,0.514551,0.487213,0.565545,0.624478,0.498249,0.503339,...,0.437866,0.470847,0.513903,0.392208,0.361159,0.361159,0.360283,0.370405,0.427117,0.4513


In [10]:
# Write the submission table to disk, leaving out the row index.
output.to_csv('submission_test_7.csv', index=False)