In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# raised by any imported library are hidden too — consider narrowing it by
# category or module.
warnings.filterwarnings('ignore')

# Render floats with three decimal places whenever pandas displays them.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Apply seaborn's default plot styling to subsequent matplotlib figures.
sns.set()

%matplotlib inline

from sklearn.multiclass import OneVsRestClassifier
import xgboost

from src.model import get_xgb_model, fit_model, predict_proba, predict_ordered_list, evaluate_result
from src.feature_engineering import enhance_with_timeseries_features
from src.clean_dataset import get_prediction_features, reduce_train
from src.metrics import mapk, transform_y

In [2]:
# Directory holding the pickled train/test frames. Change this single value to
# point the notebook at a different generated sample.
DATA_DIR = 'generated_files/120K'

# NOTE: pd.read_pickle can execute arbitrary code while unpickling, so only
# load files that come from a trusted source.
train_X = pd.read_pickle(DATA_DIR + '/train_X.pkl')
train_Y = pd.read_pickle(DATA_DIR + '/train_Y.pkl')

test_X = pd.read_pickle(DATA_DIR + '/test_X.pkl')
test_Y = pd.read_pickle(DATA_DIR + '/test_Y.pkl')

In [3]:
# The prediction feature list is derived from the training frame only and is
# then reused unchanged for the test frame.
prediction_features = get_prediction_features(train_X)

# Run the time-series enhancement on each split, training frame first.
# The helper's return value is discarded.
# NOTE(review): presumably it adds columns to the frame in place — confirm in
# src.feature_engineering; if so, re-running this cell may not be idempotent.
for split_X in (train_X, test_X):
    enhance_with_timeseries_features(split_X, prediction_features)

In [4]:
# Pass each split through reduce_train and keep the returned (X, Y) pair.
# NOTE(review): despite its name, reduce_train is applied to the test split as
# well — confirm that the same reduction is intended for evaluation data.
X_tr_reduced, Y_tr_reduced = reduce_train(train_X, train_Y)
X_ts_reduced, Y_ts_reduced = reduce_train(test_X, test_Y)

In [5]:
# Build the model through the project helper.
# NOTE(review): hyperparameters and any random seed are not visible here —
# presumably they are set inside src.model.get_xgb_model; confirm for reproducibility.
model = get_xgb_model()

In [6]:
# Fit the model on the reduced training split.
fit_model(model, X_tr_reduced, Y_tr_reduced)

In [7]:
# Score the model on the whole reduced test split: predict probabilities,
# convert them into ordered lists, then evaluate those against Y_ts_reduced.
# The last expression's value is what the cell displays as its output.
# NOTE(review): the score is presumably MAP@k (src.metrics exposes mapk) —
# confirm in src.model.evaluate_result.
probas = predict_proba(model, X_ts_reduced, prediction_features)
ordered_lists = predict_ordered_list(probas)
evaluate_result(ordered_lists, Y_ts_reduced)

0.8473047617906742

In [8]:
# Row_Date value that selects the "last month" slice of the test split.
# NOTE(review): assumed to be the most recent Row_Date in the data — confirm.
LAST_MONTH_DATE = '2016-04-28'

# Boolean mask over the reduced test frame for that date.
test_last_month = X_ts_reduced['Row_Date'] == LAST_MONTH_DATE
# Fail loudly rather than predicting and scoring on an empty selection.
assert test_last_month.any(), 'no test rows with Row_Date == %s' % LAST_MONTH_DATE

X_ts_last = X_ts_reduced[test_last_month]
Y_ts_last = Y_ts_reduced[test_last_month]

# Predict, rank and evaluate on the last-month slice only.
# `probas` and `ordered_lists` are rebound here, so after this cell they hold
# the last-month results rather than the full-test ones.
probas = predict_proba(model, X_ts_last, prediction_features)
ordered_lists = predict_ordered_list(probas)
evaluate_result(ordered_lists, Y_ts_last)

0.8748325819479665