## Setup

In [1]:
import os

from db.Db import DataBase
from data_preprocessing.Category import Category
from datasets.Trainset import Trainset
from feature_engineering.Feature import Feature
from datasets.Testset import Testset
from Trainer import Trainer

## Load Data

In [2]:
# Load Data
# Build the data directory path (<cwd>/../data/) with os.path.join instead of
# hand-concatenating "/" so the separators are correct on every platform.
# The trailing "" component keeps the final separator that the original
# string concatenation produced.
file_path = os.path.join(os.getcwd(), os.pardir, "data", "")
dbase = DataBase(file_path)

## Data Preprocessing

In [3]:
# Original price data, as returned by DataBase.original_price();
# passed to Trainset and to the training/testing set builders further down.
original_price = dbase.original_price()

# Baskets data; used in the next cell to build the product-category table.
baskets_data = dbase.load_basket_data()

In [4]:
# Product Category Table
# Category is built from the baskets data; the table it generates feeds the
# train/test split and the training/testing set generation below.
catClass = Category(baskets_data)
prods_cat_table = catClass.generate_product_category_table()

In [5]:
# Split baskets and coupons into train and test parts, handing the
# product-category table to the splitter.
(
    baskets_train,
    baskets_test,
    coupons_train,
    coupons_test,
) = dbase.generate_split_data_with_category(prods_cat_table)

# Preview the first two rows of each split, in the order they were returned.
for split_part in (baskets_train, baskets_test, coupons_train, coupons_test):
    print(split_part.head(2))

   week shopper product  price  target category
0     0       0      71    629       1       21
1     0       0      91    605       1       24
   week shopper product  price  target category
0    89       0      67    637       1       13
1    89       0      71    629       1       21
   week shopper product  discount category
0     0       0      35        35       11
1     0       0     193        40        5
   week shopper product  discount category
0    89       0     131        30        0
1    89       0      16        25       22


## Feature Engineering

In [7]:
# Define feat
# Feature is constructed from the training split only (baskets_train,
# coupons_train); every feature cell below calls a method on this object.
print("feature engineering begin")
feat = Feature(baskets_train, coupons_train)

feature engineering begin


In [None]:
# Result of Feature.total_count_of_product(); kept in a variable because both
# the training-set and the testing-set builders take it as an input.
total_count_of_product = feat.total_count_of_product()
# Print the first two rows as a quick sanity check.
print(total_count_of_product.head(2))

In [None]:
# Result of Feature.reordered_product(); reused later by the training-set and
# testing-set builders.
reordered_product = feat.reordered_product()
# Two-row preview.
print(reordered_product.head(2))

In [None]:
# Result of Feature.category_count(); reused later by the training-set and
# testing-set builders.
category_count = feat.category_count()
# Two-row preview.
print(category_count.head(2))

In [None]:
# Result of Feature.reordered_category(); reused later by the training-set and
# testing-set builders.
reordered_category = feat.reordered_category()
# Two-row preview.
print(reordered_category.head(2))

In [None]:
# Result of Feature.coupon_in_same_category(); reused later by the training-set
# and testing-set builders.
coupon_in_same_category = feat.coupon_in_same_category()
# Two-row preview.
print(coupon_in_same_category.head(2))

In [None]:
# Result of Feature.ratio_of_reordered_products_per_shopper(); not previewed
# here, only stored for the training-set and testing-set builders.
ratio_of_reordered_products_per_shopper = feat.ratio_of_reordered_products_per_shopper()


In [None]:
# Result of Feature.ratio_of_reordered_categories_per_shopper(); stored for the
# training-set and testing-set builders.
ratio_of_reordered_categories_per_shopper = feat.ratio_of_reordered_categories_per_shopper()


In [None]:
# Result of Feature.average_price_per_shopper(); stored for the training-set
# and testing-set builders.
average_price_per_shopper = feat.average_price_per_shopper()


In [None]:
# Result of Feature.average_basket_size(); stored for the training-set and
# testing-set builders.
average_basket_size = feat.average_basket_size()


In [8]:
# Result of Feature.unique_products_per_shopper(); stored for the training-set
# and testing-set builders.
unique_products_per_shopper = feat.unique_products_per_shopper()


In [None]:
# Result of Feature.unique_categories_per_shopper(); stored for the
# training-set and testing-set builders.
unique_categories_per_shopper = feat.unique_categories_per_shopper()


In [9]:
# Progress marker that closes the feature engineering section (pairs with the
# "feature engineering begin" message printed when feat was created).
print("feature engineering finished")

feature engineering finished


## Training Set

In [None]:
# Train Table
# Trainset is built from the training split plus the original price data.
train_t = Trainset(baskets_train, coupons_train, original_price)

In [None]:
# full_df_train
# Result of Trainset.generate_full_df_train(); printed below with a banner and
# a two-row preview so it can be told apart from the other outputs.
full_df_train = train_t.generate_full_df_train()
print("----- full_df_train -----")
print(full_df_train.head(2))

In [None]:
# Generate training set
# The builder receives the product-category table, the original prices and the
# eleven feature results from the Feature Engineering section. They are passed
# positionally, so the order of this tuple is the order of the arguments.
training_set_inputs = (
    prods_cat_table,
    original_price,
    total_count_of_product,
    reordered_product,
    category_count,
    reordered_category,
    coupon_in_same_category,
    ratio_of_reordered_products_per_shopper,
    ratio_of_reordered_categories_per_shopper,
    average_price_per_shopper,
    average_basket_size,
    unique_products_per_shopper,
    unique_categories_per_shopper,
)
training_set = train_t.generate_training_set(*training_set_inputs)

# Banner followed by a two-row preview of the result.
print("===== training_set =====")
print(training_set.head(2))

In [None]:
# Separate the training set into its X_train and y_train parts.
(X_train, y_train) = train_t.split_trainingset_to_X_train_and_y_train(training_set)

# Two-row preview of each part, X first and then y.
for train_part in (X_train, y_train):
    print(train_part.head(2))

## Testing Set

In [None]:
# Testing table
# Testset is built from the test split; unlike Trainset above, it is
# constructed without original_price.
test_t = Testset(baskets_test, coupons_test)

In [None]:
# full_df_test
# Result of Testset.generate_full_df_test(), the test-side counterpart of
# full_df_train above.
full_df_test = test_t.generate_full_df_test()

In [None]:
# Generate testing set
# The feature inputs are the same objects that were computed from the training
# split above, followed by the already-built training_set as the last argument.
# NOTE(review): this calls generate_training_set on the Testset object even
# though the result is stored as testing_set — confirm that Testset really
# exposes this method name (and not e.g. generate_testing_set) and that it
# accepts training_set as a trailing argument.
testing_set = test_t.generate_training_set(
    prods_cat_table,
    original_price,
    total_count_of_product,
    reordered_product,
    category_count,
    reordered_category,
    coupon_in_same_category,
    ratio_of_reordered_products_per_shopper,
    ratio_of_reordered_categories_per_shopper,
    average_price_per_shopper,
    average_basket_size,
    unique_products_per_shopper,
    unique_categories_per_shopper,
    training_set
)