# General preprocessing for all models.
Overall Process:
- Merge the offers, transactions and history datasets.
- Set negative purchase amounts to 0.
- Remove transactions that occurred after the earliest offer date, on a per-store-chain basis.

### 1. Create a duplicate transactions table
- New table name: preprocessed_transactions


In [None]:
-- Working copy of the raw transactions table; the later preprocessing steps
-- modify this copy so that `transactions` itself stays untouched.
CREATE OR REPLACE TABLE preprocessed_transactions AS
SELECT
    *
FROM
    transactions;

### 2. Set negative purchase amounts to 0

In [None]:
-- Clamp negative purchase amounts to 0.
-- The WHERE clause restricts the update to rows that actually need changing;
-- non-negative amounts are left as they are, and NULL amounts do not satisfy
-- the predicate so they stay NULL.
UPDATE preprocessed_transactions
SET purchase_amount = 0
WHERE purchase_amount < 0;

### 3. Merge the offers and history tables

In [None]:
-- One row per history record, enriched with the category, company and brand
-- of the offer it refers to. LEFT JOIN keeps history rows whose offer_id has
-- no match in `offers`; their offer attributes are NULL.
CREATE OR REPLACE TEMPORARY TABLE merged_history_offer AS
SELECT
    hist.customer_id,
    hist.store_chain_id,
    hist.offer_id,
    hist.offer_date,
    ofr.product_category,
    ofr.company_id,
    ofr.brand_id
FROM history AS hist
LEFT JOIN offers AS ofr
    ON ofr.offer_id = hist.offer_id;

In [None]:
-- Peek at a single (arbitrary) row to sanity-check the merged table's columns.
SELECT
    *
FROM
    merged_history_offer
LIMIT 1;

### 4. Merge the offers & history table into the transactions table
- Attaches `offer_date` to each transaction; it is used to remove all transactions after the first offer date on a per-store-chain basis.

In [None]:
-- Rebuild preprocessed_transactions so that every transaction row carries the
-- offer_date of the offer matching it on customer, store chain, product
-- category, company and brand. Only the columns listed below are kept.
-- LEFT JOIN: transactions without a matching offer are retained with a NULL
-- offer_date.
-- NOTE(review): if merged_history_offer holds more than one row for the same
-- join key, each matching transaction is duplicated once per match -- confirm
-- the key is unique or deduplicate before joining.
CREATE OR REPLACE TABLE preprocessed_transactions AS
SELECT 
    p.customer_id,
    p.store_chain_id,
    p.product_category,
    p.company_id,
    p.product_brand,
    p.date,
    p.purchase_quantity,
    p.purchase_amount,
    m.offer_date
FROM 
    preprocessed_transactions AS p
LEFT JOIN
    merged_history_offer AS m
ON 
    p.customer_id = m.customer_id AND
    p.store_chain_id = m.store_chain_id AND
    p.product_category = m.product_category AND
    p.company_id = m.company_id AND
    p.product_brand = m.brand_id;
    

In [None]:
-- Display the rebuilt transactions table to inspect the attached offer_date.
SELECT
    *
FROM
    preprocessed_transactions;

### 10. Summary
- The store_2_preprocessed_transactions table contains the following columns:
    1. CUSTOMER_ID: NUMBER(38,0)
    2. STORE_CHAIN_ID: NUMBER(38,0)
    3. PRODUCT_CATEGORY: NUMBER(38,0)
    4. COMPANY_ID: NUMBER(38,0)
    5. PRODUCT_BRAND: NUMBER(38,0)
    6. DATE: DATE
    7. PURCHASE_QUANTITY: NUMBER(38,0)
    8. PURCHASE_AMOUNT: FLOAT
    9. OFFER_DATE: DATE