In [52]:
#Setup
import pandas as pd
from db import get_engine

engine = get_engine()

#Connection setup: turn on foreign key enforcement and attach both database files
#under the schema names (raw, relational) that the rest of the notebook refers to
#NOTE(review): PRAGMA and ATTACH take effect per SQLite connection; later cells presumably
#get this same connection back from the engine - confirm against get_engine()
setup_statements = (
    "PRAGMA foreign_keys = ON;",
    "ATTACH DATABASE 'raw.db' AS raw;",
    "ATTACH DATABASE 'relational.db' AS relational;",
)

with engine.begin() as connection:
    for statement in setup_statements:
        connection.exec_driver_sql(statement)

In [53]:
#Validating that all required staging views exist before creating the relational tables
#Failing indicates that transformation (02_transformation) was not performed
tables_needed = [
    'stg_crm_customers',
    'stg_crm_products',
    'stg_crm_sales',
    'stg_erp_customers',
    'stg_erp_locations',
    'stg_erp_product_categories'
]

#Names of every view currently defined in the attached raw database
tables_loaded = pd.read_sql("""
SELECT name
FROM raw.sqlite_master
WHERE type = 'view'
;""", engine)['name'].tolist()

#Required views that are absent from raw
missing = set(tables_needed) - set(tables_loaded)

if missing:
    print('Missing Tables:')
    #Sorted so the report is identical between runs (set iteration order is not stable)
    for m in sorted(missing):
        print(m)
    raise RuntimeError('Run 02_transformation.ipynb first')

### Loading Data into a Relational Model

#### erp_locations

In [54]:
#Rebuilding relational.erp_locations: drop any previous copy, recreate it, then load it from staging
#NOTE(review): relational.erp_customers references this table; if foreign keys are enforced on
#this connection, the DROP may fail on a re-run while child rows still exist - confirm
drop_sql = """
    DROP TABLE IF EXISTS relational.erp_locations
    """

#CID is the primary key; CNTRY is the only other column
create_sql = """
    CREATE TABLE relational.erp_locations(
        CID TEXT PRIMARY KEY,
        CNTRY TEXT
    );"""

#Straight column-for-column copy from the raw.stg_erp_locations staging view
insert_sql = """
    INSERT INTO relational.erp_locations(
        CID,
        CNTRY
    )
    SELECT
        CID,
        CNTRY
    FROM raw.stg_erp_locations
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### erp_customers

In [55]:
#Rebuilding relational.erp_customers: drop any previous copy, recreate it, then load it from staging
drop_sql = """
    DROP TABLE IF EXISTS relational.erp_customers
    """

#CID is both the primary key and a foreign key to erp_locations(CID)
create_sql = """
    CREATE TABLE relational.erp_customers(
        CID TEXT PRIMARY KEY,
        BDATE DATE,
        GEN TEXT,
        FOREIGN KEY (CID) REFERENCES erp_locations(CID)
    );"""

#Straight column-for-column copy from the raw.stg_erp_customers staging view
insert_sql = """
    INSERT INTO relational.erp_customers(
        CID,
        BDATE,
        GEN
    )
    SELECT
        CID,
        BDATE,
        GEN
    FROM raw.stg_erp_customers
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### crm_customers

In [56]:
#Rebuilding relational.crm_customers: drop any previous copy, recreate it, then load it from staging
drop_sql = """
    DROP TABLE IF EXISTS relational.crm_customers;
    """

#cst_id is the primary key; cst_key is unique and is a foreign key to erp_customers(CID)
create_sql = """
    CREATE TABLE relational.crm_customers(
        cst_id INTEGER PRIMARY KEY,
        cst_key TEXT UNIQUE,
        cst_firstname TEXT,
        cst_lastname TEXT,
        cst_marital_status TEXT,
        cst_gndr TEXT,
        cst_create_date DATE,
        FOREIGN KEY (cst_key) REFERENCES erp_customers(CID)
    );"""

#Straight column-for-column copy from the raw.stg_crm_customers staging view
insert_sql = """
    INSERT INTO relational.crm_customers(
        cst_id,
        cst_key,
        cst_firstname,
        cst_lastname,
        cst_marital_status,
        cst_gndr,
        cst_create_date
    )
    SELECT
        cst_id,
        cst_key,
        cst_firstname,
        cst_lastname,
        cst_marital_status,
        cst_gndr,
        cst_create_date
    FROM raw.stg_crm_customers
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### erp_product_categories

In [57]:
#Rebuilding relational.erp_product_categories: drop any previous copy, recreate it, then load it from staging
drop_sql = """
    DROP TABLE IF EXISTS relational.erp_product_categories
    """

#ID is the primary key; SUBCAT carries a UNIQUE constraint
create_sql = """
    CREATE TABLE relational.erp_product_categories(
        ID TEXT PRIMARY KEY,
        CAT TEXT,
        SUBCAT TEXT UNIQUE,
        MAINTENANCE TEXT
    );"""

#Straight column-for-column copy from the raw.stg_erp_product_categories staging view
insert_sql = """
    INSERT INTO relational.erp_product_categories(
        ID,
        CAT,
        SUBCAT,
        MAINTENANCE
    )
    SELECT
        ID,
        CAT,
        SUBCAT,
        MAINTENANCE
    FROM raw.stg_erp_product_categories
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### crm_products

In [58]:
#Rebuilding relational.crm_products: drop any previous copy, recreate it, then load it from staging
drop_sql = """
    DROP TABLE IF EXISTS relational.crm_products
    """

#prd_id is generated by the database (AUTOINCREMENT); prd_key and prd_nm are each UNIQUE;
#cat_id is a foreign key to erp_product_categories(ID)
create_sql = """
    CREATE TABLE relational.crm_products(
        prd_id INTEGER PRIMARY KEY AUTOINCREMENT,
        cat_id TEXT,
        prd_key TEXT UNIQUE,
        prd_nm TEXT UNIQUE,
        prd_line TEXT,
        FOREIGN KEY (cat_id) REFERENCES erp_product_categories(ID)
    );"""

#DISTINCT collapses repeated (cat_id, prd_key, prd_nm, prd_line) combinations from staging;
#the inner join keeps only products whose cat_id exists in relational.erp_product_categories
insert_sql = """
    INSERT INTO relational.crm_products(
        cat_id,
        prd_key,
        prd_nm,
        prd_line
    )
    SELECT DISTINCT
        p.cat_id,
        p.prd_key,
        p.prd_nm,
        p.prd_line
    FROM raw.stg_crm_products p
    JOIN relational.erp_product_categories c
    ON p.cat_id = c.ID
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### crm_sales

In [59]:
#Rebuilding relational.crm_sales: drop any previous copy, recreate it, then load it from staging
drop_sql = """
    DROP TABLE IF EXISTS relational.crm_sales
    """

#sls_ord_key is the primary key; sls_prd_key references crm_products(prd_key)
#and sls_cust_id references crm_customers(cst_id)
create_sql = """
    CREATE TABLE relational.crm_sales(
        sls_ord_key TEXT PRIMARY KEY,
        sls_ord_num TEXT,
        sls_prd_key TEXT,
        sls_cust_id INTEGER,
        sls_order_dt DATE,
        sls_ship_dt DATE,
        sls_due_dt DATE,
        sls_sales INTEGER,
        sls_quantity INTEGER,
        sls_price INTEGER,
        FOREIGN KEY (sls_prd_key) REFERENCES crm_products(prd_key),
        FOREIGN KEY (sls_cust_id) REFERENCES crm_customers(cst_id)
    );"""

#Straight column-for-column copy from the raw.stg_crm_sales staging view
insert_sql = """
    INSERT INTO relational.crm_sales(
        sls_ord_key,
        sls_ord_num,
        sls_prd_key,
        sls_cust_id,
        sls_order_dt,
        sls_ship_dt,
        sls_due_dt,
        sls_sales,
        sls_quantity,
        sls_price
    )
    SELECT
        sls_ord_key,
        sls_ord_num,
        sls_prd_key,
        sls_cust_id,
        sls_order_dt,
        sls_ship_dt,
        sls_due_dt,
        sls_sales,
        sls_quantity,
        sls_price
    FROM raw.stg_crm_sales
    ;"""

#All three statements run, in order, inside a single engine.begin() block
with engine.begin() as connection:
    for statement in (drop_sql, create_sql, insert_sql):
        connection.exec_driver_sql(statement)

#### erp_product_history

In [60]:
#Creating relational.erp_product_history table with proper relational model format
#Each staging product row that matches a relational.crm_products row on prd_key contributes
#one history row (cost plus start/end dates) keyed by that product's prd_id
with engine.begin() as connection:
    #Start from a clean slate so the cell can be re-run
    connection.exec_driver_sql("""
    DROP TABLE IF EXISTS relational.erp_product_history
    """)

    #PRICE_ID is generated by the database; PRD_ID is a foreign key to crm_products(prd_id)
    connection.exec_driver_sql("""
    CREATE TABLE relational.erp_product_history(
        PRICE_ID INTEGER PRIMARY KEY AUTOINCREMENT,
        PRD_ID INTEGER,
        PRD_COST INTEGER,
        PRD_START_DT DATE,
        PRD_END_DT DATE,
        FOREIGN KEY (PRD_ID) REFERENCES crm_products(prd_id)
    );""")

    #crm_products is schema-qualified like every other table in these queries: an unqualified
    #name is resolved by SQLite's schema search order and could bind to a same-named table
    #in another attached database instead of relational.crm_products
    connection.exec_driver_sql("""
    INSERT INTO relational.erp_product_history (
        PRD_ID,
        PRD_COST,
        PRD_START_DT,
        PRD_END_DT
    )
    SELECT
        p.prd_id,
        s.prd_cost,
        s.prd_start_dt,
        s.prd_end_dt
    FROM raw.stg_crm_products s
    JOIN relational.crm_products p
      ON s.prd_key = p.prd_key;
    """)

### Sanity Check

In [61]:
#Ensuring every relational table built above now exists in relational.db
expected_tables = ['erp_locations', 
                   'erp_customers',
                   'crm_customers',
                   'erp_product_categories',
                   'crm_products',
                   'crm_sales',
                   'erp_product_history']

#User tables in the relational schema; names starting with 'sqlite' are filtered out by the query
actual_tables = pd.read_sql("""
SELECT name, type
FROM relational.sqlite_master
WHERE type = 'table'
AND name NOT LIKE 'sqlite%';
""", engine)['name'].to_list()

#Expected tables that were not found
missing = set(expected_tables) - set(actual_tables)

if missing:
    print('Missing Tables:')
    #Sorted so the report is identical between runs (set iteration order is not stable)
    for m in sorted(missing):
        print(m)
    raise RuntimeError('Run cells in order')

#Only reached when nothing is missing, because the branch above raises
print('All tables created as expected!')

All tables created as expected!
