In [70]:
#Setup
import pandas as pd
from db import get_engine

engine = get_engine()

#Connection-level SQLite setup: turn on foreign-key enforcement and attach the
#raw and relational database files under the schema names `raw` and `relational`
#NOTE(review): PRAGMA and ATTACH only affect the connection they run on; later cells
#query through `engine`, so this presumably relies on get_engine() reusing that
#connection - confirm against db.get_engine
setup_statements = (
    "PRAGMA foreign_keys = ON;",
    "ATTACH DATABASE 'raw.db' AS raw;",
    "ATTACH DATABASE 'relational.db' AS relational;",
)

with engine.begin() as connection:
    for statement in setup_statements:
        connection.exec_driver_sql(statement)

In [71]:
#Validating that all required relational tables exist before performing sanity checks
#Failing indicates that loading relational tables (04_relational_model) was not run
tables_needed = [
    'erp_product_history',
    'erp_product_categories',
    'crm_products',
    'crm_sales',
    'crm_customers',
    'erp_customers',
    'erp_locations'
]

#Names of every table in the attached relational database whose name does not start with 'sqlite'
tables_loaded = pd.read_sql("""
SELECT name
FROM relational.sqlite_master
WHERE type = 'table'
AND name NOT LIKE 'sqlite%';""", engine)['name'].tolist()

missing = set(tables_needed) - set(tables_loaded)

if missing:
    #Naming the missing tables in the exception itself (sorted, because set order is
    #arbitrary) so the failure is self-explanatory even when only the traceback is seen
    raise RuntimeError(
        f"Missing Tables: {', '.join(sorted(missing))}\n"
        'Run 04_relational_model.ipynb first')

## Sanity Checks

In [72]:
#Ensuring every relational table has exactly one primary-key column and the expected
#number of foreign-key constraints
#Expected foreign-key constraint count per table
fk_expected = {
    'erp_product_history': 1,
    'erp_product_categories': 0,
    'crm_products': 1,
    'crm_sales': 2,
    'crm_customers': 1,
    'erp_customers': 1,
    'erp_locations': 0
}

#Every violation found, so one run reports all problems instead of stopping at the first
key_problems = []

with engine.begin() as connection:
    #Iterating over fk_expected rather than every table present in the database, so an
    #extra table with no expectation cannot raise a KeyError
    for table, fk_count_expected in fk_expected.items():
        table_info = pd.read_sql(f"PRAGMA relational.table_info({table});", connection)
        fk_list = pd.read_sql(f"PRAGMA relational.foreign_key_list({table});", connection)

        #'pk' is 0 for non-key columns and the 1-based position within the primary key
        #otherwise, so counting non-zero entries gives the number of key columns
        pk_columns = int((table_info['pk'] > 0).sum())

        #foreign_key_list yields one row per column pair; rows of the same constraint
        #share an 'id', so distinct ids give the number of constraints
        fk_count = fk_list['id'].nunique() if len(fk_list) else 0

        if pk_columns != 1:
            key_problems.append(
                f'Expected exactly 1 primary key column in {table}, found {pk_columns}')
        if fk_count != fk_count_expected:
            key_problems.append(
                f'Incorrect Number of Foreign Keys in {table}\n'
                f'Expected: {fk_count_expected}, Actual: {fk_count}')

if key_problems:
    raise RuntimeError('\n'.join(key_problems))

print('Correct number of primary and foreign keys!')

Correct number of primary and foreign keys!


In [73]:
#Showing, for each required relational table, its column definitions (name, type,
#primary-key flag) followed by its foreign-key constraints
schema_pragmas = ('table_info', 'foreign_key_list')

for table in tables_needed:
    print(f'Table: {table}')
    for pragma in schema_pragmas:
        #Same SQL text as issuing each PRAGMA separately; only the pragma name varies
        pragma_sql = f"\n    PRAGMA relational.{pragma}({table});"
        display(pd.read_sql(pragma_sql, engine))

Table: erp_product_history


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PRICE_ID,INTEGER,0,,1
1,1,PRD_ID,INTEGER,0,,0
2,2,PRD_COST,INTEGER,0,,0
3,3,PRD_START_DT,DATE,0,,0
4,4,PRD_END_DT,DATE,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,crm_products,PRD_ID,prd_id,NO ACTION,NO ACTION,NONE


Table: erp_product_categories


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,ID,TEXT,0,,1
1,1,CAT,TEXT,0,,0
2,2,SUBCAT,TEXT,0,,0
3,3,MAINTENANCE,TEXT,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match


Table: crm_products


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,prd_id,INTEGER,0,,1
1,1,cat_id,TEXT,0,,0
2,2,prd_key,TEXT,0,,0
3,3,prd_nm,TEXT,0,,0
4,4,prd_line,TEXT,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,erp_product_categories,cat_id,ID,NO ACTION,NO ACTION,NONE


Table: crm_sales


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,sls_ord_key,TEXT,0,,1
1,1,sls_ord_num,TEXT,0,,0
2,2,sls_prd_key,TEXT,0,,0
3,3,sls_cust_id,INTEGER,0,,0
4,4,sls_order_dt,DATE,0,,0
5,5,sls_ship_dt,DATE,0,,0
6,6,sls_due_dt,DATE,0,,0
7,7,sls_sales,INTEGER,0,,0
8,8,sls_quantity,INTEGER,0,,0
9,9,sls_price,INTEGER,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,crm_customers,sls_cust_id,cst_id,NO ACTION,NO ACTION,NONE
1,1,0,crm_products,sls_prd_key,prd_key,NO ACTION,NO ACTION,NONE


Table: crm_customers


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,cst_id,INTEGER,0,,1
1,1,cst_key,TEXT,0,,0
2,2,cst_firstname,TEXT,0,,0
3,3,cst_lastname,TEXT,0,,0
4,4,cst_marital_status,TEXT,0,,0
5,5,cst_gndr,TEXT,0,,0
6,6,cst_create_date,DATE,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,erp_customers,cst_key,CID,NO ACTION,NO ACTION,NONE


Table: erp_customers


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,CID,TEXT,0,,1
1,1,BDATE,DATE,0,,0
2,2,GEN,TEXT,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,erp_locations,CID,CID,NO ACTION,NO ACTION,NONE


Table: erp_locations


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,CID,TEXT,0,,1
1,1,CNTRY,TEXT,0,,0


Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
