In [1]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every expression statement in a cell, not only the last
# one. Later cells rely on this to show results produced inside `for` loops.
InteractiveShell.ast_node_interactivity = "all"

import os
import sys

# Extend the import search path so the project-local `src` package imported
# below can be resolved. The path is relative to the kernel's working
# directory -- presumably three levels up is the repository root; TODO confirm.
sys.path.append("../../../")
from src.athena import Athena
from src.utils import create_session

## Global Configuration

In [2]:
# --- Execution flags shared by the cells below ---
wait = True
ctas_approach = False

# --- Case-study identifiers and the directory holding the SQL files ---
database = "balanced_tree"
tables = ["product_details", "product_hierarchy", "product_prices", "sales"]
sql_path = "../sql/"

# --- AWS session and Athena client ---
# Both the role ARN and the S3 output location are read from the environment,
# so no account-specific values are stored in the notebook itself. A missing
# variable yields None, which is passed through unchanged.
boto3_session = create_session(
    profile_name="dev", role_arn=os.environ.get("ATHENA_IAM_ROLE_ARN")
)
athena = Athena(
    boto3_session=boto3_session,
    s3_output=os.environ.get("ATHENA_S3_OUTPUT"),
)

# Bare expression: echo the client's repr as the cell output.
athena

Athena(boto3_session=Session(region_name='us-east-1'), s3_output=s3://sql-case-studies/query_results)

## Create Database & Tables

In [3]:
# Create the database named by `database` through the Athena helper.
# `wait` is forwarded as configured above -- presumably it makes the call block
# until the query finishes; TODO confirm against Athena.create_database.
response = athena.create_database(database=database, wait=wait)

# Bare expression so the notebook echoes whatever the call returned.
response

Query executed successfully


In [7]:
# Load each table's DDL statement from `<sql_path>/<table>_ddl.sql` into `ddls`,
# keyed by table name, and print it so the schema can be reviewed before it is
# submitted to Athena in the next cell.
ddls = {}
for table in tables:
    ddl_path = os.path.join(sql_path, f"{table}_ddl.sql")

    # Pin the encoding explicitly: without it, open() falls back to the
    # platform's locale encoding, so the same file could decode differently
    # (or fail to decode) depending on the machine running the notebook.
    # The DDL printed in the recorded output is plain ASCII, which UTF-8 reads
    # unchanged.
    with open(ddl_path, encoding="utf-8") as f:
        ddls[table] = f.read()

    # Echo the statement followed by a visual separator between tables.
    print(ddls[table])
    print("-" * 50)
    print("\n")

CREATE EXTERNAL TABLE IF NOT EXISTS balanced_tree.product_details (
  product_id VARCHAR(6) COMMENT 'Unique identifier for the product',
  price SMALLINT COMMENT 'Price of the product in the store',
  product_name VARCHAR(50) COMMENT 'Name of the product',
  category_id SMALLINT COMMENT 'Unique identifier for the category',
  segment_id SMALLINT COMMENT 'Unique identifier for the segment',
  style_id SMALLINT COMMENT 'Unique identifier for the style',
  category_name VARCHAR(10) COMMENT 'Name of the category',
  segment_name VARCHAR(10) COMMENT 'Name of the segment',
  style_name VARCHAR(50) COMMENT 'Name of the style'
)
COMMENT 'The product details table includes all information about the products featured in the store'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/balanced_tree/product_details/'
TBLPROPERTIES ('classification'='parquet', 'parquet.compress'='SNAPPY');

--------------------------------------------------


CREATE EXTERNAL TABLE IF NOT EXISTS balanced_tree.product_hi

In [8]:
# Submit every DDL statement loaded into `ddls`, one create_table call each,
# in the order the statements were inserted.
for ddl_statement in ddls.values():
    response = athena.create_table(query=ddl_statement, database=database, wait=wait)
    # Kept as a bare expression: with ast_node_interactivity set to "all" the
    # notebook may echo it per iteration -- whether the recorded status lines
    # come from this echo or from the helper itself is not visible here.
    response

Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully


In [9]:
# Sanity check: preview up to ten rows from each freshly created table.
for table in tables:
    # The call is deliberately left as a bare expression statement. The
    # recorded output shows one result per table, which appears to rely on
    # ast_node_interactivity = "all" echoing expressions inside the loop;
    # assigning the result to a name would suppress that display.
    # `database` and `table` come from the configuration cell's fixed values,
    # not from user input, so they are interpolated directly into the SQL.
    athena.query(
        database=database,
        query=f""" 
                SELECT
                    *
                FROM
                    {database}.{table} 
                LIMIT 10;
              """,
        ctas_approach=ctas_approach,
    )

Unnamed: 0,product_id,price,product_name,category_id,segment_id,style_id,category_name,segment_name,style_name
0,c4a632,13,Navy Oversized Jeans - Womens,1,3,7,Womens,Jeans,Navy Oversized
1,e83aa3,32,Black Straight Jeans - Womens,1,3,8,Womens,Jeans,Black Straight
2,e31d39,10,Cream Relaxed Jeans - Womens,1,3,9,Womens,Jeans,Cream Relaxed
3,d5e9a6,23,Khaki Suit Jacket - Womens,1,4,10,Womens,Jacket,Khaki Suit
4,72f5d4,19,Indigo Rain Jacket - Womens,1,4,11,Womens,Jacket,Indigo Rain
5,9ec847,54,Grey Fashion Jacket - Womens,1,4,12,Womens,Jacket,Grey Fashion
6,5d267b,40,White Tee Shirt - Mens,2,5,13,Mens,Shirt,White Tee
7,c8d436,10,Teal Button Up Shirt - Mens,2,5,14,Mens,Shirt,Teal Button Up
8,2a2353,57,Blue Polo Shirt - Mens,2,5,15,Mens,Shirt,Blue Polo
9,f084eb,36,Navy Solid Socks - Mens,2,6,16,Mens,Socks,Navy Solid


Unnamed: 0,id,parent_id,level_text,level_name
0,1,,Womens,Category
1,4,1.0,Jacket,Segment
2,5,2.0,Shirt,Segment
3,6,2.0,Socks,Segment
4,7,3.0,Navy Oversized,Style
5,8,3.0,Black Straight,Style
6,9,3.0,Cream Relaxed,Style
7,10,4.0,Khaki Suit,Style
8,2,,Mens,Category
9,3,1.0,Jeans,Segment


Unnamed: 0,id,product_id,price
0,7,c4a632,13
1,8,e83aa3,32
2,9,e31d39,10
3,10,d5e9a6,23
4,11,72f5d4,19
5,12,9ec847,54
6,13,5d267b,40
7,14,c8d436,10
8,15,2a2353,57
9,16,f084eb,36


Unnamed: 0,prod_id,qty,price,discount,member,txn_id,start_txn_time
0,c4a632,4,13,17,t,54f307,2021-02-13 01:59:43.296
1,5d267b,4,40,17,t,54f307,2021-02-13 01:59:43.296
2,b9a74d,4,17,17,t,54f307,2021-02-13 01:59:43.296
3,2feb6b,2,29,17,t,54f307,2021-02-13 01:59:43.296
4,c4a632,5,13,21,t,26cc98,2021-01-19 01:39:00.345
5,e31d39,2,10,21,t,26cc98,2021-01-19 01:39:00.345
6,72f5d4,3,19,21,t,26cc98,2021-01-19 01:39:00.345
7,2a2353,3,57,21,t,26cc98,2021-01-19 01:39:00.345
8,f084eb,3,36,21,t,26cc98,2021-01-19 01:39:00.345
9,c4a632,1,13,21,f,ef648d,2021-01-27 02:18:17.164


## Drop Database & Tables

In [3]:
# Teardown, step 1: drop each table listed in `tables`, one call per table,
# before the database itself is dropped.
for table_name in tables:
    athena.drop_table(table=table_name, database=database, wait=wait)

Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully


In [4]:
# Teardown, step 2: drop the database named by `database`. This is destructive;
# the cells under "Create Database & Table" must be re-run to restore it.
athena.drop_database(database=database, wait=wait)

Query executed successfully
