In [1]:
from IPython.core.interactiveshell import InteractiveShell

# Echo the value of every expression statement in a cell, not only the last
# one. Later cells rely on this to show results produced inside `for` loops.
InteractiveShell.ast_node_interactivity = "all"

import os
import sys

# Extend the import search path so the `src` imports below can resolve.
# NOTE(review): the path is relative, so it presumably depends on the kernel's
# working directory being this notebook's folder — confirm.
sys.path.append("../../../")
from src.athena import Athena
from src.utils import create_session

## Global Configuration

In [2]:
# --- Dataset settings --------------------------------------------------------
# Target database, the tables handled by this notebook, and the folder that
# holds one `<table>_ddl.sql` file per table.
database = "clique_bait"
tables = [
    "users",
    "events",
    "event_identifier",
    "campaign_identifier",
    "page_hierarchy",
]
sql_path = "../sql/"

# --- Query behaviour flags ---------------------------------------------------
# `wait` is forwarded to the create/drop calls and `ctas_approach` to
# `athena.query` in the cells below.
wait = True
ctas_approach = False

# --- AWS session and Athena client -------------------------------------------
# NOTE(review): profile name and role ARN are hard-coded for the "dev" setup;
# presumably `create_session` assumes this role — confirm in src.utils.
boto3_session = create_session(
    profile_name="dev",
    role_arn="arn:aws:iam::722696965592:role/athena-full-access-role",
)

# `os.getenv` yields None when ATHENA_S3_OUTPUT is not set, so that variable
# should be exported before the kernel is started.
athena = Athena(
    boto3_session=boto3_session,
    s3_output=os.getenv("ATHENA_S3_OUTPUT"),
)

# Echo the client so the effective region and output location are visible.
athena

Athena(boto3_session=Session(region_name='us-east-1'), s3_output=s3://sql-case-studies/query_results)

## Create Database & Tables

In [7]:
# Create the target database; `wait` is forwarded unchanged from the
# configuration cell.
response = athena.create_database(
    database=database,
    wait=wait,
)

# Echo whatever the call returned.
response

Query executed successfully


In [8]:
# Load each table's DDL statement from `<sql_path>/<table>_ddl.sql` into a dict
# keyed by table name, echoing every statement so the SQL that will be
# submitted is visible in the notebook.
ddls = {}
for table in tables:
    # Pin the encoding: without it `open` falls back to the platform's locale
    # default, so the same SQL file could decode differently across machines.
    with open(os.path.join(sql_path, f"{table}_ddl.sql"), "r", encoding="utf-8") as f:
        ddls[table] = f.read()

    # Statement, a 50-dash separator, then blank lines between tables.
    print(ddls[table])
    print("-" * 50)
    print("\n")

CREATE EXTERNAL TABLE IF NOT EXISTS clique_bait.users (
  user_id INT,
  cookie_id VARCHAR(6),
  start_date TIMESTAMP
)
COMMENT 'The users table stores customers who visit the Clique Bait website and their tagged cookie IDs'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/clique_bait/users/'
TBLPROPERTIES ('classification'='parquet', 'parquet.compress'='SNAPPY');

--------------------------------------------------


CREATE EXTERNAL TABLE IF NOT EXISTS clique_bait.events (
  visit_id VARCHAR(6),
  cookie_id VARCHAR(6),
  page_id INT,
  event_type INT,
  sequence_number INT,
  event_time TIMESTAMP
)
COMMENT 'The events table captures all customers vists that are logged at the cookie ID level'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/clique_bait/events/'
TBLPROPERTIES ('classification'='parquet', 'parquet.compress'='SNAPPY');

--------------------------------------------------


CREATE EXTERNAL TABLE IF NOT EXISTS clique_bait.event_identifier (
  event_type INT,
  event_name VA

In [11]:
# Submit the DDL statements one at a time, in the order they were loaded.
for ddl in ddls.values():
    response = athena.create_table(
        database=database,
        query=ddl,
        wait=wait,
    )
    # Bare expression kept on purpose: with the "all" interactivity setting
    # from the first cell it is echoed per iteration when it is not None.
    response

Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully


In [12]:
# Sanity check: pull the first five rows of every table that was just created.
for table in tables:
    # The call is left as a bare expression statement so that its result is
    # displayed for each table (relies on the "all" interactivity setting from
    # the first cell). `ctas_approach` is forwarded from the configuration cell.
    athena.query(
        database=database,
        query=f""" 
                SELECT
                    *
                FROM
                    {database}.{table} 
                LIMIT 5;
              """,
        ctas_approach=ctas_approach,
    )

Unnamed: 0,user_id,cookie_id,start_date
0,1,c4ca42,2020-02-04
1,2,c81e72,2020-01-18
2,3,eccbc8,2020-02-21
3,4,a87ff6,2020-02-22
4,5,e4da3b,2020-02-01


Unnamed: 0,visit_id,cookie_id,page_id,event_type,sequence_number,event_time
0,ccf365,c4ca42,1,1,1,2020-02-04 19:16:09.182
1,ccf365,c4ca42,2,1,2,2020-02-04 19:16:17.358
2,ccf365,c4ca42,6,1,3,2020-02-04 19:16:58.454
3,ccf365,c4ca42,9,1,4,2020-02-04 19:16:58.609
4,ccf365,c4ca42,9,2,5,2020-02-04 19:17:51.729


Unnamed: 0,event_type,event_name
0,1,Page View
1,2,Add to Cart
2,3,Purchase
3,4,Ad Impression
4,5,Ad Click


Unnamed: 0,campaign_id,products,campaign_name,start_date,end_date
0,1,1-3,BOGOF - Fishing For Compliments,2020-01-01,2020-01-14
1,2,4-5,25% Off - Living The Lux Life,2020-01-15,2020-01-28
2,3,6-8,Half Off - Treat Your Shellf(ish),2020-02-01,2020-03-31


Unnamed: 0,page_id,page_name,product_category,product_id
0,1,Home Page,,
1,2,All Products,,
2,3,Salmon,Fish,1.0
3,4,Kingfish,Fish,2.0
4,5,Tuna,Fish,3.0


## Drop Database & Tables

In [3]:
# Teardown, step 1: drop every table listed in `tables`, one call per table.
for table in tables:
    athena.drop_table(
        database=database,
        table=table,
        wait=wait,
    )

Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully
Query executed successfully


In [6]:
# Teardown, step 2: drop the database itself.
athena.drop_database(
    database=database,
    wait=wait,
)

Query executed successfully
