In [1]:
from IPython.core.interactiveshell import InteractiveShell

# Ask IPython to display the result of every expression statement in a cell,
# not just the last one. Later cells appear to rely on this to show results
# produced by bare expressions inside `for` loops.
InteractiveShell.ast_node_interactivity = "all"

import os
import sys

# Add the directory three levels up to the import path so the project-local
# `src` package below can be imported. The path is relative, so it resolves
# against the kernel's working directory (presumably this notebook's folder;
# TODO confirm). It must run before the `src` imports.
sys.path.append("../../../")
from src.athena import Athena
from src.utils import create_session

## Global Configuration

In [2]:
# --- Notebook-wide settings ---------------------------------------------------
# Target database, the tables it contains, and the folder holding their DDL files.
database = "foodie_fi"
tables = ["plans", "subscriptions"]
sql_path = "../sql/"

# Flags forwarded to the Athena helper calls in the cells below.
wait = True
ctas_approach = False

# --- AWS session and Athena client --------------------------------------------
# Both values are read from the environment; os.getenv yields None when a
# variable is not set.
role_arn = os.getenv("ATHENA_IAM_ROLE_ARN")
boto3_session = create_session(profile_name="dev", role_arn=role_arn)

output_location = os.getenv("ATHENA_S3_OUTPUT")
athena = Athena(boto3_session=boto3_session, s3_output=output_location)

# Echo the client so its repr is shown as the cell output.
athena

Athena(boto3_session=Session(region_name='us-east-1'), s3_output=s3://sql-case-studies/query_results)

## Create Database & Tables

In [35]:
# Create the `database` configured above through the Athena helper.
# `wait` is True in the config cell; presumably it makes the call block until
# the query completes — TODO confirm against Athena.create_database.
response = athena.create_database(database=database, wait=wait)

# Bare expression so the notebook displays the returned value (if any).
response

Query executed successfully


In [40]:
# Read each table's DDL statement from `<sql_path>/<table>_ddl.sql` into `ddls`
# (keyed by table name) and print it so the schema can be reviewed before the
# next cell submits it to Athena.
ddls = {}
for table in tables:
    ddl_file = os.path.join(sql_path, f"{table}_ddl.sql")
    # Pin the encoding: without it, open() falls back to the platform's locale
    # encoding, so the same .sql file could decode differently (or fail) on
    # another machine.
    with open(ddl_file, "r", encoding="utf-8") as f:
        ddls[table] = f.read()

    # Statement text, then a visual separator and blank lines between tables.
    print(ddls[table])
    print("-" * 50)
    print("\n")

CREATE EXTERNAL TABLE IF NOT EXISTS foodie_fi.plans (
  plan_id TINYINT COMMENT 'Unique identifier for the plan',
  plan_name VARCHAR(20) COMMENT 'Name of the subscription plan',
  price FLOAT COMMENT 'Price of the subscription plan'
)
COMMENT 'The plans table contains information about the different subscription plans available, including churn events'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/foodie_fi/plans/'
TBLPROPERTIES ('classification'='parquet', 'parquet.compress'='SNAPPY');

--------------------------------------------------


CREATE EXTERNAL TABLE IF NOT EXISTS foodie_fi.subscriptions (
  customer_id SMALLINT COMMENT 'Unique identifier for the customer',
  plan_id TINYINT COMMENT 'Unique identifier for the plan associated with the subscription',
  start_date TIMESTAMP COMMENT 'Start date of the subscription'
)
COMMENT 'The subscriptions table stores information about customer subscriptions to various plans'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/foodie_fi/

In [41]:
# Submit every DDL statement loaded into `ddls` to create the tables in `database`.
for ddl in ddls.values():
    response = athena.create_table(database=database, query=ddl, wait=wait)
    # Bare expression inside a loop: IPython echoes it (when it is not None)
    # only because the first cell sets ast_node_interactivity = "all"; with the
    # default setting this line would display nothing.
    response

Query executed successfully
Query executed successfully


In [42]:
# Preview up to 10 rows of each table as a sanity check on the newly created tables.
# The query has no ORDER BY, so which rows come back (and in what order) is not guaranteed.
# The call is a bare expression inside the loop; its result is displayed per table
# only because the first cell sets ast_node_interactivity = "all".
for table in tables:
    athena.query(
        database=database,
        query=f""" 
                SELECT
                    *
                FROM
                    {database}.{table} 
                LIMIT 10;
              """,
        ctas_approach=ctas_approach,
    )

Unnamed: 0,plan_id,plan_name,price
0,0,trial,0.0
1,1,basic monthly,9.9
2,2,pro monthly,19.9
3,3,pro annual,199.0
4,4,churn,


Unnamed: 0,customer_id,plan_id,start_date
0,1,0,2020-08-01
1,1,1,2020-08-08
2,2,0,2020-09-20
3,4,1,2020-01-24
4,4,4,2020-04-21
5,5,0,2020-08-03
6,2,3,2020-09-27
7,3,0,2020-01-13
8,3,1,2020-01-20
9,4,0,2020-01-17


## Drop Database & Tables

In [3]:
# Teardown: drop every table listed in `tables` from `database`.
# NOTE(review): the tables are declared EXTERNAL in the DDL, so this presumably
# removes only the catalog entries and leaves the S3 data in place — confirm.
for table in tables:
    athena.drop_table(database=database, table=table, wait=wait)

Query executed successfully
Query executed successfully


In [4]:
# Teardown: drop the database itself; its tables are dropped by the loop in the preceding cell.
athena.drop_database(database=database, wait=wait)

Query executed successfully
