In [1]:
# Display setup: with ast_node_interactivity set to "all", IPython echoes every
# expression statement in a cell instead of only the last one. Later cells rely
# on bare expressions (e.g. `response`) to show results.
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

import os
import sys

# Add the directory three levels up to the import path so that the `src`
# package imported below can be resolved. The path is relative, so it is
# interpreted against the kernel's working directory.
# NOTE(review): presumably this resolves to the repository root -- confirm.
sys.path.append("../../../")
from src.athena import Athena
from src.utils import create_session

## Global Configuration

In [2]:
# --- Settings shared by the cells below -------------------------------------
# Target database, the tables to manage, and the folder holding the
# `<table>_ddl.sql` files.
database = "data_mart"
tables = ["weekly_sales"]
sql_path = "../sql/"

# Flags forwarded to the Athena wrapper: `wait` goes to the create/drop calls,
# `ctas_approach` goes to `athena.query`.
wait = True
ctas_approach = False

# --- AWS session and Athena client -------------------------------------------
# The role ARN and the S3 output location are read from environment variables
# rather than being hardcoded in the notebook.
boto3_session = create_session(
    role_arn=os.getenv("ATHENA_IAM_ROLE_ARN"),
    profile_name="dev",
)
athena = Athena(
    s3_output=os.getenv("ATHENA_S3_OUTPUT"),
    boto3_session=boto3_session,
)

# Echo the client so its configuration appears in the cell output.
athena

Athena(boto3_session=Session(region_name='us-east-1'), s3_output=s3://sql-case-studies/query_results)

## Create Database & Table

In [4]:
# Ask the Athena wrapper to create `database`, forwarding the global `wait` flag.
# NOTE(review): `wait=True` presumably blocks until the query finishes -- confirm
# against src.athena.
response = athena.create_database(database=database, wait=wait)

# Bare expression: echoes whatever create_database returned in the cell output.
response

Query executed successfully


In [5]:
# Load the DDL statement for every table in `tables` from
# `<sql_path>/<table>_ddl.sql` into `ddls` (keyed by table name), printing each
# statement followed by a separator so it can be reviewed before execution.
ddls = {}
for table in tables:
    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default (which is not UTF-8 on every system).
    with open(os.path.join(sql_path, f"{table}_ddl.sql"), "r", encoding="utf-8") as f:
        ddls[table] = f.read()

    print(ddls[table])
    print("-" * 50)
    print("\n")

CREATE EXTERNAL TABLE IF NOT EXISTS data_mart.weekly_sales (
    week_date TIMESTAMP COMMENT 'The starting date of the sales week for each record',
    region VARCHAR(20) COMMENT "Represents the geographical area of operations within Data Mart's multi-region strategy",
    platform VARCHAR(10) COMMENT 'Indicates whether sales occurred through the retail channel or the online Shopify storefront',
    segment VARCHAR(10) COMMENT 'Categorizes customers based on demographic and age-related groupings',
    customer_type VARCHAR(10) COMMENT 'Provides additional demographic details, such as lifestyle or purchasing behavior',
    transactions INT COMMENT 'The count of unique purchases made during the corresponding sales week',
    sales DOUBLE COMMENT 'The total dollar amount of purchases made in the corresponding sales week'
) 
COMMENT 'Sales data containing weekly transaction and sales information by region, platform, segment, and customer type' 
STORED AS PARQUET LOCATION 's3://sql-case-stu

In [6]:
# Run every DDL statement loaded into `ddls` through athena.create_table,
# forwarding the global `database` and `wait` settings.
for ddl in ddls.values():
    response = athena.create_table(database=database, query=ddl, wait=wait)
    # Bare expression inside the loop.
    # NOTE(review): this is presumably echoed only because the first cell sets
    # ast_node_interactivity = "all" -- confirm before changing that setting.
    response

Query executed successfully


In [7]:
# Preview each table by selecting up to 10 rows from `{database}.{table}`.
# The result of athena.query is not assigned to a variable.
# NOTE(review): the result presumably shows up in the cell output only because
# the first cell sets ast_node_interactivity = "all" -- confirm.
# The query text is built with an f-string from the notebook-level `database`
# and `tables` values defined in the configuration cell.
for table in tables:
    athena.query(
        database=database,
        query=f""" 
                SELECT
                    *
                FROM
                    {database}.{table} 
                LIMIT 10;
              """,
        ctas_approach=ctas_approach,
    )

Unnamed: 0,week_date,region,platform,segment,customer_type,transactions,sales
0,2020-08-31,ASIA,Retail,C3,New,120631,3656163.0
1,2020-08-31,ASIA,Retail,F1,New,31574,996575.0
2,2020-08-31,USA,Retail,,Guest,529151,16509610.0
3,2020-08-31,EUROPE,Retail,C1,New,4517,141942.0
4,2020-08-31,AFRICA,Retail,C2,New,58046,1758388.0
5,2020-08-31,CANADA,Shopify,F2,Existing,1336,243878.0
6,2020-08-31,AFRICA,Shopify,F3,Existing,2514,519502.0
7,2020-08-31,ASIA,Shopify,F1,Existing,2158,371417.0
8,2020-08-31,AFRICA,Shopify,F2,New,318,49557.0
9,2020-08-31,AFRICA,Retail,C3,New,111032,3888162.0


## Drop Database & Table

In [3]:
# Teardown, step 1: call athena.drop_table for every entry in `tables`,
# forwarding the global `database` and `wait` settings.
# This is destructive, so run it only when the tables are no longer needed.
for table in tables:
    athena.drop_table(database=database, table=table, wait=wait)

Query executed successfully


In [4]:
# Teardown, step 2: drop `database` itself. In this notebook it runs after the
# cell that drops the tables.
athena.drop_database(database=database, wait=wait)

Query executed successfully
