In [9]:
# Notebook bootstrap: display settings plus the imports used by later cells.
from IPython.core.interactiveshell import InteractiveShell

# Have IPython echo every expression statement in a cell, not only the last
# one; later cells rely on this to show results produced inside loops.
InteractiveShell.ast_node_interactivity = "all"

import os
import sys

# Extend the import search path three directories up so the `src` package
# imported below resolves. NOTE(review): the path is relative to the kernel's
# working directory, so this presumably assumes the notebook is launched from
# its own folder -- confirm.
sys.path.append("../../../")
from src.athena import Athena
from src.utils import create_session

## Global Configuration

In [10]:
# --- Settings shared by every cell below ------------------------------------
# Target database, the tables that live in it, and the folder holding one
# "<table>_ddl.sql" script per table.
database = "fresh_segments"
tables = ["interest_map", "interest_metrics"]
sql_path = "../sql/"

# Flags forwarded unchanged to the Athena helper calls in later cells.
# NOTE(review): `wait` presumably blocks until the query finishes and
# `ctas_approach` presumably selects how results are fetched -- confirm
# against src.athena.
wait = True
ctas_approach = False

# --- AWS plumbing -----------------------------------------------------------
# Session built from the local "dev" profile together with the Athena role ARN.
boto3_session = create_session(
    role_arn="arn:aws:iam::722696965592:role/athena-full-access-role",
    profile_name="dev",
)

# The query-result location comes from the environment; os.getenv yields None
# when ATHENA_S3_OUTPUT is not set.
athena = Athena(
    s3_output=os.getenv("ATHENA_S3_OUTPUT"),
    boto3_session=boto3_session,
)

# Echo the client so the effective configuration is visible in the output.
athena

Athena(boto3_session=Session(region_name='us-east-1'), s3_output=s3://sql-case-studies/query_results)

## Create Database & Tables

In [3]:
# Create the target database through the Athena helper, passing the notebook's
# `wait` flag along, then echo whatever the helper returned.
response = athena.create_database(wait=wait, database=database)
response

Query executed successfully


In [4]:
# Load the DDL script of every table from `sql_path` (one file per table,
# named "<table>_ddl.sql") into `ddls`, keyed by table name, and print each
# script so it can be reviewed before it is executed.
ddls = {}
for table in tables:
    ddl_file = os.path.join(sql_path, f"{table}_ddl.sql")
    # Pin the encoding: without it open() falls back to the locale's preferred
    # encoding, which differs between platforms.
    with open(ddl_file, "r", encoding="utf-8") as f:
        ddls[table] = f.read()

    print(ddls[table])
    print("-" * 50)
    print("\n")

CREATE EXTERNAL TABLE IF NOT EXISTS fresh_segments.interest_map (
  id INT COMMENT 'Unique identifier for the interest',
  interest_name VARCHAR(100) COMMENT 'Name of the interest',
  interest_summary VARCHAR(500) COMMENT 'Brief summary or description of the interest',
  created_at TIMESTAMP COMMENT 'Timestamp when the record was created',
  last_modified TIMESTAMP COMMENT 'Timestamp when the record was last modified'
)
COMMENT 'The interest map table links the interest IDs with the relevant interest names and summaries'
STORED AS PARQUET
LOCATION 's3://sql-case-studies/fresh_segments/interest_map/'
TBLPROPERTIES ('classification'='parquet', 'parquet.compress'='SNAPPY');

--------------------------------------------------


CREATE EXTERNAL TABLE IF NOT EXISTS fresh_segments.interest_metrics (
  record_month INT COMMENT 'Represents the month of the record',
  record_year INT COMMENT 'Represents the year of the record',
  month_year VARCHAR(7) COMMENT 'Month and year concatenated as a st

In [6]:
# Run each loaded DDL statement against the target database.
for ddl in ddls.values():
    response = athena.create_table(query=ddl, database=database, wait=wait)
    # Bare expression kept on purpose: with ast_node_interactivity set to
    # "all", IPython should echo a non-None value even inside this loop.
    response

Query executed successfully
Query executed successfully


In [7]:
# Sanity check: pull the first ten rows of every table that was just created.
for table in tables:
    preview_sql = f""" 
                SELECT
                    *
                FROM
                    {database}.{table} 
                LIMIT 10;
              """
    # The call is left as a bare expression so its return value is echoed for
    # each table (relies on ast_node_interactivity = "all").
    athena.query(query=preview_sql, database=database, ctas_approach=ctas_approach)

Unnamed: 0,id,interest_name,interest_summary,created_at,last_modified
0,1,Fitness Enthusiasts,Consumers using fitness tracking apps and webs...,2016-05-26 14:57:59,2018-05-23 11:30:12
1,2,Gamers,Consumers researching game reviews and cheat c...,2016-05-26 14:57:59,2018-05-23 11:30:12
2,3,Car Enthusiasts,Readers of automotive news and car reviews.,2016-05-26 14:57:59,2018-05-23 11:30:12
3,4,Luxury Retail Researchers,Consumers researching luxury product reviews a...,2016-05-26 14:57:59,2018-05-23 11:30:12
4,5,Brides & Wedding Planners,People researching wedding ideas and vendors.,2016-05-26 14:57:59,2018-05-23 11:30:12
5,6,Vacation Planners,Consumers reading reviews of vacation destinat...,2016-05-26 14:57:59,2018-05-23 11:30:13
6,7,Motorcycle Enthusiasts,Readers of motorcycle news and reviews.,2016-05-26 14:57:59,2018-05-23 11:30:13
7,8,Business News Readers,Readers of online business news content.,2016-05-26 14:57:59,2018-05-23 11:30:12
8,12,Thrift Store Shoppers,Consumers shopping online for clothing at thri...,2016-05-26 14:57:59,2018-03-16 13:14:00
9,13,Advertising Professionals,People who read advertising industry news.,2016-05-26 14:57:59,2018-05-23 11:30:12


Unnamed: 0,record_month,record_year,month_year,interest_id,composition,index_value,ranking,percentile_ranking
0,7,2018,07-2018,32486,11.89,6.19,1,99.86
1,7,2018,07-2018,6106,9.93,5.31,2,99.73
2,7,2018,07-2018,18923,10.85,5.29,3,99.59
3,7,2018,07-2018,6344,10.32,5.1,4,99.45
4,7,2018,07-2018,100,10.77,5.04,5,99.31
5,7,2018,07-2018,69,10.82,5.03,6,99.18
6,7,2018,07-2018,79,11.21,4.97,7,99.04
7,7,2018,07-2018,6111,10.71,4.83,8,98.9
8,7,2018,07-2018,6214,9.71,4.83,8,98.9
9,7,2018,07-2018,19422,10.11,4.81,10,98.63


## Drop Tables & Database

In [11]:
# Clean-up, step 1: drop every table listed in `tables` from the database.
# NOTE(review): the DDL scripts declare the tables as EXTERNAL, so this
# presumably removes only the catalog entries and leaves the S3 data in
# place -- confirm.
for table in tables:
    athena.drop_table(table=table, database=database, wait=wait)

Query executed successfully
Query executed successfully


In [12]:
# Clean-up, step 2: drop the database itself; this cell follows the one that
# drops its tables.
athena.drop_database(wait=wait, database=database)

Query executed successfully
