# Load data into Athena

***

## Libraries

In [17]:
import boto3
import sagemaker
!pip install --disable-pip-version-check -q PyAthena==2.1.0
from pyathena import connect
import pandas as pd

[0m

## Variables

In [27]:
# Athena database name and the S3 bucket used throughout the notebook.
db_name = "sdpd"
Bucket = 'sdpd-bucket'

# Region comes from the default boto3 session.
region = boto3.Session().region_name

# Athena writes its query results under this prefix; the connection needs it.
s3_staging_dir = f"s3://{Bucket}/athena/staging"
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

role = sagemaker.get_execution_role()

# Low-level S3 client and high-level S3 resource.
s3 = boto3.client('s3')
s3_resource = boto3.resource('s3')

# Echo the resolved settings, one per line.
print(
    *(
        f"{label} {value}"
        for label, value in (
            ('S3_staging_dir - > ', s3_staging_dir),
            ('Conn - > ', conn),
            ('Region - > ', region),
        )
    ),
    sep="\n",
)


S3_staging_dir - >  s3://sdpd-bucket/athena/staging
Conn - >  <pyathena.connection.Connection object at 0x7f4f1aa831d0>
Region - >  us-east-1


## Database

### Create

In [37]:
statement = "CREATE DATABASE IF NOT EXISTS {}".format(db_name)
pd.read_sql(statement, conn)

statement = "SHOW DATABASES"

df_show = pd.read_sql(statement, conn)
print(df_show.head(5))

if db_name in df_show.values:
    ingest_create_athena_db_passed = True
    
%store ingest_create_athena_db_passed

  database_name
0       default
1        dsoaws
2          sdpd
3       watersd
Stored 'ingest_create_athena_db_passed' (bool)


### DB Setup

In [40]:
def Header(file):
    """Return the column names from the first line of a CSV object in S3.

    The object is fetched from the notebook-level ``Bucket`` through the
    notebook-level ``s3`` client.

    Parameters
    ----------
    file : str
        Key of the CSV object inside ``Bucket``.

    Returns
    -------
    list of str
        The fields of the first line, split on commas, in file order.
        Quoted fields containing commas are not handled specially.
    """
    obj = s3.get_object(Bucket=Bucket, Key=file)
    body = obj['Body']
    try:
        # Read in 1000-byte chunks until the first newline shows up, so a
        # header longer than one chunk is not silently truncated.
        raw = b''
        while b'\n' not in raw:
            chunk = body.read(1000)
            if not chunk:
                break
            raw += chunk
    finally:
        # Release the underlying HTTP stream; only the first line is needed.
        body.close()

    # Cut at the newline while still in bytes: decoding a partial chunk could
    # fail if it ended in the middle of a multi-byte character.
    first_line = raw.split(b'\n', 1)[0]

    # 'utf-8-sig' drops a leading BOM, and rstrip removes the '\r' that CRLF
    # files would otherwise leave glued to the last column name.
    header = first_line.decode('utf-8-sig').rstrip('\r')

    # NOTE: an earlier version contained the bare expression
    # `header + ',year'`, whose result was discarded, so 'year' was never
    # part of the returned list. Callers that need an extra column should
    # append it to the returned list explicitly.
    return header.split(',')

In [41]:
# Keep the parsed column names at notebook scope: the CREATE TABLE cell reads
# `header_list`, which otherwise exists only as a local inside Header().
header_list = Header('SDPD_Calls_2023.csv')
header_list

['incident_num',
 'date_time',
 'day_of_week',
 'address_number_primary',
 'address_dir_primary',
 'address_road_primary',
 'address_sfx_primary',
 'address_dir_intersecting',
 'address_road_intersecting',
 'address_sfx_intersecting',
 'call_type',
 'disposition',
 'beat',
 'priority']

In [None]:
table_name = 'calls'

# Build the CREATE TABLE statement with the header names as columns, every
# column typed as string. Joining the definitions never produces a trailing
# comma, so nothing has to be sliced off afterwards (the slice would have
# removed the opening parenthesis if the column list were ever empty).
# NOTE(review): Athena DDL for CSV data normally needs CREATE EXTERNAL TABLE
# plus a row format and LOCATION — confirm whether that is appended later.
column_defs = ",".join(f"{col} string" for col in header_list)
create_table_sql = f"CREATE TABLE {db_name}.{table_name} ({column_defs})"