# Data Querying

## Importing libraries and initializing the SageMaker session

In [2]:
# One-time setup, intentionally left commented out: uncomment to install awswrangler.
# In a notebook, `%pip install awswrangler` is the safer form because it targets the running kernel's environment.
#!pip install awswrangler

In [7]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import sagemaker
import boto3
import botocore
import awswrangler as wr

# Build a SageMaker session on top of an explicitly constructed boto3 client.
config = botocore.config.Config()  # no overrides: botocore defaults apply
sm = boto3.client("sagemaker", config=config)
sess = sagemaker.Session(sagemaker_client=sm)

# Session-derived values; `bucket` and `region` are referenced by later cells.
region = sess.boto_region_name
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

In [25]:
# Create the Glue Data Catalog database used by the cells below.
# exist_ok=True is passed so that re-running this cell does not fail when the database is already there.
wr.catalog.create_database(name="UK Online Retail Store Database", exist_ok=True)

In [26]:
# Register the CSV data under s3://<bucket>/data/input/ as table `df_input` in the catalog database.
wr.catalog.create_csv_table(
    database="UK Online Retail Store Database",
    table="df_input",
    path=f"s3://{bucket}/data/input/",
    # Column order matters for CSV tables: it must follow the column order of the files.
    columns_types={
        "CustomerID": "float",
        "Country": "string",
        "Recency": "int",
        "Frequency": "int",
        "MonetaryValue_x": "float",
        "MonetaryValue_y": "float",
    },
    sep=",",
    skip_header_line_count=1,  # skip the header row of each file
    mode="overwrite",
)

In [27]:
# Import from the public IPython.display module: the `display` re-export in
# IPython.core.display has been deprecated since IPython 7.14 and the old import
# fails on recent IPython releases.
from IPython.display import display, HTML

# Render a clickable link to the AWS Glue console for the session's region so the
# newly created database/table can be inspected.
display(HTML('<b>Review <a target="top" href="https://console.aws.amazon.com/glue/home?region={}#">AWS Glue Catalog</a></b>'.format(region)))

In [28]:
# Set up the default S3 bucket for Athena query results; the call returns the
# bucket's S3 path, which is shown as this cell's output.
wr.athena.create_athena_bucket()

's3://aws-athena-query-results-397738742408-us-east-2/'

#### Checking if the data was loaded correctly

In [34]:
# Sanity check: read the whole `df_input` table back through Athena.
sql_statment = """
SELECT *
FROM df_input
"""

# The returned DataFrame is the cell's last expression, so it is rendered as the cell output.
wr.athena.read_sql_query(
    sql=sql_statment,
    database="UK Online Retail Store Database",
)

Unnamed: 0,customerid,country,recency,frequency,monetaryvalue_x,monetaryvalue_y
0,12347.0,Iceland,21,71,1494.160034,2104.050049
1,12348.0,Finland,86,11,594.440002,310.000000
2,12352.0,Norway,100,38,1561.810059,944.229980
3,12356.0,Portugal,83,57,2753.080078,2628.805908
4,12359.0,Cyprus,27,143,1531.557983,2876.850098
...,...,...,...,...,...,...
1866,18270.0,United Kingdom,104,5,111.949997,171.199997
1867,18272.0,United Kingdom,63,55,980.539978,2098.040039
1868,18273.0,United Kingdom,95,1,51.000000,153.000000
1869,18283.0,United Kingdom,7,51,842.580017,1252.300049
