# Data Querying

## Importing libraries and initializing the SageMaker session

In [6]:
# Optional setup step: uncomment the line below if awswrangler is not already
# installed in this kernel's environment.
#!pip install awswrangler

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import sagemaker
import boto3
import botocore
import awswrangler as wr

# Build a SageMaker client with a default botocore configuration and wrap it
# in a SageMaker session object.
config = botocore.config.Config()
sm = boto3.client("sagemaker", config=config)
sess = sagemaker.Session(sagemaker_client=sm)

# Session-level values: the session's default bucket (used as an S3 location
# by later cells), the execution role, and the region the session is bound to.
bucket, role, region = (
    sess.default_bucket(),
    sagemaker.get_execution_role(),
    sess.boto_region_name,
)

In [7]:
# Create the catalog database that the table and queries in this notebook use.
# exist_ok=True is passed so that re-running the cell is accepted when the
# database is already present.
wr.catalog.create_database(name="UK Online Retail Store Database", exist_ok=True)

In [8]:
# Column name -> type mapping for the input CSV files, in file column order.
# NOTE(review): the "DailyTransCOunt" casing looks like a typo; the query
# output in this notebook shows the column as lowercase "dailytranscount".
input_column_types = {
    "CustomerID": "float",
    "Country": "string",
    "Recency": "int",
    "Frequency": "int",
    "DailySpending": "float",
    "DailyTransCOunt": "float",
    "MonetaryValue_x": "float",
    "MonetaryValue_y": "float",
}

# Register the comma-separated files under s3://<bucket>/data/input/ as the
# table "df_input", skipping one header line per file.
wr.catalog.create_csv_table(
    database="UK Online Retail Store Database",
    table="df_input",
    path=f"s3://{bucket}/data/input/",
    columns_types=input_column_types,
    mode="overwrite",
    sep=",",
    skip_header_line_count=1,
)

In [9]:
# Import from the public IPython.display module: importing `display` from
# IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import HTML, display

# Render a clickable link to the AWS Glue console for the session's region.
glue_console_url = f"https://console.aws.amazon.com/glue/home?region={region}#"
display(HTML(f'<b>Review <a target="top" href="{glue_console_url}">AWS Glue Catalog</a></b>'))

In [10]:
# Set up the Athena bucket via awswrangler; the call returns an S3 path,
# which is displayed as this cell's output.
wr.athena.create_athena_bucket()

's3://aws-athena-query-results-397738742408-us-east-2/'

#### Reviewing the data

In [13]:
# Read every row of the registered table back through Athena so the result
# can be inspected as the cell output.
sql_statment = """
SELECT *
FROM df_input
"""

wr.athena.read_sql_query(
    sql=sql_statment,
    database="UK Online Retail Store Database",
)

Unnamed: 0,customerid,country,recency,frequency,dailyspending,dailytranscount,monetaryvalue_x,monetaryvalue_y
0,13313.0,United Kingdom,53,31,304.869995,16.0,609.739990,945.580017
1,18097.0,United Kingdom,43,49,637.020020,24.0,1274.040039,1241.239990
2,16656.0,United Kingdom,30,27,625.744019,5.0,3128.719971,2729.825928
3,16875.0,United Kingdom,134,46,402.545013,23.0,805.090027,1290.439941
4,13094.0,United Kingdom,29,12,124.199997,2.0,869.400024,834.239990
...,...,...,...,...,...,...,...,...
1866,13238.0,United Kingdom,1,11,227.699997,11.0,227.699997,381.880005
1867,15208.0,United Kingdom,1,13,258.119995,13.0,258.119995,546.599976
1868,17837.0,United Kingdom,1,12,119.800003,12.0,119.800003,1473.579956
1869,15584.0,United Kingdom,0,36,154.649994,36.0,154.649994,570.460022
