In [22]:
import boto3
import polars as pl
import json
import awswrangler as wr

In [23]:
boto3_session = boto3.Session(region_name='us-east-1')

In [24]:
catalog_id = "529088288102"
table_list = ["employee", "department", "enrollment", "service_history", "services"]
table_schema = {}
for table in table_list:
    response = wr.catalog.get_table_types(database="finetune_llm_querygen", table=table, catalog_id=catalog_id)
    print(response)
    table_schema[table] = table_schema

print(table_schema)

{'employee_id': 'bigint', 'first_name': 'string', 'last_name': 'string', 'department_id': 'bigint', 'hire_date': 'string'}
{'department_id': 'bigint', 'department_name': 'string'}
{'employee_id': 'bigint', 'service_id': 'bigint', 'enrollment_date': 'string'}
{'service_id': 'bigint', 'employee_id': 'bigint', 'department_id': 'bigint', 'start_date': 'string', 'end_date': 'string'}
{'service_id': 'bigint', 'service_name': 'string'}
{'employee': {...}, 'department': {...}, 'enrollment': {...}, 'service_history': {...}, 'services': {...}}


In [25]:
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

In [61]:
prompt = f"""
generate the sql query for the following spec and refer to the table schema provided below as reference for the actual table column names: and
fetch employee names and list all the departments they served as a service history report where employee details, department details and service details data are hosted in separated tables.
table schema for employee table {table_schema['employee']},  department table {table_schema['department']}, service history {table_schema['service_history']}
Just give SQL output no explanation required
Sample query:
SELECT 
    e.employee_id,
    e.first_name,
    e.last_name,
    d.department_name,
    sh.start_date,
    sh.end_date
FROM 
    finetune_llm_querygen.employee e
JOIN 
    finetune_llm_querygen.service_history sh ON e.employee_id = sh.employee_id
JOIN 
    finetune_llm_querygen.department d ON sh.department_id = d.department_id
ORDER BY 
    e.employee_id, sh.start_date
"""

print(prompt)


generate the sql query for the following spec and refer to the table schema provided below as reference for the actual table column names: and
fetch employee names and list all the departments they served as a service history report where employee details, department details and service details data are hosted in separated tables.
table schema for employee table {'employee': {...}, 'department': {...}, 'enrollment': {...}, 'service_history': {...}, 'services': {...}},  department table {'employee': {...}, 'department': {...}, 'enrollment': {...}, 'service_history': {...}, 'services': {...}}, service history {'employee': {...}, 'department': {...}, 'enrollment': {...}, 'service_history': {...}, 'services': {...}}
Just give SQL output no explanation required
Sample query:
SELECT 
    e.employee_id,
    e.first_name,
    e.last_name,
    d.department_name,
    sh.start_date,
    sh.end_date
FROM 
    finetune_llm_querygen.employee e
JOIN 
    finetune_llm_querygen.service_history sh ON e

In [62]:
body = json.dumps(
    {
        "anthropic_version" : "bedrock-2023-05-31",
        "max_tokens" : 512,
        "messages" : [
            {
                "role" : "user",
                "content" : [{"type": "text", "text": prompt}]
            }],
        "temperature" : 1
    
    }
)

prompt_request = {
    "body" : body,
    "modelId" : "anthropic.claude-3-5-sonnet-20240620-v1:0"
}

In [63]:
response = bedrock_client.invoke_model(modelId=prompt_request['modelId'], body=prompt_request['body'])
response_json = json.loads(response.get('body').read())
response_text = response_json.get('content')[0].get('text')
print(response_text)

SELECT 
    e.employee_id,
    e.first_name,
    e.last_name,
    d.department_name,
    sh.start_date,
    sh.end_date
FROM 
    employee e
JOIN 
    service_history sh ON e.employee_id = sh.employee_id
JOIN 
    department d ON sh.department_id = d.department_id
ORDER BY 
    e.employee_id, sh.start_date


In [65]:
# response_text = str(response_text).replace("employee_name","first_name")
df = pl.from_pandas(wr.athena.read_sql_query(sql=response_text,
                              database="finetune_llm_querygen",
                              workgroup='primary',
                              boto3_session=boto3_session,
                              data_source='AwsDataCatalog'
                             ))
print(df.head())

shape: (5, 6)
┌─────────────┬────────────┬───────────┬─────────────────┬────────────┬────────────┐
│ employee_id ┆ first_name ┆ last_name ┆ department_name ┆ start_date ┆ end_date   │
│ ---         ┆ ---        ┆ ---       ┆ ---             ┆ ---        ┆ ---        │
│ i64         ┆ str        ┆ str       ┆ str             ┆ str        ┆ str        │
╞═════════════╪════════════╪═══════════╪═════════════════╪════════════╪════════════╡
│ 1           ┆ Bryan      ┆ Diaz      ┆ Sales           ┆ 2017-09-28 ┆ 2019-04-04 │
│ 5           ┆ Michelle   ┆ Guzman    ┆ HR              ┆ 2015-07-15 ┆ 2016-09-10 │
│ 7           ┆ Donna      ┆ Beck      ┆ IT              ┆ 2016-05-16 ┆ 2019-03-09 │
│ 8           ┆ Deanna     ┆ Sanford   ┆ Sales           ┆ 2020-04-01 ┆ 2021-11-17 │
│ 9           ┆ Marcus     ┆ Garcia    ┆ IT              ┆ 2016-03-30 ┆ 2018-03-25 │
└─────────────┴────────────┴───────────┴─────────────────┴────────────┴────────────┘
