In [240]:
import boto3
import polars as pl
import json
import awswrangler as wr

In [241]:
boto3_session = boto3.Session(region_name='us-east-1')

In [242]:
catalog_id = "529088288102"
table_list = ["employee", "department", "enrollment", "service_history", "services"]
table_schema = {}
for table in table_list:
    response = wr.catalog.get_table_types(database="finetune_llm_querygen", table=table, catalog_id=catalog_id)
    print(response)
    table_schema[table] = table_schema

print(table_schema)

{'employee_id': 'bigint', 'first_name': 'string', 'last_name': 'string', 'department_id': 'bigint', 'hire_date': 'string'}
{'department_id': 'bigint', 'department_name': 'string'}
{'employee_id': 'bigint', 'service_id': 'bigint', 'enrollment_date': 'string'}
{'service_id': 'bigint', 'employee_id': 'bigint', 'department_id': 'bigint', 'start_date': 'string', 'end_date': 'string'}
{'service_id': 'bigint', 'service_name': 'string'}
{'employee': {...}, 'department': {...}, 'enrollment': {...}, 'service_history': {...}, 'services': {...}}


In [243]:
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

In [244]:
prompt = """
Assume you are an AI SQL assistant. Generate SQL queries based on the provided prompts. The schema for different tables in the data lake is as follows:
{"database" : "finetune_llm_querygen",
"tables" : 
{ "employees" :  {'employee_id': 'bigint', 'first_name': 'string', 'last_name': 'string', 'department_id': 'bigint', 'hire_date': 'string'},
"departments" : {'department_id': 'bigint', 'department_name': 'string'},
"service_history" : {'service_id': 'bigint', 'employee_id': 'bigint', 'department_id': 'bigint', 'start_date': 'string', 'end_date': 'string'},
"enrollments" : {'employee_id': 'bigint', 'service_id': 'bigint', 'enrollment_date': 'string'}
}
}
Examples:
prompt: List all employees in the IT department.
completion: Select * from employee e Join department d on e.department_id = d.department_id where d.department_name = 'IT';

prompt: get all services available in the system.
completion: Select * from services;

prompt: find all employees with the last_name 'Smith'.
completion: Select * from employee where last_name = 'Smith';

"""



In [245]:
# prompt = f"""
# Generate the sql to retrieve employee information and their service history using the following tables employee table schema : {table_schema['employee']}, department table schema : {table_schema['department']},
# service_history table schema: {table_schema['service_history']}. Use the table information from the provided tables schema and columns only. Just give SQL output no explanation required.
# """

# prompt = f"""
# Generate the sql to list all the employees from the employee table who are in the department '103'. No details required.
# """

prompt = f"""
generate the sql to list the employee_id and their department name. No details required.
"""

print(prompt)


generate the sql to list the employee_id and their department name. No details required.



In [246]:
body = json.dumps(
    {
        "anthropic_version" : "bedrock-2023-05-31",
        "max_tokens" : 512,
        "messages" : [
            {
                "role" : "user",
                "content" : [{"type": "text", "text": prompt}]
            }],
        "temperature" : 1
    
    }
)

prompt_request = {
    "body" : body,
    "modelId" : "anthropic.claude-3-5-sonnet-20240620-v1:0"
}

In [247]:
import re
response = bedrock_client.invoke_model(modelId=prompt_request['modelId'], body=prompt_request['body'])
response_json = json.loads(response.get('body').read())
response_text = response_json.get('content')[0].get('text')
print(response_text)
if "```sql" in response_text:
    response_text = re.compile(r'```sql(.*?)```', re.DOTALL).findall(response_text)
print(response_text)

Here's a simple SQL query to list the employee_id and their department name:

```sql
SELECT e.employee_id, d.department_name
FROM employees e
JOIN departments d ON e.department_id = d.department_id;
```

This query assumes:
1. There's an "employees" table with columns including "employee_id" and "department_id"
2. There's a "departments" table with columns including "department_id" and "department_name"
3. The two tables are related by the "department_id" column

The query joins these tables to match each employee with their department name.
['\nSELECT e.employee_id, d.department_name\nFROM employees e\nJOIN departments d ON e.department_id = d.department_id;\n']


In [248]:
df = pl.from_pandas(wr.athena.read_sql_query(sql=response_text[0],
                              database="finetune_llm_querygen",
                              workgroup='primary',
                              boto3_session=boto3_session,
                              data_source='AwsDataCatalog'
                             ))
print(df.head())

shape: (5, 2)
┌─────────────┬─────────────────┐
│ employee_id ┆ department_name │
│ ---         ┆ ---             │
│ i64         ┆ str             │
╞═════════════╪═════════════════╡
│ 1           ┆ Sales           │
│ 2           ┆ IT              │
│ 3           ┆ HR              │
│ 4           ┆ HR              │
│ 5           ┆ HR              │
└─────────────┴─────────────────┘
