In [228]:
import boto3

In [229]:
bd_clt = boto3.client("bedrock-runtime", region_name="us-east-1")

In [230]:
question = "User: Find all employees with the last name \"ERIC\" who are enrolled in a service?"

In [231]:
prompt = """
You are an AI SQL assistant. Generate SQL queries based on natural language prompts. Use the given schema as reference. 
Also append within quotes the database name with a dot in outputquery.

Schema:
{
  "tables": {
    "employee": {
      "columns": {
        "employee_id": "int",
        "first_name": "varchar",
        "last_name": "varchar",
        "department_id": "int",
        "hire_date": "date"
      }
    },
    "department": {
      "columns": {
        "department_id": "int",
        "department_name": "varchar"
      }
    },
    "services": {
      "columns": {
        "service_id": "int",
        "service_name": "varchar"
      }
    },
    "enrollment": {
      "columns": {
        "employee_id": "int",
        "service_id": "int",
        "enrollment_date": "date"
      }
    }
  }
}

Examples:
User: List all employees in the IT department.
AI: SELECT * FROM "employee" e JOIN department d ON e.department_id = d.department_id WHERE d.department_name = 'IT';

User: Get all employees hired after January 1, 2021.
AI: SELECT * FROM "employee" WHERE hire_date > "2021-01-01";

User: Get all services available in the system.
AI: SELECT * FROM "services";

User: Find all employees with the last name 'Smith'.
AI: SELECT * FROM "employee" WHERE last_name = 'Smith';

User: Get the department details of employees hired before 2020.
AI: SELECT e.first_name, e.last_name, d.department_name FROM "employee" e JOIN department d ON e.department_id = d.department_id WHERE e.hire_date < '2020-01-01';

User: List all services employees have enrolled in.
AI: SELECT s.service_name, e.first_name, e.last_name FROM "employee" en JOIN services s ON en.service_id = s.service_id JOIN employee e ON en.employee_id = e.employee_id;

User: Which employees are enrolled in the Health Insurance service?
AI: SELECT e.first_name, e.last_name FROM "employee" en JOIN services s ON en.service_id = s.service_id JOIN employee e ON en.employee_id = e.employee_id WHERE s.service_name = 'Health Insurance';

User: What is the total number of employees in each department?
AI: SELECT d.department_name, COUNT(e.employee_id) AS employee_count FROM "employee" e JOIN department d ON e.department_id = d.department_id GROUP BY d.department_name;

User: Find employees who have never enrolled in a service.
AI: SELECT e.first_name, e.last_name FROM "employee" e LEFT JOIN enrollment en ON e.employee_id = en.employee_id WHERE en.employee_id IS NULL;

User: Which employees are enrolled in multiple services?
AI: SELECT e.first_name, e.last_name, COUNT(en.service_id) AS service_count FROM "enrollment" en JOIN employee e ON en.employee_id = e.employee_id GROUP BY e.first_name, e.last_name HAVING COUNT(en.service_id) > 1;

User: What is the total number of enrollments per service?
AI: SELECT s.service_name, COUNT(en.service_id) AS total_enrollments FROM "enrollment" en JOIN services s ON en.service_id = s.service_id GROUP BY s.service_name;

Now, generate the SQL query for the following request:
User: Find all employees with the last name 'Anderson'?
AI: 
"""

In [234]:
model_id = "amazon.titan-text-express-v1" # "amazon.titan-embed-text-v2:0" # "anthropic.claude-3-5-haiku-20241022-v1:0" #"anthropic.claude-3-haiku-20240307-v1:0"  #"anthropic.claude-3-5-haiku-20241022-v1:0"
#prompt = "Give me the current date time in EST"
native_request = {
 "inputText": prompt,
    "textGenerationConfig": {
        "maxTokenCount": 512,
        "temperature": 0.5,
    }
}
import json
request = json.dumps(native_request)
response = bd_clt.invoke_model(modelId =model_id, body = request)
output = json.loads(response['body'].read())
print(output)
sql_stmt = output['results'][0]['outputText']
print(sql_stmt)

{'inputTextTokenCount': 973, 'results': [{'tokenCount': 20, 'outputText': 'SELECT * FROM "employee" WHERE last_name = \'Anderson\';', 'completionReason': 'FINISH'}]}
SELECT * FROM "employee" WHERE last_name = 'Anderson';


In [235]:
athena_client = boto3.client("athena", region_name = 'us-east-1')

bucket_name = "query-gen-hackathon"


In [236]:
response = athena_client.start_query_execution(QueryString= sql_stmt, QueryExecutionContext = {'Database': 'finetune_llm_querygen', 'Catalog': 'AwsDataCatalog'},
                                              ResultConfiguration = {'OutputLocation': "s3://query-gen-hackathon/athena-query-result", 
                                                                    })
query_id = response['QueryExecutionId']
print(query_id)

ae62aa66-f30a-40ae-97a8-13a3f3a88da4


In [237]:
exec_status = athena_client.get_query_execution(QueryExecutionId=query_id)
print(exec_status)

{'QueryExecution': {'QueryExecutionId': 'ae62aa66-f30a-40ae-97a8-13a3f3a88da4', 'Query': 'SELECT * FROM "employee" WHERE last_name = \'Anderson\'', 'StatementType': 'DML', 'ResultConfiguration': {'OutputLocation': 's3://query-gen-hackathon/athena-query-result/ae62aa66-f30a-40ae-97a8-13a3f3a88da4.csv'}, 'ResultReuseConfiguration': {'ResultReuseByAgeConfiguration': {'Enabled': False}}, 'QueryExecutionContext': {'Database': 'finetune_llm_querygen', 'Catalog': 'awsdatacatalog'}, 'Status': {'State': 'QUEUED', 'SubmissionDateTime': datetime.datetime(2025, 2, 26, 19, 56, 6, 543000, tzinfo=tzlocal())}, 'Statistics': {'TotalExecutionTimeInMillis': 53, 'QueryQueueTimeInMillis': 53, 'ResultReuseInformation': {'ReusedPreviousResult': False}}, 'WorkGroup': 'primary', 'EngineVersion': {'SelectedEngineVersion': 'AUTO', 'EffectiveEngineVersion': 'Athena engine version 3'}, 'SubstatementType': 'SELECT'}, 'ResponseMetadata': {'RequestId': '7187cd9f-c533-403d-b582-77672fd6ff3c', 'HTTPStatusCode': 200, 'H

In [239]:

s3_client = boto3.client('s3')
resp = s3_client.get_object(Bucket = bucket_name, Key = f'athena-query-result/{query_id}.csv')
print(resp['Body'])
import pandas as pd
output = pd.read_csv(resp['Body'])
print("****")
print(output)

<botocore.response.StreamingBody object at 0x7ff64e362830>
****
   employee_id first_name last_name  department_id   hire_date
0            4     Angela  Anderson            102  2021-02-19
1           15     Brandi  Anderson            102  2023-12-13
2           67    Garrett  Anderson            102  2023-08-09
3           94        Jon  Anderson            102  2021-10-07
