# Quick Start

In [2]:
import os
from openai import OpenAI
from loguru import logger
import sys
# Make the sibling ``model_evaluation`` directory importable from this notebook.
# The relative path is resolved against the current working directory, so this
# presumably expects the kernel to be started from the notebook's own folder —
# TODO confirm.
_model_eval_dir = os.path.abspath(os.path.join('..', 'model_evaluation'))
if _model_eval_dir not in sys.path:  # keep the cell idempotent when re-run
    sys.path.append(_model_eval_dir)


# Basic Usage

In [3]:
# Connection settings for the running vLLM server. Every value can be overridden
# through an environment variable so the notebook runs on another machine without
# editing this cell; the defaults reproduce the original hard-coded setup.

# The model path you used to start the vLLM server
MODEL_PATH = os.environ.get(
    "VLLM_MODEL_PATH",
    "/home/ubuntu/workspace/mistral-nemo-minitron-8b-instruct-healthcare-text2sql_vV2.8",
)

# vLLM server details from your running instance
IP = os.environ.get("VLLM_HOST", "localhost")
PORT = int(os.environ.get("VLLM_PORT", "8000"))  # kept as an int, like the original constant
BASE_URL = f"http://{IP}:{PORT}/v1"

In [4]:
# Create the OpenAI-compatible client pointed at the local vLLM server.
# A placeholder key is passed because the local server does not require one.
client = OpenAI(base_url=BASE_URL, api_key="not-needed")

In [5]:
# Minimal schema (DDL) for the single `patients` table used in this example.
ddl = (
    "CREATE TABLE patients (\n"
    "    patient_id INT PRIMARY KEY,\n"
    "    name VARCHAR(255),\n"
    "    age INT,\n"
    "    disease VARCHAR(255)\n"
    ");\n"
)

In [6]:
# Task instruction embedded in the system message.
instruction = "Generate a SQLite query to answer the following question."

# Natural-language question to be answered with SQL.
question = "How many patients are older than 50?"

# Build the chat messages using the template from mistral-vllm.py: the system
# turn carries the schema and the instruction, the user turn carries the question.
system_content = (
    "Based on DDL statements, instructions, and the current date, "
    "generate a SQL query in the following sqlite to answer the question:"
    f"\n\nDDL statements:\n{ddl}\nInstructions:\n{instruction}"
)
system_message = {"role": "system", "content": system_content}
user_message = {"role": "user", "content": f"{question}"}
prompt_chat_template = [system_message, user_message]

In [7]:
logger.info("Sending request to vLLM server...")

try:
    # Request parameters for the chat completion call.
    request_options = {
        "model": MODEL_PATH,
        "messages": prompt_chat_template,
        "temperature": 0.0,
        "max_tokens": 512,  # Maximum length of the generated SQL query
        "stop": ["<extra_id_1>"],  # Optional: stop sequence if your model uses one
    }
    response = client.chat.completions.create(**request_options)

    # --- Report the response: the first choice's message text is the query ---
    first_choice = response.choices[0]
    generated_sql = first_choice.message.content
    logger.success("\n✅ Server responded successfully!")
    logger.info("\nGenerated SQL Query:")
    logger.info(generated_sql)

except Exception as err:
    # Any exception raised inside the try block is logged instead of propagating.
    logger.error(f"\n❌ An error occurred: {err}")

2025-07-27 03:15:20.901 | INFO     | __main__:<module>:1 - Sending request to vLLM server...
2025-07-27 03:15:22.051 | SUCCESS  | __main__:<module>:14 - 
✅ Server responded successfully!
2025-07-27 03:15:22.052 | INFO     | __main__:<module>:15 - 
Generated SQL Query:
2025-07-27 03:15:22.052 | INFO     | __main__:<module>:16 -  ```sql
SELECT COUNT(DISTINCT patient_id) FROM patients WHERE age > 50;
```

