Install dependencies

In [1]:
! pip install psycopg-binary psycopg tabulate text-generation langchain langchain-community

Collecting psycopg-binary
  Downloading psycopg_binary-3.1.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.8 kB)
Collecting psycopg
  Downloading psycopg-3.1.19-py3-none-any.whl.metadata (4.2 kB)
Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting text-generation
  Downloading text_generation-0.7.0-py3-none-any.whl.metadata (8.5 kB)
Collecting langchain
  Downloading langchain-0.2.0-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.0-py3-none-any.whl.metadata (8.8 kB)
Collecting aiohttp<4.0,>=3.8 (from text-generation)
  Downloading aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting huggingface-hub<1.0,>=0.12 (from text-generation)
  Downloading huggingface_hub-0.23.0-py3-none-any.whl.metadata (12 kB)
Collecting pydantic<3,>2 (from text-generation)
  Downloading pydantic-2.7.1-py3-none-any.whl.metadata (107 kB)
[2K 

Import all required modules

In [2]:
import psycopg
import os
from tabulate import tabulate
from langchain_community.llms import HuggingFaceTextGenInference
from langchain_core.prompts import PromptTemplate

Connect to Postgres and load the database schema

In [3]:
# Open one shared connection with autocommit enabled; every connection
# setting is read from the environment.
conn = psycopg.connect(
    autocommit=True,
    dbname=os.environ.get("DATABASE_NAME"),
    host=os.environ.get("POSTGRES_URL"),
    user=os.environ.get("OWNERUSERNAME"),
    password=os.environ.get("OWNERPASSWORD"),
)

# List table/column/type for everything in the "public" schema (tables whose
# name starts with pg_stat are skipped). The rendered table is later embedded
# into both LLM prompts.
db_schema = conn.execute("SELECT table_name, column_name as Columns, data_type as DataTypes FROM  information_schema.columns where table_name NOT LIKE 'pg_stat%' AND table_schema='public' order by table_name,column_name;")

# The first item of each description entry is the column name; use the names
# as headers of a psql-style text table.
colnames = [column[0] for column in db_schema.description]
db_schema_formatted = tabulate(
    db_schema.fetchall(),
    headers=colnames,
    tablefmt='psql',
)

Configure the LLM client and the prompt templates

In [8]:
# Client for the text-generation inference server.
# The endpoint is meant to come from os.environ.get("LLM_ENDPOINT"), but that
# variable was set to a wrong URL in this Jupyter environment and fixing it
# would require restarting the notebook, so the address is hardcoded for now.
llm = HuggingFaceTextGenInference(
    inference_server_url="http://llm-service.llm:8000",
    # Sampling settings.
    top_k=10,
    top_p=0.5,
    temperature=0.5,
    repetition_penalty=1.03,
)

# Prompt for the first LLM call: given the table schema ({db_schema}) and the
# user's question ({query}), the model is asked to reply with a single SQL
# command and nothing else.
# NOTE(review): the <|...|> markers look like Llama 3 chat-format special
# tokens — confirm they match the model served at the inference endpoint.
sql_prompt_template = PromptTemplate.from_template("""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful AI assistant that can transform user queries into SQL commands to retrieve the data from the Postgresql database. The database has the next tables schema:
{db_schema}

Please prepare and return only the SQL command, based on the user query, without any formatting or newlines. The answer must contain only valid SQL command.<|eot_id|><|start_header_id|>user<|end_header_id|>
{query}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""")

# Prompt for the second LLM call: given the table schema ({db_schema}), the
# original question ({query}) and the formatted database result
# ({postgres_reply}), the model is asked for a short plain-language answer,
# or "I don't know" when the data does not answer the question.
# NOTE(review): everything, including the user query, is placed in the system
# turn and there is no separate user turn — confirm this is intentional.
final_prompt_template = PromptTemplate.from_template("""
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful AI assistant that can understand Postgresql replies and explain this data to the user. The database has the next tables schema:
{db_schema}

User query: {query}

Postgresql reply:
{postgres_reply}

Base your answer on the provided user query and Postgresql reply.
Generate a draft response using the selected information.
It should be easy to understand your answer. Don't add any introductory words, start answering right away. 
Keep your answer to a one or two sentences (if possible) that specifically answers the user's question. If not - try to keep the answer short, summarizing the returned data.
Generate your final response after adjusting it to increase accuracy and relevance.
Now only show your final response!
If you do not know the answer or context is not relevant, response with "I don't know".
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""")

handle_query function with all app logic

In [5]:
def handle_query(query, debug=False):
    """Answer a natural-language question about the database.

    The question is first turned into SQL by the LLM, the SQL is executed on
    the shared connection, and the formatted result is sent back to the LLM
    to be summarized in plain language.

    Args:
        query: The user's question in natural language.
        debug: When True, print the generated SQL and the formatted database
            reply before asking the LLM for the final answer.

    Returns:
        The LLM's final answer, or a short fallback message when the SQL
        fails to execute or yields no rows.
    """
    sql_query = llm.invoke(
        sql_prompt_template.format(db_schema=db_schema_formatted, query=query)
    )

    # SECURITY NOTE: sql_query is model output derived from user input and is
    # executed verbatim. The connection is opened with autocommit=True using
    # the OWNERUSERNAME credentials, so a generated write/DDL statement would
    # take effect immediately. Consider a read-only database role for this.
    try:
        postgres_reply = conn.execute(sql_query)
        # description is None when the statement produced no result set
        # (e.g. UPDATE, DDL or an empty string); iterating it or calling
        # fetchall() would raise, so treat it like an empty answer.
        if postgres_reply.description is None:
            return "Received empty SQL answer, try another query"
        colnames = [desc[0] for desc in postgres_reply.description]
        # Fetching stays inside the try so fetch-time database errors are
        # reported the same way as execution errors.
        postgres_reply_data = postgres_reply.fetchall()
    except psycopg.Error as e:
        print("Unable to process query: ", query)
        print("Database error: ", e)
        return "Try another query"

    if not postgres_reply_data:
        return "Received empty SQL answer, try another query"

    postgres_reply_formatted = tabulate(postgres_reply_data, headers=colnames, tablefmt='psql')
    if debug:
        print(sql_query)
        print(postgres_reply_formatted)
    return llm.invoke(final_prompt_template.format(db_schema=db_schema_formatted, query=query, postgres_reply=postgres_reply_formatted))

Ask a question

In [7]:
# Example run: pass a plain-English question through handle_query and print
# whatever it returns.
answer = handle_query("Please calculate the total sum of all John transactions.")
print(answer)

InvalidURL: Failed to parse: http://llm-service.llm:8000=