In [None]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate

from common import get_vector_store


# Raw user question. It is used unchanged for the baseline retrieval and is
# the query that the optimization chain rewrites further down.
initial_proppt = "how to computationally design an antibody for H5N1"
# Vector store handle obtained from the project helper `get_vector_store`
# (presumably opens the collection named "collection2" — confirm in `common`).
vector = get_vector_store("collection2")

In [None]:
# Before optimization: retrieve with the raw user prompt to get a baseline
# for comparison with the optimized-query retrieval.
res = vector.as_retriever().invoke(initial_proppt)

# Print the text of every retrieved document.
for d in res:
    print(d.page_content)


In [None]:
from pydantic import BaseModel, Field
# Chat model client for the "llama3.1:latest" model via langchain_ollama
# (assumes an Ollama server with that model is reachable — confirm locally).
llm = ChatOllama(model="llama3.1:latest")

# Instruction template that asks the model to rewrite a user query into
# retrieval-friendly search queries. `{context}` and `{query}` are the two
# placeholders filled in when the chain is invoked.
tmpl = """
You are a Query Optimization Engine that takes a user query and a context and returns a list of optimized search queries.

The original query is used to generate a plan, but is not quite suitable for retrieval, 

try to enrich the user query with provided professional knowledge and tools in the context.

Make sure the returned queries are precise and have low noise.

Context: {context}

Original Query: {query}
"""

# Build a PromptTemplate from the string above.
prompt = PromptTemplate.from_template(tmpl)

# Schema for the structured response expected from the model.
# NOTE(review): documented with comments rather than a docstring on purpose —
# Pydantic appears to include class docstrings in the generated schema, which
# would change what is sent to the model; confirm before adding one.
class Output(BaseModel):
   # Rewritten search queries produced by the model.
   queries: list[str] = Field(description="Optimized search queries")

# Pipeline: render the prompt, then call the model wrapped so that its reply
# is returned in the shape of `Output`.
chain = prompt | llm.with_structured_output(Output)

In [None]:

# Extra knowledge given to the optimizer: a description of the available
# tools, substituted into the template's `{context}` placeholder.
context = """
You have tools available:
1. Alphafold3, Foldx: predict the 3D structure of antibody sequences from an input Excel file and saves the result as a PDB file.
2. Gearbind, ddg: predicts binding affinities of antibodies
"""

# Run the optimization chain; the dict keys match the template placeholders
# `{query}` and `{context}`.
output: Output = chain.invoke({
    "query": initial_proppt,
    "context": context
})

In [None]:
# Bare expression so the notebook shows the structured result as cell output.
output

In [None]:
from langchain_core.documents import Document

# Show the raw prompt next to the optimized queries, then list the documents
# the vector store returns for each optimized query.
print(f"Initial prompt: {initial_proppt}\n")

print(f"Optimized query for RAG: {output.queries}\n")

print("Retrieved documents: \n")
# Build the retriever once; it does not depend on the individual query.
retriever = vector.as_retriever()
for q in output.queries:
    res: list[Document] = retriever.invoke(q)
    for d in res:
        # Double quotes outside, single quotes inside: reusing the enclosing
        # quote character inside an f-string is a SyntaxError before
        # Python 3.12.
        # Assumes every document carries "source" and "page" metadata;
        # a missing key raises KeyError — TODO confirm for non-paged sources.
        print(f"{d.metadata['source']}, page: {d.metadata['page']}")
        print("------content------")
        print(d.page_content)
        print("\n")
