In [2]:
from pymongo import MongoClient
from langchain_core.documents import Document # Keep Document as it's used in the query function signature if needed later
from langchain_ollama import ChatOllama

from app.chroma_client import chroma_client

In [3]:
MONGODB_URI = "mongodb://127.0.0.1:27017/mydb"  # Use the service name 'mongo'
mongo_client = MongoClient(MONGODB_URI)
mdb = mongo_client.mydb
mongo_collection = mdb.mycollection

In [87]:
user_query = "What is the most negative comments?"
separator = "#next_question#"
expand_to_n = 5

In [95]:
llm_model = ChatOllama(
    model="llama3.2:3b",
)

EXPANTION_PROMPT_TEMPLATE = f"""You are an AI language model assistant.
Your task is to generate {expand_to_n} different versions of the given user question or sentence to 
retrieve relevant documents from a vector database.
By generating multiple perspectives on the user question or sentence, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Database stores comments to pull-requests.
Provide these alternative questions or sentences seperated by '{separator}'. 
Don't add to your answer additional information.
Original question: {user_query}

Additional questions:
"""

EXPANTION_WITH_EXAMPLES_PROMPT_TEMPLATE = f"""You are an AI language model assistant.
Your task is to generate {expand_to_n} examples of answers to the given user question to 
retrieve relevant documents from a vector database.
The database stores comments for pull-requests.
Provide these answers separated by '{separator}'.
Don't add additional information.
Original question: {user_query}

Example responses:
"""

prompt = EXPANTION_WITH_EXAMPLES_PROMPT_TEMPLATE

In [96]:
_llm_response = llm_model.invoke(prompt)
additional_questions = _llm_response.content.split(separator)[1:]

In [97]:
_llm_response

AIMessage(content='#1 The most negative comment in the vector database could be retrieved using a query such as "negativity score > 0.8 AND sentiment label = \'negative\'".\n\n#2 To find the most negative comments, we can use a natural language processing (NLP) technique like sentiment analysis to calculate the negativity score of each comment.\n\n#3 We can also consider using a ranking algorithm that takes into account the frequency and severity of negative comments in the database.\n\n#4 Another approach would be to apply machine learning techniques such as clustering or deep learning models trained on sentiment analysis tasks.\n\n#5 Additionally, we could utilize vector similarity search algorithms like an embedding-based method to find comments with similar negative sentiments.', additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2025-04-22T07:26:09.036185Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3614089291, 'load_duration': 26263125, '

In [98]:
for question in additional_questions:
    print(question)

additional_questions_str = " ".join(additional_questions).replace('\n\n', ' ')

In [80]:
augmented_query = f"{user_query} {additional_questions_str}"
print(augmented_query)

Get the most negative comments. 


In [81]:
results = chroma_client.similarity_search(augmented_query)

context_comments = "\n\n --- \n\n".join([doc.page_content for doc in results])

In [82]:
context_comments

'Add a comment explaining why this specific approach was taken.\n\n --- \n\nThe performance of this query could be improved.\n\n --- \n\nPlease reformat this section to adhere to PEP 8 guidelines.\n\n --- \n\nThis function could benefit from some inline comments explaining the logic.'