In [1]:
import json
import os
from chromadb.config import Settings
from langchain_community.chat_models import ChatOllama
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [2]:
# Initialize embeddings, vector store, and model
embeddings = HuggingFaceEmbeddings(
    model_name="dangvantuan/vietnamese-embedding",
    model_kwargs={"device": "cuda"},
)

setting = Settings(
    anonymized_telemetry=False,
    is_persistent=True,
)

vector_store = Chroma(
    persist_directory='.DB_llama3.1',
    embedding_function=embeddings,
    client_settings=setting,
)

retriever = vector_store.as_retriever()

model = ChatOllama(model="llama3.1")

  embeddings = HuggingFaceEmbeddings(
  from tqdm.autonotebook import tqdm, trange
⚠️ It looks like you upgraded from a version below 0.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.


In [3]:
def format_markdown_line(string: str) -> str:
    return f"\n{string}\n".replace("\n", """
""")

def process_questions_from_json(json_path: str, output_md_file: str):
    try:
        # Step 1: Load questions from JSON file
        with open(json_path, "r") as f:
            questions_list = json.load(f)

        # Step 2: Initialize a variable to store previous question's result
        previous_response_content = ""

        # Step 3: Loop through each question in the list
        for question_data in questions_list:
            question = question_data.get("question")

            # Step 4: Retrieve the documents
            retrieved_docs = retriever.get_relevant_documents(question)
            context = "\n".join([doc.page_content for doc in retrieved_docs])

            # If there's a previous question, add its result to the context
            if question_data.get("is_next_question") is True:
                context += format_markdown_line(f"Previous Response: {previous_response_content}")

            # Step 5: Generate the final prompt
            prompt_template = PromptTemplate.from_template(
                """
                <s> [INST] You are a helpful assistant, answer questions about ingested news documents. 
                Use only the context provided, do not use any information outside of this context. 
                If you don't know, just say that you don't know.[/INST] </s> 
                [INST] Question: {question} 
                Context: {context} 
                Answer: [/INST]
                """
            )
            prompt = prompt_template.format(question=question, context=context)

            # Step 6: Send the prompt to the model
            response = model.invoke(prompt)

            # Extract model's response content and retrieved document content
            model_response_content = response.content
            retrieved_docs_content = "".join([format_markdown_line(f"* {doc.page_content}") for doc in retrieved_docs])

            # Step 7: Append results to markdown file
            try:
                with open(output_md_file, "a") as md_file:
                    md_file.write(format_markdown_line(f"## Question: {question}"))
                    md_file.write(
                        format_markdown_line(f"**Response:**\n\n{model_response_content}")
                    )
                    md_file.write(
                        format_markdown_line(
                            f"**Retrieved Documents:**\n\n{retrieved_docs_content}"
                        )
                    )
            except Exception as e:
                print(f"Error appending to markdown file: {e}")
                continue

            # Step 8: Update previous response content for the next question
            previous_response_content = model_response_content

    except Exception as e:
        print(f"Error processing questions: {e}")

In [4]:
# Example usage
json_path = 'questions.json'
output_md_file = 'questions_results.md'
process_questions_from_json(json_path, output_md_file)

  retrieved_docs = retriever.get_relevant_documents(question)
