# Query engine for Docling-parsed Markdown files

This notebook demonstrates the use of the `DoclingMdQueryEngine` for retrieval-augmented question answering over documents. It shows how to set up the engine with Docling-parsed Markdown files and how to execute natural language queries against the indexed data.

The `DoclingMdQueryEngine` integrates persistent ChromaDB vector storage with LlamaIndex for efficient document retrieval.

In [None]:
# Install the LlamaIndex Chroma vector-store integration and LlamaIndex itself.
# Both versions are pinned with `==` so a fresh environment gets the same packages.
%pip install llama-index-vector-stores-chroma==0.4.1
%pip install llama-index==0.12.16

In [None]:
import os

import autogen

# Load the LLM configurations through autogen from "../OAI_CONFIG_LIST".
config_list = autogen.config_list_from_json(env_or_file="../OAI_CONFIG_LIST")

# The rest of the notebook needs at least one configuration entry.
assert len(config_list) > 0

model_names = [entry["model"] for entry in config_list]
print("models to use: ", model_names)

# Export the API key of the first configuration entry as OPENAI_API_KEY.
os.environ["OPENAI_API_KEY"] = config_list[0]["api_key"]

In [None]:
from autogen.agentchat.contrib.rag.docling_query_engine import DoclingMdQueryEngine

# Create the query engine used by every cell below.
# db_path is a local directory; presumably this is where the persistent ChromaDB
# store mentioned in the introduction is kept — confirm against the engine's docs.
query_engine = DoclingMdQueryEngine(db_path="./tmp/chroma")

In [None]:
# Initialise the database from a directory of Docling-parsed Markdown files.
# The directory is given relative to the notebook's working directory, following the
# same convention as "../OAI_CONFIG_LIST", so the cell does not depend on the
# repository being checked out at one specific absolute location.
# NOTE(review): assumes the notebook runs from a folder one level below the repo root.
input_dir = "../test/agentchat/contrib/rag/pdf_parsed/"
query_engine.init_db(input_dir=input_dir)

In [None]:
# Display the collection name reported by the engine.
collection_name = query_engine.get_collection_name()
print(collection_name)

In [None]:
# Ask a natural-language question; the question text is passed to the engine as-is.
question = "How much money did Nvidia spend in research and development"
# query() returns the engine's answer, which is printed below.
answer = query_engine.query(question)
print(answer)

In [None]:
# Call init_db again, this time with an explicit list of document paths
# instead of a whole directory.
# The path is relative to the notebook's working directory, following the same
# convention as "../OAI_CONFIG_LIST", rather than a machine-specific absolute path.
# NOTE(review): assumes the notebook runs from a folder one level below the repo root.
input_docs = ["../test/agentchat/contrib/rag/pdf_parsed/nvidia_10k_2024.md"]
query_engine.init_db(input_doc_paths=input_docs)

In [None]:
# Ask the same Nvidia question again, now after init_db was called with the
# single nvidia_10k_2024.md document.
question = "How much money did Nvidia spend in research and development"
answer = query_engine.query(question)
print(answer)

In [None]:
# Add one more parsed document to the engine through add_docs.
# The path is relative to the notebook's working directory, following the same
# convention as "../OAI_CONFIG_LIST", rather than a machine-specific absolute path.
# NOTE(review): assumes the notebook runs from a folder one level below the repo root.
new_docs = ["../test/agentchat/contrib/rag/pdf_parsed/Toast_financial_report.md"]
query_engine.add_docs(new_doc_paths=new_docs)

In [None]:
# Ask a question about Toast, whose financial report was passed to add_docs
# in the preceding cell.
question = "How much money did Toast earn in 2024"
answer = query_engine.query(question)
print(answer)