# Docling PDF Reader example

In [None]:
from rich.pretty import pprint
import warnings
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment;
# later cells read MILVUS_URL and HF_TOKEN from os.environ.
load_dotenv()

# Silence FutureWarning messages raised from easyocr modules.
warnings.filterwarnings("ignore", category=FutureWarning, module="easyocr")

## Basic usage

By default, `DoclingPDFReader` exports to Markdown. Basic usage looks like this:

In [None]:
from llama_index.readers.docling.base import DoclingPDFReader

# Location of the PDF to load; given as a URL rather than a local path.
source_pdf = "https://arxiv.org/pdf/2408.09869"

# A reader built with no arguments uses its defaults.
reader = DoclingPDFReader()
docs = reader.load_data(file_path=source_pdf)

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

Previewing a sample of the loaded doc's Markdown text:

In [None]:
import rich

# Only preview the beginning of the first loaded document.
preview_chars = 700
first_doc = docs[0]
md_snippet = first_doc.text[:preview_chars]

# The trailing "..." signals that the text was cut off.
rich.print(f"{md_snippet}...")

## RAG demo

Setting up the node parser:

In [None]:
from llama_index.core.node_parser import MarkdownNodeParser

# Node parser used below to split the loaded documents into nodes (chunks);
# constructed with no arguments, i.e. with its default settings.
node_parser = MarkdownNodeParser()

Getting the nodes (chunks) and displaying a sample:

In [None]:
# Run the node parser over the loaded documents to obtain the chunks.
nodes = node_parser.get_nodes_from_documents(documents=docs)

# Display one sample node, truncated so the output stays readable.
sample_node = nodes[7]
pprint(sample_node, max_depth=2, max_length=2, max_string=250)

Setting up the embed model:

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Model identifier passed to the Hugging Face embedding wrapper.
embed_model_name = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

Setting up the vector store:

In [None]:
from tempfile import TemporaryDirectory
from llama_index.vector_stores.milvus import MilvusVectorStore

# Use the Milvus endpoint from the MILVUS_URL environment variable when it is
# set; otherwise fall back to a throwaway database file in a temp directory.
milvus_uri = os.environ.get("MILVUS_URL")
tmp_dir = None
if milvus_uri is None:
    # Create the temporary directory only when it is actually needed, and keep
    # a reference to it: the directory is removed once the TemporaryDirectory
    # object is cleaned up or garbage-collected.
    tmp_dir = TemporaryDirectory()
    milvus_uri = f"{tmp_dir.name}/milvus_demo.db"

# Derive the vector dimension from the embed model itself by embedding a short
# probe string, so the store always matches the model in use.
embed_dim = len(embed_model.get_text_embedding("hi"))

vector_store = MilvusVectorStore(
    uri=milvus_uri,
    collection_name="docling_collection",
    dim=embed_dim,
    overwrite=True,
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Setting up the index:

In [None]:
from llama_index.core import StorageContext, VectorStoreIndex

# Wrap the vector store in a storage context so the index writes into it.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index over the parsed nodes, embedding them with `embed_model`;
# show_progress=True displays a progress bar while embeddings are generated.
index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=embed_model,
    show_progress=True,
)

Generating embeddings:   0%|          | 0/33 [00:00<?, ?it/s]

Setting up the LLM:

In [None]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Read the Hugging Face token from the environment; None when it is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# LLM served through the Hugging Face Inference API.
llm = HuggingFaceInferenceAPI(
    model_name="mistralai/Mistral-7B-Instruct-v0.3",
    token=HF_TOKEN,
)




We are now ready to ask questions about our document content:

In [None]:
# Question to answer from the indexed document.
question = "How many pages were human annotated?"

# Retrieve the 10 most similar nodes for each query and answer with `llm`.
query_engine = index.as_query_engine(similarity_top_k=10, llm=llm)
query_res = query_engine.query(question)

# Print a truncated view of the response object.
pprint(query_res, max_depth=4, max_length=1, max_string=70)