# Table annotations

In [1]:
from docling_core.types.doc.document import DoclingDocument

# Only this page set is serialized later on, which keeps the printed output short.
pages = {5}
file_path = "2408.09869v3.json"

# Build the document object from its JSON file.
doc = DoclingDocument.load_from_json(file_path)

In [2]:
from typing import Optional
from rich.console import Console
from rich.panel import Panel

def print_excerpt(
    txt: str, *, limit: int = 2000, title: Optional[str] = None, min_width: int = 80,
    table_end: str = "--|"
) -> None:
    """Print the first ``limit`` characters of ``txt`` inside a rich panel.

    The console width is chosen so that the widest line ending in
    ``table_end`` fits, and is never smaller than ``min_width``.

    Args:
        txt: Full text to excerpt.
        limit: Maximum number of characters of ``txt`` to show.
        title: Optional title passed to the panel.
        min_width: Lower bound for the console width.
        table_end: Marker whose right-most position in any line drives the
            computed width.
    """
    excerpt = txt[:limit]

    # ``default=-1`` mirrors ``str.rfind``'s "not found" value, so an empty
    # excerpt (no lines at all) falls back to ``min_width`` instead of making
    # ``max()`` raise ValueError on an empty sequence.
    last_marker_pos = max(
        (ln.rfind(table_end) for ln in excerpt.splitlines()),
        default=-1,
    )
    # The extra 4 columns presumably leave room for the panel border and
    # padding -- TODO confirm against rich's Panel layout.
    width = max(last_marker_pos + len(table_end) + 4, min_width)

    console = Console(width=width)
    # Signal truncation only when part of the text was actually cut off.
    suffix = "..." if len(txt) > limit else ""
    console.print(Panel(f"{excerpt}{suffix}", title=title))

## Adding a table annotation

Below we add a demo table annotation, picking the first table for illustrative purposes.

Note that `MiscAnnotation` allows any dict data within the `content` field.


In [3]:
from docling_core.types.doc.document import DescriptionAnnotation, MiscAnnotation

# Stop right away if the document has no table to annotate.
assert doc.tables, "No table available in this document"
table = doc.tables[0]

# First annotation: a textual description with its provenance.
description_annotation = DescriptionAnnotation(
    text="A typical Docling setup runtime characterization.",
    provenance="model-foo",
)
table.add_annotation(annotation=description_annotation)

# Second annotation: a dict payload carried in the `content` field.
misc_content = {
    "type": "performance data",
    "sentiment": 0.85,
    # ... (further keys could be added here)
}
misc_annotation = MiscAnnotation(content=misc_content)
table.add_annotation(annotation=misc_annotation)

## Default serialization

In [4]:
from docling_core.transforms.serializer.markdown import (
    MarkdownDocSerializer,
    MarkdownParams,
)

# Serialize only the selected pages, using the serializer's default components.
default_params = MarkdownParams(pages=pages)
ser = MarkdownDocSerializer(doc=doc, params=default_params)

ser_out = ser.serialize()
ser_txt = ser_out.text

# Show the beginning of the Markdown output, titled with the page selection.
print_excerpt(ser_txt, title=f"{pages=}")

## Custom serialization

In [5]:
from typing import Any

from docling_core.transforms.serializer.base import SerializationResult
from docling_core.transforms.serializer.common import create_ser_result
from docling_core.transforms.serializer.markdown import MarkdownAnnotationSerializer
from docling_core.types.doc.document import MiscAnnotation, DocItem

class CustomAnnotationSerializer(MarkdownAnnotationSerializer):
    """Annotation serializer that also renders `MiscAnnotation` content.

    The parent serializer's output is kept, and every `MiscAnnotation` found
    on the item is appended as a list of ``- key: value`` lines.
    """

    def serialize(
        self,
        *,
        item: DocItem,
        doc: DoclingDocument,
        **kwargs: Any,
    ) -> SerializationResult:
        """Serialize the annotations of ``item``.

        Args:
            item: Item whose annotations are serialized.
            doc: Document the item belongs to; forwarded to the parent.
            **kwargs: Extra parameters forwarded unchanged to the parent.

        Returns:
            A result whose text is the parent serializer's text followed by
            one ``- key: value`` block per `MiscAnnotation`, with the parts
            separated by a blank line.
        """
        text_parts: list[str] = []

        # Reuse the result from the parent serializer.
        parent_res = super().serialize(
            item=item,
            doc=doc,
            **kwargs,
        )
        # Skip an empty parent result so the joined text does not start with
        # a stray blank-line separator.
        if parent_res.text:
            text_parts.append(parent_res.text)

        # Custom serialization logic: append one block per misc annotation.
        for ann in item.get_annotations():
            if isinstance(ann, MiscAnnotation):
                out_txt = "".join(
                    f"- {key}: {value}\n" for key, value in ann.content.items()
                )
                text_parts.append(out_txt)

        text_res = "\n\n".join(text_parts)
        return create_ser_result(text=text_res, span_source=item)

In [6]:
# Same page selection as before, but with the custom annotation serializer plugged in.
custom_params = MarkdownParams(pages=pages)
custom_annotation_serializer = CustomAnnotationSerializer()
ser = MarkdownDocSerializer(
    doc=doc,
    annotation_serializer=custom_annotation_serializer,
    params=custom_params,
)

ser_out = ser.serialize()
ser_txt = ser_out.text

# Show the beginning of the Markdown output, titled with the page selection.
print_excerpt(ser_txt, title=f"{pages=}")