# Table annotations

In [1]:
from rich.console import Console
from rich.panel import Panel

# A wide console lets the Markdown tables render nicely.
console = Console(width=180)

In [2]:
from docling_core.types.doc.document import DoclingDocument

# Only these pages are serialized, to keep the printed output brief.
pages = {5}

# Load the document from its JSON file.
file_path = "2408.09869v3.json"
doc = DoclingDocument.load_from_json(file_path)

## Adding a table annotation

Below we add a demo table annotation, picking the first table for illustrative purposes.

Note that `TableMiscData` accepts arbitrary dictionary data in its `content` field.


In [3]:
from docling_core.types.doc.document import TableMiscData

# The demo annotates the first table, so the document must contain one.
assert doc.tables, "No table available in this document"
table = doc.tables[0]

# Build the demo annotation; further keys can be added to `content`.
my_annotation = TableMiscData(
    content={
        "summary": "Typical Docling setup runtime characterization.",
        "type": "performance data",
    }
)

# Attach the annotation to the selected table.
table.add_annotation(annotation=my_annotation)

## Default serialization

In [4]:
from docling_core.transforms.serializer.markdown import (
    MarkdownDocSerializer,
    MarkdownParams,
)

# Serialize the selected pages to Markdown using the stock serializers.
ser = MarkdownDocSerializer(doc=doc, params=MarkdownParams(pages=pages))
ser_out = ser.serialize()
ser_txt = ser_out.text

# Show only the first 2000 characters of the serialized text.
console.print(
    Panel(
        f"{ser_txt[:2000]}...",
        title=f"{pages=}",
    )
)

## Custom serialization

In [5]:
from typing import Any

from docling_core.transforms.serializer.base import (
    BaseDocSerializer,
    SerializationResult,
)
from docling_core.transforms.serializer.common import create_ser_result
from docling_core.transforms.serializer.markdown import MarkdownTableSerializer
from docling_core.types.doc.document import (
    TableItem,
)
from typing_extensions import override


class CustomAnnotationTableSerializer(MarkdownTableSerializer):
    """Markdown table serializer with custom `TableMiscData` rendering.

    Each `TableMiscData` annotation on the table is rendered as one
    `key: value` line per entry of its `content`, placed ahead of the
    regular table serialization produced by the parent class.
    """

    @override
    def serialize(
        self,
        *,
        item: TableItem,
        doc_serializer: BaseDocSerializer,
        doc: DoclingDocument,
        **kwargs: Any,
    ) -> SerializationResult:
        """Serialize `item`, prepending its `TableMiscData` annotations.

        Args:
            item: The table to serialize.
            doc_serializer: Serializer forwarded to the parent class.
            doc: Document forwarded to the parent class.
            **kwargs: Used to build `MarkdownParams` and forwarded to the
                parent class with `include_annotations` forced to False.

        Returns:
            A result whose text is the annotation blocks followed by the
            parent serialization, separated by blank lines.
        """
        md_params = MarkdownParams(**kwargs)

        chunks: list[str] = []
        if md_params.include_annotations:
            # custom serialization logic: one "key: value" line per entry
            chunks.extend(
                "\n".join(f"{key}: {value}" for key, value in ann.content.items())
                for ann in item.annotations
                if isinstance(ann, TableMiscData)
            )

        # Reuse the parent output, with annotations switched off so they
        # are not rendered by the parent as well.
        base_kwargs = {**kwargs, "include_annotations": False}
        base_result = super().serialize(
            item=item,
            doc_serializer=doc_serializer,
            doc=doc,
            **base_kwargs,
        )
        chunks.append(base_result.text)

        return create_ser_result(text="\n\n".join(chunks), span_source=item)

In [6]:
# Serialize the same pages again, this time using the custom table serializer.
ser = MarkdownDocSerializer(
    doc=doc,
    params=MarkdownParams(pages=pages),
    table_serializer=CustomAnnotationTableSerializer(),
)
ser_out = ser.serialize()
ser_txt = ser_out.text

# Show only the first 2000 characters of the serialized text.
console.print(
    Panel(
        f"{ser_txt[:2000]}...",
        title=f"{pages=}",
    )
)