## Data Extraction with Docling

In this notebook, extract content from PDFs into structured formats:

- **Markdown**: Full document text with page breaks for chunking
- **Images**: Save pages containing large charts/diagrams (>500x500 pixels)
- **Tables**: Extract with 2 paragraphs of context + page number metadata

**Output Structure:**
```
data/rag-data/markdown/{company}/{document}.md
data/rag-data/images/{company}/{document}/page_5.png
data/rag-data/tables/{company}/{document}/table_1_page_5.md
```

### 1. Setup and Configuration

In [1]:
from pathlib import Path
from typing import List, Tuple

from docling_core.types.doc import PictureItem
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

In [2]:
# Directory paths
DATA_DIR = "data/rag-data/pdfs"
OUTPUT_MD_DIR = "data/rag-data/markdown"
OUTPUT_IMAGES_DIR = "data/rag-data/images"
OUTPUT_TABLES_DIR = "data/rag-data/tables"

### Metadata Extraction

In [3]:
def extract_metadata_from_filename(filename: str):
    """
    Extract metadata from filename.
    
    Expected format: CompanyName DocType [Quarter] Year.pdf
    Examples:
        - Amazon 10-Q Q1 2024.pdf
        - Microsoft 10-K 2023.pdf
    """

    filename = filename.replace('.pdf', '').replace('.md', '')
    parts = filename.split()

    return {
        'company_name': parts[0],
        'doc_type': parts[1],
        'fiscal_quarter': parts[2] if len(parts)==4 else None,
        'fiscal_year': parts[-1]
    }

extract_metadata_from_filename('apple 10-k 2023.pdf')

{'company_name': 'apple',
 'doc_type': '10-k',
 'fiscal_quarter': None,
 'fiscal_year': '2023'}

### Extract Markdown

In [4]:
def convert_pdf_to_docling(pdf_file: Path):

    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = 2
    pipeline_options.generate_picture_images = True
    pipeline_options.generate_page_images = True

    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
        }
    )

    return doc_converter.convert(pdf_file)

In [8]:
pdf_file = Path("data\\rag-data\\pdfs\\apple\\apple 8-k q4 2023.pdf")
pdf_file.stem

meta_data = extract_metadata_from_filename(pdf_file.stem)
company_name = meta_data['company_name']

md_dir = Path(OUTPUT_MD_DIR) / company_name
images_dir = Path(OUTPUT_IMAGES_DIR) / company_name / pdf_file.stem
tables_dir = Path(OUTPUT_TABLES_DIR) / company_name / pdf_file.stem

for dir_path in [md_dir, images_dir, tables_dir]:
    dir_path.mkdir(parents=True, exist_ok=True)

doc_converter = convert_pdf_to_docling(pdf_file)
markdown_text = doc_converter.document.export_to_markdown(page_break_placeholder="<!-- page break --!>")

[32m[INFO] 2026-02-11 13:54:07,807 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


[32m[INFO] 2026-02-11 13:54:07,817 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 13:54:07,818 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 13:54:07,914 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 13:54:07,916 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 13:54:07,917 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 13:54:07,974 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 13:54:07,986 [RapidOCR] download_file.py:60: File exi

In [9]:
(md_dir / f"{pdf_file.stem}.md").write_text(markdown_text, encoding="utf-8")

28575

In [14]:
def save_page_images(doc_converter, images_dir: Path):
    """
    Find and save pages with large images (>500x500 pixels).
    """

    pages_to_save = set()

    for item in doc_converter.document.iterate_items():
        element = item[0]

        if isinstance(element, PictureItem):
            image = element.get_image(doc_converter.document)

            if image.size[0]>500 and image.size[1]>500:
                page_no = element.prov[0].page_no if element.prov else None

                if page_no:
                    pages_to_save.add(page_no)


        # save images
        for page_no in pages_to_save:
            page = doc_converter.document.pages[page_no]

            page.image.pil_image.save(images_dir/ f"page_{page_no}.png", "PNG")


In [13]:
save_page_images(doc_converter, images_dir)

In [15]:
def extract_context_and_table(lines: List[str], table_index: int):
    """
    Extract context and table content at a specific position.
    
    Args:
        lines: All markdown lines
        table_index: Where the table starts
    
    Returns:
        (combined_content, next_line_index)
    """

    table_lines = []
    i = table_index

    while (i < len(lines)) and (lines[i].startswith('|')):
        table_lines.append(lines[i])
        i = i + 1


    # previous 2 lines as table context
    start = max(0, table_index-2)
    context_lines = lines[start: table_index]

    content = '\n'.join(context_lines) + '\n\n' + '\n'.join(table_lines)

    return content, i
    

In [16]:
def extract_tables_with_context(markdown_text: str):
    """
    Find all tables and extract them with context and page numbers.
    
    Returns:
        List of (content, table_name, page_number)
    """

    lines = markdown_text.split('\n')
    lines = [line for line in lines if line.strip()]
    tables = []
    current_page = 1
    table_num = 1
    i = 0

    while(i< len(lines)):
        # track page numbers
        if '<!-- page break -->' in lines[i]:
            current_page = current_page + 1
            i = i + 1
            continue

        # Table detected
        if lines[i].startswith('|') and lines[i].count('|')>1:
            content, next_i = extract_context_and_table(lines, i)

            tables.append((content, f"table_{table_num}", current_page))
            table_num = table_num + 1
            i = next_i

        else:
            i = i + 1


    return tables

In [17]:
def save_tables(markdown_text, tables_dir):

    tables = extract_tables_with_context(markdown_text)

    for table_content, table_name, page_num in tables:
        content_with_page = f"**Page:** {page_num}\n\n{table_content}"
                
        (tables_dir/f"{table_name}_page_{page_num}.md").write_text(content_with_page, encoding='utf-8')


In [19]:
def extract_pdf_content(pdf_file):
    metadata = extract_metadata_from_filename(pdf_file.stem)

    company_name = metadata['company_name']

    md_dir = Path(OUTPUT_MD_DIR) / company_name
    images_dir = Path(OUTPUT_IMAGES_DIR) / company_name / pdf_file.stem
    tables_dir = Path(OUTPUT_TABLES_DIR) / company_name / pdf_file.stem

    for dir_path in [md_dir, images_dir, tables_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)


    doc_converter = convert_pdf_to_docling(pdf_file)

    markdown_text = doc_converter.document.export_to_markdown(page_break_placeholder="<!-- page break -->")

    (md_dir / f"{pdf_file.stem}.md").write_text(markdown_text, encoding='utf-8')

    save_page_images(doc_converter, images_dir)

    save_tables(markdown_text, tables_dir)

In [20]:
extract_pdf_content(pdf_file)

[32m[INFO] 2026-02-11 15:20:55,791 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:20:55,807 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:20:55,808 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:20:55,919 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:20:55,936 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 15:20:55,937 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 15:20:56,031 [RapidOCR] base.py:22: Using engine_name

In [21]:
data_path = Path(DATA_DIR)

In [22]:
pdf_files = data_path.rglob("*.pdf")
for idx, pdf_file in enumerate(pdf_files):
    print(pdf_file)
    extract_pdf_content(pdf_file)

[32m[INFO] 2026-02-11 15:22:55,085 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:22:55,091 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:22:55,091 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:22:55,184 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:22:55,187 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 15:22:55,187 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m


data\rag-data\pdfs\amazon\amazon 10-k 2023.pdf


[32m[INFO] 2026-02-11 15:22:55,242 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:22:55,255 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:22:55,256 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:28:58,327 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:28:58,335 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:28:58,335 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:28:58,425 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\amazon\amazon 10-k 2024.pdf


[32m[INFO] 2026-02-11 15:28:58,505 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:28:58,506 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:32:38,848 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:32:38,854 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:32:38,855 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:32:38,951 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:32:38,954 [RapidOCR] download_file.py:60: File exists and is valid: 

data\rag-data\pdfs\amazon\amazon 10-q q1 2024.pdf


[32m[INFO] 2026-02-11 15:32:39,020 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:32:39,032 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:32:39,032 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:37:30,941 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:37:30,948 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:37:30,949 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:37:31,043 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\amazon\amazon 10-q q1 2025.pdf


[32m[INFO] 2026-02-11 15:37:31,107 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:37:31,119 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:37:31,121 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:39:27,459 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:39:27,467 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:39:27,468 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:39:27,560 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\amazon\amazon 10-q q2 2024.pdf


[32m[INFO] 2026-02-11 15:39:27,627 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:39:27,639 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:39:27,640 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:47:05,726 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:47:05,733 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:47:05,734 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:47:05,828 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\amazon\amazon 10-q q2 2025.pdf


[32m[INFO] 2026-02-11 15:47:05,887 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:47:05,899 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:47:05,900 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:51:32,427 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:51:32,434 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:51:32,435 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:51:32,551 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\amazon\amazon 10-q q3 2024.pdf


[32m[INFO] 2026-02-11 15:51:32,625 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:51:32,639 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:51:32,639 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:55:49,758 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:55:49,765 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:55:49,766 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 15:55:49,861 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\apple\apple 10-k 2023.pdf


[32m[INFO] 2026-02-11 15:55:49,922 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 15:55:49,934 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 15:55:49,935 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:05:22,450 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:05:22,457 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:05:22,458 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:05:22,552 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\apple\apple 10-k 2024.pdf


[32m[INFO] 2026-02-11 16:05:22,628 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:05:22,629 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:09:45,406 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:09:45,413 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:09:45,414 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:09:45,515 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:09:45,518 [RapidOCR] download_file.py:60: File exists and is valid: 

data\rag-data\pdfs\apple\apple 10-q q1 2024.pdf


[32m[INFO] 2026-02-11 16:09:45,576 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:09:45,588 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:09:45,589 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:11:16,821 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:11:16,828 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:11:16,829 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:11:16,936 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\apple\apple 10-q q2 2024.pdf


[32m[INFO] 2026-02-11 16:11:16,940 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:11:17,003 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:11:17,015 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:11:17,016 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:21:07,482 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:21:07,490 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:21:07,491 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaco

data\rag-data\pdfs\apple\apple 10-q q4 2023.pdf


[32m[INFO] 2026-02-11 16:22:28,688 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:22:28,695 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:28,695 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:28,788 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:22:28,791 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:28,791 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:28,847 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\apple\apple 8-k q4 2023.pdf


[32m[INFO] 2026-02-11 16:22:55,494 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:22:55,500 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:55,500 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:55,587 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:22:55,590 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:55,591 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m


data\rag-data\pdfs\google\google 10-k 2023.pdf


[32m[INFO] 2026-02-11 16:22:55,664 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:22:55,676 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:22:55,677 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:29:25,526 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:29:25,532 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:29:25,533 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:29:25,649 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\google\google 10-k 2024.pdf


[32m[INFO] 2026-02-11 16:29:25,729 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:29:25,741 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:29:25,742 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:35:53,927 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:35:53,939 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:35:53,940 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m


data\rag-data\pdfs\google\google 10-q q1 2025.pdf


[32m[INFO] 2026-02-11 16:35:54,045 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:35:54,065 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:35:54,066 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:35:54,139 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:35:54,170 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:35:54,171 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:39:22,772 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\google\google 10-q q2 2024.pdf


[32m[INFO] 2026-02-11 16:39:22,882 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:39:22,886 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:39:22,887 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:39:22,965 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:39:22,976 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:39:22,976 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:43:32,519 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\google\google 10-q q2 2025.pdf


[32m[INFO] 2026-02-11 16:43:32,707 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:43:32,720 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:43:32,721 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:48:06,850 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:48:06,858 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:48:06,858 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:48:06,974 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\google\google 10-q q3 2024.pdf


[32m[INFO] 2026-02-11 16:48:07,046 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:48:07,057 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:48:07,058 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:52:23,475 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:52:23,480 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 16:52:23,481 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m


data\rag-data\pdfs\meta\meta 10-k 2023.pdf


[32m[INFO] 2026-02-11 16:52:23,591 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:52:23,595 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:52:23,595 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 16:52:23,673 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 16:52:23,686 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 16:52:23,687 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 17:25:36,509 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\meta\meta 10-k 2024.pdf


[32m[INFO] 2026-02-11 17:25:36,529 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 17:25:36,531 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 17:25:36,889 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 17:25:36,897 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 17:25:36,899 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 17:25:37,090 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 17:25:37,127 [RapidOCR] download_file.py:60: File exi

data\rag-data\pdfs\meta\meta 10-q q1 2024.pdf


[32m[INFO] 2026-02-11 20:41:16,409 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:41:16,419 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 20:41:16,421 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 20:41:16,645 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:41:16,686 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:41:16,688 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:44:32,154 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\meta\meta 10-q q1 2025.pdf


[32m[INFO] 2026-02-11 20:44:32,312 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:44:32,324 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:44:32,324 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:47:48,717 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:47:48,726 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 20:47:48,727 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 20:47:48,839 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\meta\meta 10-q q2 2024.pdf


[32m[INFO] 2026-02-11 20:47:48,916 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:47:48,929 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:47:48,930 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:52:22,246 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:52:22,254 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 20:52:22,255 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 20:52:22,351 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m


data\rag-data\pdfs\meta\meta 10-q q2 2025.pdf


[32m[INFO] 2026-02-11 20:52:22,428 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:52:22,429 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:55:52,606 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:55:52,620 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-11 20:55:52,622 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m


data\rag-data\pdfs\meta\meta 10-q q3 2024.pdf


[32m[INFO] 2026-02-11 20:55:53,141 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:55:53,147 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 20:55:53,148 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 20:55:53,368 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 20:55:53,396 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 20:55:53,398 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 21:01:10,442 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\meta\meta 10-q q3 2025.pdf


[32m[INFO] 2026-02-11 21:01:10,874 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 21:01:10,881 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 21:01:10,883 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 21:01:11,147 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 21:01:11,174 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 21:01:11,176 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 21:05:58,736 [RapidOCR] base.py:22: Using engine_name

data\rag-data\pdfs\meta\meta 10-q q4 2024.pdf


[32m[INFO] 2026-02-11 21:05:59,119 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 21:05:59,127 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 21:05:59,128 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-11 21:05:59,357 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-11 21:05:59,399 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
[32m[INFO] 2026-02-11 21:05:59,402 [RapidOCR] main.py:53: Using C:\Users\ASUSZ\anaconda3\envs\venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.onnx[0m
