zalun · zalun · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [0.1.4] - 2026-02-27
+
+### Added
+- `src/docproc/ocr.py` — Async OCR extraction via DeepFellow easyOCR API
+- Retry logic with exponential backoff (3 attempts, 1s initial delay, 2x factor)
+- File validation for supported types (PDF, PNG, JPG, JPEG, TIFF, TIF)
+- `ocr_endpoint` field in `DeepfellowConfig`
+- `httpx` as explicit dependency for HTTP calls
+- `pytest-asyncio` dev dependency with `asyncio_mode = "auto"`
+- Test suite for OCR module (~20 tests)
+
 ## [0.1.3] - 2026-02-27
 
 ### Added

diff --git a/config-example.yaml b/config-example.yaml
@@ -5,6 +5,7 @@ directories:
 deepfellow:
   base_url: "http://localhost:8000"
   responses_endpoint: "/v1/responses"
+  ocr_endpoint: "/v1/ocr"
   api_key: "${DEEPFELLOW_API_KEY}"
   vision_model: "gpt-4-vision"
   llm_model: "deepseek"

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docproc"
-version = "0.1.3"
+version = "0.1.4"
 requires-python = ">=3.14"
 dependencies = [
     "watchdog>=4.0.0",
@@ -9,6 +9,7 @@ dependencies = [
     "pyyaml>=6.0",
     "gradio>=4.0.0",
     "python-dotenv>=1.0.0",
+    "httpx>=0.28.0",
 ]
 
 [build-system]
@@ -27,11 +28,13 @@ dev = [
     "pytest-cov>=6.0.0",
     "ruff>=0.11.0",
     "ty>=0.0.1a0",
+    "pytest-asyncio>=0.25.0",
 ]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "--cov=docproc --cov-report=term-missing --cov-fail-under=80"
+asyncio_mode = "auto"
 
 [tool.ruff]
 target-version = "py314"

diff --git a/src/docproc/__init__.py b/src/docproc/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"
diff --git a/src/docproc/config.py b/src/docproc/config.py
@@ -27,6 +27,7 @@ class DeepfellowConfig(BaseModel):
 
     base_url: str = Field(min_length=1)
     responses_endpoint: str = Field(min_length=1)
+    ocr_endpoint: str = Field(min_length=1)
     api_key: str
     vision_model: str = Field(min_length=1)
     llm_model: str = Field(min_length=1)

diff --git a/src/docproc/ocr.py b/src/docproc/ocr.py
@@ -0,0 +1,175 @@
+"""OCR extraction via DeepFellow easyOCR API.
+
+Sends PDF/image files to the remote easyOCR endpoint and returns
+structured text with page-level breakdown. Runs async for parallel
+execution with Vision extraction.
+"""
+
+import asyncio
+import logging
+from pathlib import Path
+from typing import Any
+
+import httpx
+from pydantic import ValidationError
+
+from docproc.config import Config
+from docproc.models import OCRResult, PageText
+
+logger = logging.getLogger(__name__)
+
+# Suffixes accepted for upload; candidates are lower-cased before comparison.
+SUPPORTED_EXTENSIONS = frozenset(
+    (".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif"),
+)
+
+# Retry policy for the upload request.
+_MAX_RETRIES = 3  # total attempts, including the first one
+_INITIAL_DELAY = 1.0  # seconds slept after the first failed attempt
+_BACKOFF_FACTOR = 2.0  # the delay is multiplied by this after every sleep
+_TIMEOUT_SECONDS = 120.0  # timeout value passed to each POST
+
+
+class OCRError(Exception):
+    """Error raised by this module whenever OCR extraction cannot complete."""
+
+
+def _validate_file(file_path: Path) -> None:
+    """Reject paths that cannot be submitted for OCR.
+
+    Raises:
+        OCRError: If ``file_path`` is not an existing regular file, or its
+            lower-cased suffix is not in ``SUPPORTED_EXTENSIONS``.
+    """
+    if not file_path.is_file():
+        msg = f"File not found or not a regular file: {file_path}"
+        raise OCRError(msg)
+
+    suffix = file_path.suffix.lower()
+    if suffix in SUPPORTED_EXTENSIONS:
+        return
+    msg = f"Unsupported file type: {suffix}"
+    raise OCRError(msg)
+
+
+def _build_url(config: Config) -> str:
+    """Return the OCR URL built from ``base_url`` and ``ocr_endpoint``.
+
+    Trailing slashes are stripped from the base URL, and a leading slash is
+    added to the endpoint when it does not already start with one.
+    """
+    settings = config.deepfellow
+    root = settings.base_url.rstrip("/")
+    path = settings.ocr_endpoint
+    suffix = path if path.startswith("/") else f"/{path}"
+    return f"{root}{suffix}"
+
+
+def _parse_response(data: dict[str, Any]) -> OCRResult:
+    """Convert the OCR API's decoded JSON body into an OCRResult.
+
+    Args:
+        data: Decoded JSON payload. It must be an object with a ``pages``
+            sequence whose items provide ``page_number`` and ``text``; an
+            optional top-level ``confidence`` is passed through unchanged.
+
+    Returns:
+        OCRResult whose ``text`` is the page texts joined by blank lines.
+
+    Raises:
+        OCRError: If the payload is not a JSON object, lacks ``pages``, or a
+            page entry is missing a field or fails model validation.
+    """
+    # A JSON body may decode to a list, string, number or null. Guard here so
+    # such payloads surface as OCRError rather than AttributeError/TypeError.
+    if not isinstance(data, dict):
+        kind = type(data).__name__
+        msg = f"Malformed OCR response: expected a JSON object, got {kind}"
+        raise OCRError(msg)
+    if "pages" not in data:
+        keys = list(data.keys())
+        msg = f"Malformed OCR response: missing 'pages' key. Response keys: {keys}"
+        raise OCRError(msg)
+    try:
+        pages = [
+            PageText(page_number=p["page_number"], text=p["text"])
+            for p in data["pages"]
+        ]
+        full_text = "\n\n".join(p.text for p in pages)
+        confidence = data.get("confidence")
+        return OCRResult(text=full_text, pages=pages, confidence=confidence)
+    except (KeyError, TypeError, ValidationError) as exc:
+        # Covers missing page fields, non-mapping page entries, a non-iterable
+        # ``pages`` value, and pydantic rejecting the field values.
+        msg = f"Malformed OCR response: {exc}"
+        raise OCRError(msg) from exc
+
+
+async def _send_with_retry(
+    client: httpx.AsyncClient,
+    url: str,
+    file_path: Path,
+    api_key: str,
+) -> dict[str, Any]:
+    """POST the file to the OCR endpoint, retrying 5xx and transport errors.
+
+    Up to ``_MAX_RETRIES`` attempts are made. Between attempts the coroutine
+    sleeps, starting at ``_INITIAL_DELAY`` seconds and multiplying the delay
+    by ``_BACKOFF_FACTOR`` each time. 4xx responses and non-JSON bodies are
+    not retried.
+
+    Args:
+        client: Open httpx client used to send the request.
+        url: Full OCR endpoint URL.
+        file_path: File uploaded as the multipart ``file`` field.
+        api_key: Token sent in the ``Authorization: Bearer`` header.
+
+    Returns:
+        The decoded JSON body of the first response with a status below 400.
+
+    Raises:
+        OCRError: If the file cannot be read, the server answers 4xx, the
+            body is not JSON, or every attempt ends in a 5xx/transport error.
+    """
+    try:
+        # read_bytes() blocks; run it in a worker thread so other tasks on
+        # the event loop keep running while a large document is loaded.
+        file_bytes = await asyncio.to_thread(file_path.read_bytes)
+    except OSError as exc:
+        msg = f"Failed to read file {file_path}: {exc}"
+        raise OCRError(msg) from exc
+
+    # Identical for every attempt, so build them once outside the loop.
+    files = {"file": (file_path.name, file_bytes)}
+    headers = {"Authorization": f"Bearer {api_key}"}
+
+    delay = _INITIAL_DELAY
+    last_error: Exception | None = None
+
+    for attempt in range(1, _MAX_RETRIES + 1):
+        try:
+            response = await client.post(
+                url,
+                files=files,
+                headers=headers,
+                timeout=_TIMEOUT_SECONDS,
+            )
+
+            if response.status_code >= 500:
+                # Server-side failure: remember it and retry after a backoff.
+                last_error = OCRError(
+                    f"Server error {response.status_code}: {response.text}"
+                )
+                logger.warning(
+                    "OCR attempt %d/%d for '%s' failed (HTTP %d): %s",
+                    attempt,
+                    _MAX_RETRIES,
+                    file_path.name,
+                    response.status_code,
+                    response.text[:200],
+                )
+                # No sleep after the final attempt; fall through to the error.
+                if attempt < _MAX_RETRIES:
+                    await asyncio.sleep(delay)
+                    delay *= _BACKOFF_FACTOR
+                continue
+
+            if response.status_code >= 400:
+                # Client errors are not retried; fail immediately.
+                msg = f"Client error {response.status_code}: {response.text}"
+                raise OCRError(msg)
+
+            try:
+                return response.json()
+            except ValueError as exc:
+                msg = (
+                    f"OCR API returned non-JSON response "
+                    f"(status {response.status_code}): {response.text[:200]}"
+                )
+                raise OCRError(msg) from exc
+
+        except httpx.TransportError as exc:
+            # Transport-level failure: retry on the same backoff schedule.
+            last_error = OCRError(f"Transport error: {exc}")
+            logger.warning(
+                "OCR attempt %d/%d for '%s' failed with transport error: %s",
+                attempt,
+                _MAX_RETRIES,
+                file_path.name,
+                exc,
+            )
+            if attempt < _MAX_RETRIES:
+                await asyncio.sleep(delay)
+                delay *= _BACKOFF_FACTOR
+
+    msg = f"OCR failed after {_MAX_RETRIES} attempts"
+    logger.error(
+        "OCR extraction failed for '%s' after %d attempts: %s",
+        file_path.name,
+        _MAX_RETRIES,
+        last_error,
+    )
+    # Chain the last recorded failure so callers can inspect the cause.
+    raise OCRError(msg) from last_error
+
+
+async def extract_text(file_path: Path, config: Config) -> OCRResult:
+    """Run remote OCR on one document and return the parsed result.
+
+    The path is validated first, then the file is uploaded to the configured
+    OCR endpoint through a fresh ``httpx.AsyncClient`` and the JSON reply is
+    converted into an ``OCRResult``.
+
+    Args:
+        file_path: Path to the PDF or image file to process.
+        config: Application configuration supplying the DeepFellow settings.
+
+    Returns:
+        OCRResult holding the extracted text and its per-page breakdown.
+
+    Raises:
+        OCRError: If validation, upload, or response parsing fails.
+    """
+    _validate_file(file_path)
+    endpoint_url = _build_url(config)
+    name = file_path.name
+
+    logger.info("Starting OCR extraction: %s", name)
+
+    async with httpx.AsyncClient() as http:
+        payload = await _send_with_retry(
+            http,
+            endpoint_url,
+            file_path,
+            config.deepfellow.api_key,
+        )
+
+    ocr_result = _parse_response(payload)
+    logger.info("OCR complete: %s (%d pages)", name, len(ocr_result.pages))
+    return ocr_result
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -21,6 +21,7 @@
     "deepfellow": {
         "base_url": "http://localhost:8000",
         "responses_endpoint": "/v1/responses",
+        "ocr_endpoint": "/v1/ocr",
         "api_key": "test-key",
         "vision_model": "gpt-4-vision",
         "llm_model": "deepseek",

diff --git a/tests/test_init.py b/tests/test_init.py
@@ -2,4 +2,4 @@
 
 
 def test_version_matches_expected():
-    assert __version__ == "0.1.3"
+    assert __version__ == "0.1.4"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,4 +2,4 @@


 def test_version_matches_expected():
-    assert __version__ == "0.1.3"
+    assert __version__ == "0.1.4"