In [1]:
# %% [markdown]
# # DocProcess API - Production Testing Notebook
# 
# This script tests the production endpoints for:
# - Latency measurements
# - Accuracy validation
# - API functionality
# 
# Run each cell independently using VS Code's "Run Cell" or Cursor's interactive mode.

# %% [markdown]
# ## 1. Setup & Configuration

# %%
import requests
import time
import json
from datetime import datetime
from typing import Optional
import statistics

# =============================================================================
# CONFIGURATION - Update these values
# =============================================================================

# Production API URL (Railway)
API_BASE_URL = "https://web-production-00a7f.up.railway.app"

# Modal Direct URL (for comparison)
MODAL_DIRECT_URL = "https://vivek12345singh--docling-service-convert-endpoint.modal.run"

# Test documents
TEST_DOCUMENTS = {
    "arxiv_docling": "https://arxiv.org/pdf/2501.17887",  # Docling paper (8 pages)
    "arxiv_attention": "https://arxiv.org/pdf/1706.03762",  # Attention paper (15 pages)
    "simple_pdf": "https://www.w3.org/WAI/WCAG21/Techniques/pdf/img/table-word.pdf",  # Simple PDF
}

In [2]:
import requests

# You (admin) create a key
response = requests.post(
    "https://web-production-00a7f.up.railway.app/v1/keys",
    json={
        "name": "Acme Corp",      # Client name
        "tier": "starter",         # Their plan
        "credits": 500             # Initial credits
    }
)

key_data = response.json()
print(f"Give this to client: {key_data['key']}")
# Output: dk_abc123_secretXYZ...

Give this to client: dk_QBgCJkoyQdw_h2w0T5psWw6ULAUgaswiMxlDW8QAOguuTg0ADqtS5lQ


In [8]:
import requests

API_URL = "https://web-production-00a7f.up.railway.app"
API_KEY = "dk_BYyVTukml1A_sTrxMX8NoUT6uaN-YfKchHhXvbycJwI4Bu00wZDLpco"  # Use your latest key

# Upload local PDF directly
with open("/Users/viveksingh/Desktop/American-casinos-CIM.pdf", "rb") as f:
    response = requests.post(
        f"{API_URL}/v1/convert/file",
        headers={"Authorization": f"Bearer {API_KEY}"},
        files={"file": ("American-casinos-CIM.pdf", f, "application/pdf")},
        data={
            "enable_ocr": "true",
            "force_full_page_ocr": "true"
        },
        timeout=300
    )

if response.status_code == 200:
    data = response.json()
    markdown = data["results"][0]["markdown"]
    print(f"‚úÖ Pages: {data['results'][0]['pages']}")
    print(f"üìù Content:\n{markdown[:2000]}...")
else:
    print(f"‚ùå Error: {response.text}")

‚úÖ Pages: 1


TypeError: 'NoneType' object is not subscriptable

In [None]:
https://drive.google.com/file/d/12VMTIGmYKv3BMe9Dio-M8t_fZfU8Dczj/view?usp=sharing

In [2]:
# Store results
test_results = {
    "api_key": None,
    "latency_tests": [],
    "accuracy_tests": [],
}

print("‚úÖ Configuration loaded")
print(f"   API URL: {API_BASE_URL}")
print(f"   Modal URL: {MODAL_DIRECT_URL}")

‚úÖ Configuration loaded
   API URL: https://web-production-00a7f.up.railway.app
   Modal URL: https://vivek12345singh--docling-service-convert-endpoint.modal.run


In [5]:
# %% [markdown]
# ## 2. Health Check

# %%
def check_health():
    """Check API health status."""
    print("üîç Checking API Health...")
    print("-" * 50)
    
    start = time.time()
    response = requests.get(f"{API_BASE_URL}/health")
    latency = (time.time() - start) * 1000
    
    data = response.json()
    
    print(f"Status Code: {response.status_code}")
    print(f"Latency: {latency:.2f}ms")
    print(f"Response: {json.dumps(data, indent=2)}")
    
    return data

health = check_health()

üîç Checking API Health...
--------------------------------------------------
Status Code: 200
Latency: 561.82ms
Response: {
  "status": "healthy",
  "version": "1.0.0",
  "docling_backend": "healthy",
  "timestamp": "2026-01-24T06:21:25.934506"
}


In [6]:
# %% [markdown]
# ## 3. Create API Key

# %%
def create_api_key(name: str = "Test Key", credits: int = 100):
    """Create a new API key for testing."""
    print("üîë Creating API Key...")
    print("-" * 50)
    
    response = requests.post(
        f"{API_BASE_URL}/v1/keys",
        json={"name": name, "credits": credits}
    )
    
    data = response.json()
    
    print(f"Status Code: {response.status_code}")
    print(f"Key ID: {data.get('id')}")
    print(f"API Key: {data.get('key', '')[:30]}...")
    print(f"Credits: {data.get('credits')}")
    
    # Store for later use
    test_results["api_key"] = data.get("key")
    
    return data

key_data = create_api_key("Production Test", credits=500)


üîë Creating API Key...
--------------------------------------------------
Status Code: 201
Key ID: dk_8olMREYlIrY
API Key: dk_8olMREYlIrY_r5QWts_ARmV1__m...
Credits: 500


NameError: name 'test_results' is not defined

In [5]:
# %% [markdown]
# ## 4. Single Document Conversion Test

# %%
def convert_document(url: str, api_key: Optional[str] = None):
    """Convert a single document and measure performance."""
    api_key = api_key or test_results["api_key"]
    
    print(f"üìÑ Converting: {url[:50]}...")
    print("-" * 50)
    
    start = time.time()
    
    response = requests.post(
        f"{API_BASE_URL}/v1/convert/source",
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            "sources": [{"kind": "http", "url": url}],
            "options": {"output_format": "markdown"}
        },
        timeout=300  # 5 minute timeout for large docs
    )
    
    total_latency = (time.time() - start) * 1000
    
    data = response.json()
    
    if response.status_code == 200 and "results" in data:
        result = data["results"][0]
        markdown = result.get("markdown", "")
        
        print(f"‚úÖ Status: {result.get('status')}")
        print(f"üìÑ Pages: {result.get('pages')}")
        print(f"üìù Markdown Length: {len(markdown):,} chars")
        print(f"‚è±Ô∏è  Total Latency: {total_latency:.2f}ms ({total_latency/1000:.2f}s)")
        print(f"üí∞ Credits Used: {data.get('credits_used')}")
        print(f"üí≥ Credits Remaining: {data.get('credits_remaining')}")
        
        # Calculate per-page metrics
        pages = result.get('pages', 1)
        print(f"\nüìä Performance Metrics:")
        print(f"   Latency per page: {total_latency/pages:.2f}ms")
        print(f"   Chars per page: {len(markdown)/pages:.0f}")
        
        return {
            "success": True,
            "url": url,
            "pages": pages,
            "markdown_length": len(markdown),
            "latency_ms": total_latency,
            "latency_per_page_ms": total_latency / pages,
            "credits_used": data.get("credits_used"),
            "markdown_preview": markdown[:500]
        }
    else:
        print(f"‚ùå Error: {data}")
        return {
            "success": False,
            "url": url,
            "error": data
        }

# Test with Docling paper
result = convert_document(TEST_DOCUMENTS["arxiv_docling"])

üìÑ Converting: https://arxiv.org/pdf/2501.17887...
--------------------------------------------------
‚úÖ Status: success
üìÑ Pages: 8
üìù Markdown Length: 37,147 chars
‚è±Ô∏è  Total Latency: 42611.01ms (42.61s)
üí∞ Credits Used: 8
üí≥ Credits Remaining: 492

üìä Performance Metrics:
   Latency per page: 5326.38ms
   Chars per page: 4643


In [8]:
result

{'success': True,
 'url': 'https://arxiv.org/pdf/2501.17887',
 'pages': 8,
 'markdown_length': 37147,
 'latency_ms': 41583.43720436096,
 'latency_per_page_ms': 5197.92965054512,
 'credits_used': 8,
 'markdown_preview': '## Docling: An Efficient Open-Source Toolkit for AI-driven Document Conversion\n\nNikolaos Livathinos * , Christoph Auer * , Maksym Lysak, Ahmed Nassar, Michele Dolfi, Panagiotis Vagenas, Cesar Berrospi, Matteo Omenetti, Kasper Dinkla, Yusik Kim, Shubham Gupta, Rafael Teixeira de Lima, Valery Weber, Lucas Morin, Ingmar Meijer, Viktor Kuropiatnyk, Peter W. J. Staar\n\nIBM Research, R¬® uschlikon, Switzerland\n\nPlease send correspondence to: deepsearch-core@zurich.ibm.com\n\n## Abstract\n\nWe introduce Docl'}

In [7]:
# %% [markdown]
# ## 5. Latency Benchmark - Multiple Documents

# %%
def run_latency_benchmark(num_runs: int = 3):
    """Run multiple conversions to measure average latency."""
    print("‚è±Ô∏è  Running Latency Benchmark...")
    print("=" * 60)
    
    api_key = test_results["api_key"]
    test_url = TEST_DOCUMENTS["arxiv_docling"]
    
    latencies = []
    
    for i in range(num_runs):
        print(f"\nüîÑ Run {i+1}/{num_runs}")
        
        start = time.time()
        response = requests.post(
            f"{API_BASE_URL}/v1/convert/source",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "sources": [{"kind": "http", "url": test_url}],
                "options": {"output_format": "markdown"}
            },
            timeout=300
        )
        latency = (time.time() - start) * 1000
        
        if response.status_code == 200:
            data = response.json()
            pages = data["results"][0].get("pages", 1)
            latencies.append({
                "run": i + 1,
                "total_ms": latency,
                "per_page_ms": latency / pages,
                "pages": pages
            })
            print(f"   ‚úÖ {latency:.2f}ms total, {latency/pages:.2f}ms/page")
        else:
            print(f"   ‚ùå Failed: {response.text[:100]}")
    
    if latencies:
        total_latencies = [l["total_ms"] for l in latencies]
        per_page_latencies = [l["per_page_ms"] for l in latencies]
        
        print("\n" + "=" * 60)
        print("üìä LATENCY SUMMARY")
        print("=" * 60)
        print(f"Runs: {len(latencies)}")
        print(f"\nTotal Latency:")
        print(f"   Min: {min(total_latencies):.2f}ms")
        print(f"   Max: {max(total_latencies):.2f}ms")
        print(f"   Avg: {statistics.mean(total_latencies):.2f}ms")
        if len(total_latencies) > 1:
            print(f"   Std: {statistics.stdev(total_latencies):.2f}ms")
        
        print(f"\nPer-Page Latency:")
        print(f"   Min: {min(per_page_latencies):.2f}ms")
        print(f"   Max: {max(per_page_latencies):.2f}ms")
        print(f"   Avg: {statistics.mean(per_page_latencies):.2f}ms")
        
        test_results["latency_tests"] = latencies
    
    return latencies

# Run benchmark (adjust num_runs as needed)
latency_results = run_latency_benchmark(num_runs=3)

‚è±Ô∏è  Running Latency Benchmark...

üîÑ Run 1/3
   ‚úÖ 41537.37ms total, 5192.17ms/page

üîÑ Run 2/3
   ‚úÖ 8275.42ms total, 1034.43ms/page

üîÑ Run 3/3
   ‚úÖ 7551.53ms total, 943.94ms/page

üìä LATENCY SUMMARY
Runs: 3

Total Latency:
   Min: 7551.53ms
   Max: 41537.37ms
   Avg: 19121.44ms
   Std: 19416.14ms

Per-Page Latency:
   Min: 943.94ms
   Max: 5192.17ms
   Avg: 2390.18ms


In [8]:
# %% [markdown]
# ## 6. Compare: Railway API vs Modal Direct
# %%
def compare_railway_vs_modal():
    """Compare latency between Railway API and direct Modal call."""
    print("üîÑ Comparing Railway API vs Modal Direct...")
    print("=" * 60)
    
    api_key = test_results["api_key"]
    test_url = TEST_DOCUMENTS["arxiv_docling"]
    
    results = {}
    
    # Test Railway API
    print("\nüì° Testing Railway API...")
    start = time.time()
    response = requests.post(
        f"{API_BASE_URL}/v1/convert/source",
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            "sources": [{"kind": "http", "url": test_url}],
            "options": {"output_format": "markdown"}
        },
        timeout=300
    )
    railway_latency = (time.time() - start) * 1000
    
    if response.status_code == 200:
        data = response.json()
        results["railway"] = {
            "latency_ms": railway_latency,
            "pages": data["results"][0].get("pages"),
            "markdown_len": len(data["results"][0].get("markdown", ""))
        }
        print(f"   ‚úÖ Latency: {railway_latency:.2f}ms")
    
    # Test Modal Direct
    print("\nüöÄ Testing Modal Direct...")
    start = time.time()
    response = requests.post(
        MODAL_DIRECT_URL,
        json={"url": test_url, "output_format": "markdown"},
        timeout=300
    )
    modal_latency = (time.time() - start) * 1000
    
    if response.status_code == 200:
        data = response.json()
        results["modal"] = {
            "latency_ms": modal_latency,
            "pages": data.get("pages"),
            "markdown_len": len(data.get("markdown", ""))
        }
        print(f"   ‚úÖ Latency: {modal_latency:.2f}ms")
    
    # Comparison
    if "railway" in results and "modal" in results:
        overhead = results["railway"]["latency_ms"] - results["modal"]["latency_ms"]
        overhead_pct = (overhead / results["modal"]["latency_ms"]) * 100
        
        print("\n" + "=" * 60)
        print("üìä COMPARISON RESULTS")
        print("=" * 60)
        print(f"Railway API:   {results['railway']['latency_ms']:.2f}ms")
        print(f"Modal Direct:  {results['modal']['latency_ms']:.2f}ms")
        print(f"Overhead:      {overhead:.2f}ms ({overhead_pct:.1f}%)")
        print(f"\nMarkdown lengths match: {results['railway']['markdown_len'] == results['modal']['markdown_len']}")
    
    return results

comparison = compare_railway_vs_modal()

üîÑ Comparing Railway API vs Modal Direct...

üì° Testing Railway API...
   ‚úÖ Latency: 8535.31ms

üöÄ Testing Modal Direct...
   ‚úÖ Latency: 7555.90ms

üìä COMPARISON RESULTS
Railway API:   8535.31ms
Modal Direct:  7555.90ms
Overhead:      979.41ms (13.0%)

Markdown lengths match: True


In [None]:
# %% [markdown]
# ## 7. Accuracy Test - Content Validation

# %%
def test_accuracy():
    """Test accuracy by checking for expected content in converted documents."""
    print("üéØ Running Accuracy Tests...")
    print("=" * 60)
    
    api_key = test_results["api_key"]
    
    # Test cases: (url, expected_strings)
    test_cases = [
        {
            "name": "Docling Paper",
            "url": TEST_DOCUMENTS["arxiv_docling"],
            "expected": [
                "Docling",
                "document conversion",
                "IBM Research",
                "PDF",
                "table",
            ]
        },
    ]
    
    results = []
    
    for test in test_cases:
        print(f"\nüìÑ Testing: {test['name']}")
        print("-" * 40)
        
        response = requests.post(
            f"{API_BASE_URL}/v1/convert/source",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "sources": [{"kind": "http", "url": test["url"]}],
                "options": {"output_format": "markdown"}
            },
            timeout=300
        )
        
        if response.status_code == 200:
            data = response.json()
            markdown = data["results"][0].get("markdown", "").lower()
            
            found = []
            missing = []
            
            for expected in test["expected"]:
                if expected.lower() in markdown:
                    found.append(expected)
                else:
                    missing.append(expected)
            
            accuracy = len(found) / len(test["expected"]) * 100
            
            print(f"   Found: {found}")
            if missing:
                print(f"   Missing: {missing}")
            print(f"   Accuracy: {accuracy:.1f}%")
            
            results.append({
                "name": test["name"],
                "found": found,
                "missing": missing,
                "accuracy": accuracy
            })
        else:
            print(f"   ‚ùå Request failed: {response.status_code}")
    
    test_results["accuracy_tests"] = results
    return results

accuracy_results = test_accuracy()


In [8]:
# %% [markdown]
# ## 8. Load Test - Multiple Concurrent Requests

# %%
import concurrent.futures

def load_test(num_requests: int = 5, max_workers: int = 3):
    """Run multiple concurrent requests to test load handling."""
    print(f"üî• Load Test: {num_requests} requests, {max_workers} concurrent workers")
    print("=" * 60)
    
    api_key = test_results["api_key"]
    test_url = TEST_DOCUMENTS["arxiv_docling"]
    
    def make_request(request_id):
        start = time.time()
        try:
            response = requests.post(
                f"{API_BASE_URL}/v1/convert/source",
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "sources": [{"kind": "http", "url": test_url}],
                    "options": {"output_format": "markdown"}
                },
                timeout=300
            )
            latency = (time.time() - start) * 1000
            success = response.status_code == 200
            return {
                "id": request_id,
                "success": success,
                "latency_ms": latency,
                "status_code": response.status_code
            }
        except Exception as e:
            return {
                "id": request_id,
                "success": False,
                "error": str(e)
            }
    
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(make_request, i) for i in range(num_requests)]
        
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            results.append(result)
            status = "‚úÖ" if result.get("success") else "‚ùå"
            print(f"   {status} Request {result['id']}: {result.get('latency_ms', 0):.2f}ms")
    
    # Summary
    successful = [r for r in results if r.get("success")]
    if successful:
        latencies = [r["latency_ms"] for r in successful]
        print("\n" + "=" * 60)
        print("üìä LOAD TEST SUMMARY")
        print("=" * 60)
        print(f"Total Requests: {num_requests}")
        print(f"Successful: {len(successful)}")
        print(f"Failed: {num_requests - len(successful)}")
        print(f"Success Rate: {len(successful)/num_requests*100:.1f}%")
        print(f"\nLatency (successful requests):")
        print(f"   Min: {min(latencies):.2f}ms")
        print(f"   Max: {max(latencies):.2f}ms")
        print(f"   Avg: {statistics.mean(latencies):.2f}ms")
    
    return results

# Run load test (adjust parameters as needed)
# Warning: This uses credits!
load_results = load_test(num_requests=5, max_workers=5)

üî• Load Test: 5 requests, 5 concurrent workers
   ‚úÖ Request 0: 8008.76ms
   ‚úÖ Request 3: 15044.88ms
   ‚úÖ Request 4: 22387.75ms
   ‚úÖ Request 2: 29411.95ms
   ‚úÖ Request 1: 35934.61ms

üìä LOAD TEST SUMMARY
Total Requests: 5
Successful: 5
Failed: 0
Success Rate: 100.0%

Latency (successful requests):
   Min: 8008.76ms
   Max: 35934.61ms
   Avg: 22157.59ms


In [12]:
# Run the test
load_results = load_test(num_requests=3, max_workers=2)

# Then check the failures
print("\nFailed requests details:")
for r in load_results:
    if not r.get("success"):
        print(f"  Request {r['id']}: status={r.get('status_code')}, error={r.get('error')}")

üî• Load Test: 3 requests, 2 concurrent workers
   ‚ùå Request 1: 5727.38ms
   ‚ùå Request 2: 5592.51ms
   ‚úÖ Request 0: 11861.00ms

üìä LOAD TEST SUMMARY
Total Requests: 3
Successful: 1
Failed: 2
Success Rate: 33.3%

Latency (successful requests):
   Min: 11861.00ms
   Max: 11861.00ms
   Avg: 11861.00ms

Failed requests details:
  Request 1: status=500, error=None
  Request 2: status=500, error=None


In [13]:
# Check what the server is returning for failures
print("Failed request details:")
for r in load_results:
    if not r.get("success"):
        print(f"  Request {r['id']}: status={r.get('status_code')}")

# Let's make a test request with full response capture
import requests
api_key = test_results["api_key"]
test_url = TEST_DOCUMENTS["arxiv_docling"]

response = requests.post(
    f"{API_BASE_URL}/v1/convert/source",
    headers={"Authorization": f"Bearer {api_key}"},
    json={
        "sources": [{"kind": "http", "url": test_url}],
        "options": {"output_format": "markdown"}
    },
    timeout=300
)
print(f"Status: {response.status_code}")
if response.status_code != 200:
    print(f"Error body: {response.text}")
    

Failed request details:
  Request 1: status=500
  Request 2: status=500
Status: 200


In [12]:
# Check what errors occurred
print("Failed requests:")
for r in load_results:
    if not r.get("success"):
        print(f"  Request {r['id']}: status={r.get('status_code')}, error={r.get('error')}")

Failed requests:
  Request 0: status=500, error=None
  Request 2: status=500, error=None


In [None]:
# %% [markdown]
# ## 9. Check Remaining Credits

# %%
def check_credits():
    """Check remaining credits for the API key."""
    print("üí≥ Checking Credits...")
    print("-" * 50)
    
    api_key = test_results["api_key"]
    
    # Make a request to get credits info
    response = requests.get(
        f"{API_BASE_URL}/v1/usage",
        headers={"Authorization": f"Bearer {api_key}"}
    )
    
    if response.status_code == 200:
        data = response.json()
        print(f"Response: {json.dumps(data, indent=2)}")
    else:
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
    
    return response.json() if response.status_code == 200 else None

credits_info = check_credits()


In [None]:
# %% [markdown]
# ## 10. Final Summary Report

# %%
def generate_report():
    """Generate a final summary report of all tests."""
    print("\n" + "=" * 70)
    print("üìã FINAL TEST REPORT")
    print("=" * 70)
    print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"API URL: {API_BASE_URL}")
    
    print("\nüîë API KEY")
    print("-" * 40)
    if test_results["api_key"]:
        print(f"   Key: {test_results['api_key'][:30]}...")
    
    print("\n‚è±Ô∏è  LATENCY TESTS")
    print("-" * 40)
    if test_results["latency_tests"]:
        latencies = [t["total_ms"] for t in test_results["latency_tests"]]
        print(f"   Runs: {len(latencies)}")
        print(f"   Average: {statistics.mean(latencies):.2f}ms")
        print(f"   Range: {min(latencies):.2f}ms - {max(latencies):.2f}ms")
    
    print("\nüéØ ACCURACY TESTS")
    print("-" * 40)
    if test_results["accuracy_tests"]:
        for test in test_results["accuracy_tests"]:
            print(f"   {test['name']}: {test['accuracy']:.1f}%")
    
    print("\n" + "=" * 70)
    print("‚úÖ Testing Complete!")
    print("=" * 70)

generate_report()


In [None]:
# %% [markdown]
# ## Bonus: Quick Test Function

# %%
def quick_test(url: str):
    """Quick function to test any URL."""
    api_key = test_results["api_key"]
    if not api_key:
        print("‚ùå No API key. Run cell 3 first!")
        return
    
    print(f"üöÄ Quick converting: {url}")
    start = time.time()
    
    response = requests.post(
        f"{API_BASE_URL}/v1/convert/source",
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            "sources": [{"kind": "http", "url": url}],
            "options": {"output_format": "markdown"}
        },
        timeout=300
    )
    
    latency = time.time() - start
    
    if response.status_code == 200:
        data = response.json()
        result = data["results"][0]
        print(f"‚úÖ Done in {latency:.2f}s")
        print(f"   Pages: {result.get('pages')}")
        print(f"   Length: {len(result.get('markdown', '')):,} chars")
        return result.get("markdown")
    else:
        print(f"‚ùå Error: {response.text}")
        return None

# Example usage:
# markdown = quick_test("https://example.com/document.pdf")


In [11]:
import requests

# Configuration
API_URL = "https://web-production-00a7f.up.railway.app"
API_KEY = "dk_n1rRM75COGQ_VpWz7kKa3XBvEwOTQKEJ2HvaF91xh6MSuvCexGiyIsk"  # Your client's key

# PDF URL (can be S3, Google Drive, Dropbox, any public URL)
pdf_url = "https://drive.google.com/file/d/1H3poYQBzkHcqvD_HjVJrtVzHCJEPqwl-/view?usp=sharing"  # Docling paper

# Convert
response = requests.post(
    f"{API_URL}/v1/convert/source",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "sources": [{"kind": "http", "url": pdf_url}],
        "options": {"output_format": "markdown"}
    },
    timeout=300
)

# Result
if response.status_code == 200:
    data = response.json()
    markdown = data["results"][0]["markdown"]
    print(f"‚úÖ Success! Pages: {data['results'][0]['pages']}")
    print(f"üìù Markdown preview:\n{markdown[:500]}...")
else:
    print(f"‚ùå Error: {response.text}")

‚úÖ Success! Pages: 1
üìù Markdown preview:
<!-- image -->

Dear Vivek,

Pursuant to the ending of your employment with us, you are herewith relieved from the services of Expedite Commerce India with effect from the close of business hours on April 10th, 2025.

We confirm that you were employed with Expedite Commerce from January 27th, 2025, to April 10th, 2025, and your designation at the time of leaving was AI LLM Agent Engineer

.

We wish you all the best in your future career endeavors.

Best Regards,

Brett Larson Chief People Offic...


In [12]:
markdown

'<!-- image -->\n\nDear Vivek,\n\nPursuant to the ending of your employment with us, you are herewith relieved from the services of Expedite Commerce India with effect from the close of business hours on April 10th, 2025.\n\nWe confirm that you were employed with Expedite Commerce from January 27th, 2025, to April 10th, 2025, and your designation at the time of leaving was AI LLM Agent Engineer\n\n.\n\nWe wish you all the best in your future career endeavors.\n\nBest Regards,\n\nBrett Larson Chief People Officer\n\n<!-- image -->'

In [13]:
# Final teting

In [6]:
import requests

# Configuration
API_URL = "https://web-production-00a7f.up.railway.app"
API_KEY = "dk_nfCI4J1wzrw_c_GVZJD3i3GrxMd85w_LHl-jT1rWcoXS5C5NHhIYwPM"  # Get from /v1/keys endpoint

headers = {"Authorization": f"Bearer {API_KEY}"}

In [10]:
import requests

API_URL = "https://web-production-00a7f.up.railway.app"
API_KEY = "dk_BYyVTukml1A_sTrxMX8NoUT6uaN-YfKchHhXvbycJwI4Bu00wZDLpco"  # Replace with your key

response = requests.post(
    f"{API_URL}/v1/convert/source",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
        "options": {"output_format": "markdown",
                    "enable_ocr": True,
                    "enable_table_extraction": True
        }
    },
    timeout=300
)

if response.status_code == 200:
    data = response.json()
    markdown = data["results"][0]["markdown"]
    print(f"‚úÖ Pages: {data['results'][0]['pages']}")
    print(f"üìù Content:\n{markdown[:500]}...")
else:
    print(f"‚ùå Error: {response.text}")

‚úÖ Pages: 1


TypeError: 'NoneType' object is not subscriptable

In [None]:
markdowns

"## Docling: An Efficient Open-Source Toolkit for AI-driven Document Conversion\n\nNikolaos Livathinos * , Christoph Auer * , Maksym Lysak, Ahmed Nassar, Michele Dolfi, Panagiotis Vagenas, Cesar Berrospi, Matteo Omenetti, Kasper Dinkla, Yusik Kim, Shubham Gupta, Rafael Teixeira de Lima, Valery Weber, Lucas Morin, Ingmar Meijer, Viktor Kuropiatnyk, Peter W. J. Staar\n\nIBM Research, R¬® uschlikon, Switzerland\n\nPlease send correspondence to: deepsearch-core@zurich.ibm.com\n\n## Abstract\n\nWe introduce Docling , an easy-to-use, self-contained, MITlicensed, open-source toolkit for document conversion, that can parse several types of popular document formats into a unified, richly structured representation. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. Docling is released as a Python package and can be used as a Python API or as a

In [None]:
import requests

API_URL = "https://web-production-00a7f.up.railway.app"
API_KEY = "dk_QBgCJkoyQdw_h2w0T5psWw6ULAUgaswiMxlDW8QAOguuTg0ADqtS5lQ"

with open("/Users/viveksingh/Desktop/American-casinos-CIM.pdf", "rb") as f:
    response = requests.post(
        f"{API_URL}/v1/convert/file",
        headers={"Authorization": f"Bearer {API_KEY}"},
        files={"file": ("American-casinos-CIM.pdf", f, "application/pdf")},
        data={
            "enable_ocr": "true",
            "force_full_page_ocr": "true"
        },
        timeout=300
    )

# Print FULL response to debug
import json
print(f"Status: {response.status_code}")
print(f"Full response:\n{json.dumps(response.json(), indent=2)[:3000]}")

Status: 200
Full response:
{
  "request_id": "ea4071fb-b188-4a8b-9f54-d4ef0b8d26f0",
  "results": [
    {
      "source": "American-casinos-CIM.pdf",
      "status": "error",
      "pages": 1,
      "markdown": null,
      "json": null,
      "error": null,
      "processing_time_ms": 6841
    }
  ],
  "credits_used": 1,
  "credits_remaining": 499,
  "total_processing_time_ms": 6843
}
