In [0]:
%pip install --quiet databricks-sdk httpx PyMuPDF openai
dbutils.library.restartPython()

In [0]:
import base64
import fitz 
import pandas as pd
import os
import glob
from pathlib import Path
import time
import random
import threading
from collections import deque
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from pyspark.sql.functions import col, concat, lit, regexp_replace, split
from tqdm import tqdm


In [0]:
dbutils.widgets.text(
    "volume_path",
    "/Volumes/tsfrt/gsa/performance",
    label="Path to volume containing documents",
)

# MLflow experiment name.
dbutils.widgets.text(
    "output_schema",
    "tsfrt.gsa",
    label="Catalog and schema name for output table ({catalog}.{schema})",
)

dbutils.widgets.text(
    "output_catalog",
    "tsfrt",
    label="table for final output with embeddings",
)

dbutils.widgets.text(
    "output_schema",
    "gsa",
    label="table for final output with embeddings",
)

dbutils.widgets.text(
    "output_table",
    "document_base",
    label="table for final output with embeddings",
)

dbutils.widgets.text(
    "embedding_model",
    "databricks-gte-large-en",
    label="embedding model to use",
)

dbutils.widgets.text(
    "foundation_model",
    "databricks-llama-4-maverick",
    label="foundation model used for doc parsing",
)



In [0]:
# UPDATE THESE PATHS FOR YOUR SETUP
OUTPUT_CTLG = dbutils.widgets.get("output_catalog")
OUTPUT_SCHEMA = dbutils.widgets.get("output_schema")
OUTPUT_TABLE = dbutils.widgets.get("output_table")

PDF_DIRECTORY = dbutils.widgets.get("volume_path")
OUTPUT_CATALOG = f"{OUTPUT_CTLG}.{OUTPUT_SCHEMA}"

# You can choose the processing mode:
# "combined" - All PDFs go into one table with doc_id to distinguish (recommended)
# "separate" - Each PDF gets its own table
PROCESSING_MODE = "combined"  # or "separate"

# Table naming
if PROCESSING_MODE == "combined":
    INTERMEDIATE_TABLE = f"{OUTPUT_CATALOG}.all_pdfs_parsed_intermediate"
    FINAL_TABLE = f"{OUTPUT_CATALOG}.all_pdfs_parsed"
else:
    # For separate mode, tables will be named dynamically per PDF
    pass

context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
workspace_url = context.apiUrl().get()
DATABRICKS_TOKEN = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
DATABRICKS_BASE_URL = f'{workspace_url}/serving-endpoints/'

print(f"📂 PDF Directory: {PDF_DIRECTORY}")
print(f"📊 Output Catalog: {OUTPUT_CATALOG}")
print(f"🔧 Processing Mode: {PROCESSING_MODE}")
if PROCESSING_MODE == "combined":
    print(f"💾 Final Table: {FINAL_TABLE}")

📂 PDF Directory: /Volumes/tsfrt/gsa/performance
📊 Output Catalog: tsfrt.gsa
🔧 Processing Mode: combined
💾 Final Table: tsfrt.gsa.all_pdfs_parsed


In [0]:
def count_pdf_pages_fitz(directory=".", show_details=True):
    """
    Count pages in all PDF files in a directory using PyMuPDF (fitz)
    
    Args:
        directory (str): Directory path to scan for PDFs
        show_details (bool): Whether to show individual file counts
    
    Returns:
        tuple: (total_pages, file_count, errors)
    """
    pdf_files = list(Path(directory).glob("*.pdf"))
    
    if not pdf_files:
        print(f"No PDF files found in '{directory}'")
        return 0, 0, 0
    
    total_pages = 0
    file_count = 0
    errors = 0
    
    print(f"Scanning {len(pdf_files)} PDF files in '{directory}'...\n")
    
    for pdf_file in pdf_files:
        try:
            # Open PDF with fitz
            doc = fitz.open(pdf_file)
            pages = doc.page_count
            doc.close()
            
            if show_details:
                print(f"{pdf_file.name:<50} {pages:>6} pages")
            
            total_pages += pages
            file_count += 1
            
        except Exception as e:
            print(f"ERROR - {pdf_file.name}: {e}")
            errors += 1
    print("\n")
    return total_pages, file_count, errors

In [0]:

total_pages, num_documents, errors = (count_pdf_pages_fitz(PDF_DIRECTORY, show_details=True))

print(f"Total {num_documents} PDFs found with a total of {total_pages} pages. {errors} errors.")

Scanning 6 PDF files in '/Volumes/tsfrt/gsa/performance'...

FY 2024 GSA Annual Performance Plan FY 2022 Report_Final_508.pdf     91 pages
FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf     32 pages
FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf        10 pages
GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf     29 pages
GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf      90 pages
executive-documents.pdf                                 1 pages


Total 6 PDFs found with a total of 253 pages. 0 errors.


In [0]:
def get_pdf_files(directory_path):
    """
    Get all PDF files from a Unity Catalog volume directory.
    
    Args:
        directory_path: Path to directory containing PDFs
        
    Returns:
        List of PDF file paths
    """
    try:
        # List all files in the directory
        files = dbutils.fs.ls(directory_path)
        
        # Filter for PDF files and clean the paths
        pdf_files = []
        for file in files:
            if file.path.lower().endswith('.pdf'):
                # Remove 'dbfs:' prefix if present to work with PyMuPDF
                clean_path = file.path.replace('dbfs:', '') if file.path.startswith('dbfs:') else file.path
                pdf_files.append(clean_path)
        
        print(f"Found {len(pdf_files)} PDF files in {directory_path}")
        for pdf in pdf_files:
            file_name = os.path.basename(pdf)
            print(f"  - {file_name}")
            print(f"    Path: {pdf}")
            
        return pdf_files
        
    except Exception as e:
        print(f"Error accessing directory {directory_path}: {str(e)}")
        return []

def get_clean_doc_name(pdf_path):
    """Extract a clean document name from the PDF path for table naming."""
    file_name = os.path.basename(pdf_path)
    # Remove .pdf extension and clean up for table naming
    clean_name = file_name.replace('.pdf', '').replace('.PDF', '')
    # Replace special characters with underscores
    clean_name = ''.join(c if c.isalnum() else '_' for c in clean_name)
    # Remove consecutive underscores and strip
    clean_name = '_'.join(filter(None, clean_name.split('_')))
    return clean_name.lower()

In [0]:
def convert_pdf_to_base64(pdf_path, dpi=300):
    """
    PDF conversion with better metadata and error handling.
    
    Args:
        pdf_path: Path to PDF file
        dpi: Resolution
    
    Returns:
        pandas DataFrame with metadata, success boolean, error message
    """
    
    zoom = dpi / 72
    zoom_matrix = fitz.Matrix(zoom, zoom)
    
    try:
        doc = fitz.open(pdf_path)
        num_pages = len(doc)
        
        # Extract document metadata
        metadata = doc.metadata
        file_name = os.path.basename(pdf_path)
        clean_doc_name = get_clean_doc_name(pdf_path)
        
        print(f"Converting {file_name} to base64: {num_pages} pages at {dpi} DPI...")
        
        df_data = []
        start_time = time.time()
        
        for page_num in range(num_pages):
            if page_num % 25 == 0:  # Progress update every 25 pages
                print(f"  Converting page {page_num + 1}/{num_pages} to base64")
            
            page = doc.load_page(page_num)
            
            # Get page dimensions and text for metadata
            page_rect = page.rect
            page_text_length = len(page.get_text())
            
            pix = page.get_pixmap(matrix=zoom_matrix, alpha=False)
            img_bytes = pix.tobytes("png")  
            img_base64 = base64.b64encode(img_bytes).decode('utf-8')
            
            df_data.append({
                'doc_id': pdf_path,
                'doc_name': clean_doc_name,
                'file_name': file_name,
                'page_num': page_num + 1,
                'total_pages': num_pages,
                'page_width': page_rect.width,
                'page_height': page_rect.height,
                'page_text_length': page_text_length,
                'base64_img': img_base64,
                'processed_timestamp': datetime.now(),
                'dpi': dpi,
                'doc_title': metadata.get('title', ''),
                'doc_author': metadata.get('author', ''),
                'doc_subject': metadata.get('subject', ''),
                'doc_creator': metadata.get('creator', '')
            })
        
        doc.close()
        processing_time = time.time() - start_time
        
        print(f"  Conversion complete: {len(df_data)} pages in {processing_time:.1f}s")
        
        return pd.DataFrame(df_data), True, None
        
    except Exception as e:
        error_msg = f"Error processing {pdf_path}: {str(e)}"
        print(f"❌ {error_msg}")
        return None, False, error_msg

In [0]:
def save_to_unity_catalog(df, table_path, mode="append"):
    """
    Save function with better error handling and options.
    """
    try:
        spark_df = spark.createDataFrame(df)
        
        if mode == "overwrite":
            spark_df.write \
                .format("delta") \
                .mode("overwrite") \
                .option("overwriteSchema", "true") \
                .saveAsTable(table_path)
        else:
            spark_df.write \
                .format("delta") \
                .mode("append") \
                .saveAsTable(table_path)
        
        print(f"✅ Saved {len(df)} records to: {table_path}")
        return True
        
    except Exception as e:
        print(f"❌ Error saving to {table_path}: {str(e)}")
        return False

In [0]:
RETRYABLE_ERROR_SUBSTRINGS = ["retry", "got empty embedding result", "request_limit_exceeded", "rate limit", "insufficient_quota", "expecting value", "rate", "overloaded", "429", "bad gateway", "502"]

class RateLimitTracker:
    """Track API rate limits and adjust concurrency dynamically."""
    
    def __init__(self, initial_workers=5, min_workers=1, max_workers=10):
        self.current_workers = initial_workers
        self.min_workers = min_workers
        self.max_workers = max_workers
        self.rate_limit_events = deque(maxlen=20)  # Track recent rate limits
        self.success_count = 0
        self.lock = threading.Lock()
        
    def record_rate_limit(self):
        """Record a rate limit event and potentially reduce workers."""
        with self.lock:
            self.rate_limit_events.append(datetime.now())
            
            # If we've had multiple rate limits recently, reduce workers
            recent_limits = sum(1 for event in self.rate_limit_events 
                              if datetime.now() - event < timedelta(minutes=2))
            
            if recent_limits >= 3 and self.current_workers > self.min_workers:
                old_workers = self.current_workers
                self.current_workers = max(self.min_workers, self.current_workers - 1)
                print(f"🔽 Rate limits detected! Reducing workers: {old_workers} → {self.current_workers}")
                
    def record_success(self):
        """Record successful processing and potentially increase workers."""
        with self.lock:
            self.success_count += 1
            
            # If no recent rate limits and we've had some successes, gradually increase workers
            recent_limits = sum(1 for event in self.rate_limit_events 
                              if datetime.now() - event < timedelta(minutes=5))
            
            # Increase workers every 20 successes if no recent rate limits
            if (recent_limits == 0 and 
                self.current_workers < self.max_workers and 
                self.success_count % 20 == 0):
                old_workers = self.current_workers
                self.current_workers = min(self.max_workers, self.current_workers + 1)
                print(f"🔼 Performance good! Increasing workers: {old_workers} → {self.current_workers}")

In [0]:
def process_single_image(prompt, image_data, image_index, databricks_token, databricks_url, model, rate_tracker):
    """Process a single image with adaptive rate limiting."""
    
    client = OpenAI(api_key=databricks_token, base_url=databricks_url)
    
    # Skip empty images
    if pd.isna(image_data) or image_data == "":
        return (image_index, "ERROR: Empty image")
    
    
    # Retry logic with exponential backoff
    for attempt in range(3):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                        }
                    ]
                }]
            )
            
            result = response.choices[0].message.content.strip()
            rate_tracker.record_success()
            
            # Print success message if this was a retry attempt
            if attempt > 0:
                print(f"✅ SUCCESS: Image {image_index} processed successfully after {attempt + 1} attempts")
            
            return (image_index, result)
            
        except Exception as e:
            error_str = str(e).lower()
            is_retryable = any(substring in error_str for substring in RETRYABLE_ERROR_SUBSTRINGS)
            
            if is_retryable:
                rate_tracker.record_rate_limit()
                
                if attempt < 2:  # Only retry if we have attempts left
                    # Exponential backoff with jitter
                    wait_time = (2 ** attempt) + random.uniform(1, 3)
                    print(f"⚠️  RATE LIMIT: Image {image_index}, attempt {attempt + 1}/3. Retrying in {wait_time:.1f}s...")
                    time.sleep(wait_time)
                    continue
                else:
                    print(f"❌ FAILED: Image {image_index} failed after 3 attempts due to rate limiting")
                    return (image_index, f"ERROR: Rate limited after 3 attempts - {str(e)}")
            else:
                print(f"❌ ERROR: Image {image_index} failed with non-retryable error: {str(e)}")
                return (image_index, f"ERROR: {str(e)}")
    
    return (image_index, "ERROR: Max retries exceeded")

In [0]:
def process_images_adaptive(prompt, images, databricks_token, databricks_url, 
                           model="databricks-llama-4-maverick", 
                           initial_workers=5, min_workers=1, max_workers=10):
    """
    Adaptive processing that adjusts concurrency based on rate limits.
    
    Args:
        images: pandas Series of base64 encoded image strings
        databricks_token: Token for Databricks API  
        databricks_url: Base URL for Databricks API
        model: Model name to use
        initial_workers: Starting number of concurrent workers
        min_workers: Minimum workers (fallback during heavy rate limiting)
        max_workers: Maximum workers (cap for scaling up)
        
    Returns:
        pandas Series: Results with same index as input
    """
    
    # Convert to pandas Series if needed
    if not isinstance(images, pd.Series):
        images = pd.Series(images)
    
    results = pd.Series(index=images.index, dtype='object')
    rate_tracker = RateLimitTracker(
        initial_workers=initial_workers, 
        min_workers=min_workers, 
        max_workers=max_workers
    )
    
    print(f"🚀 Starting transcription of {len(images)} images...")
    print(f"📊 Model: {model}")
    print(f"⚙️  Workers: {initial_workers} (range: {min_workers}-{max_workers})")
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        with tqdm(total=len(images), desc="Processing images", unit="img") as pbar:
            
            remaining_items = list(images.items())
            
            while remaining_items:
                # Submit batch based on current worker count
                batch_size = min(rate_tracker.current_workers, len(remaining_items))
                current_batch = remaining_items[:batch_size]
                remaining_items = remaining_items[batch_size:]
                
                # Submit current batch
                futures = {
                    executor.submit(process_single_image, prompt, img_data, idx, 
                                  databricks_token, databricks_url, model, rate_tracker): idx
                    for idx, img_data in current_batch
                }
                
                # Process batch results
                for future in as_completed(futures):
                    try:
                        image_index, result = future.result()
                        results[image_index] = result
                        
                        # Update progress bar with status and current worker count
                        if result.startswith("ERROR:"):
                            pbar.set_postfix({
                                "Last": f"❌ {image_index}", 
                                "Workers": rate_tracker.current_workers,
                                "Rate Limits": len(rate_tracker.rate_limit_events)
                            })
                        else:
                            pbar.set_postfix({
                                "Last": f"✅ {image_index}", 
                                "Workers": rate_tracker.current_workers,
                                "Rate Limits": len(rate_tracker.rate_limit_events)
                            })
                        
                    except Exception as e:
                        idx = futures[future]
                        results[idx] = f"ERROR: Exception - {str(e)}"
                        pbar.set_postfix({
                            "Last": f"❌ {idx} (Exception)", 
                            "Workers": rate_tracker.current_workers
                        })
                        print(f"❌ EXCEPTION: Image {idx} failed with exception: {str(e)}")
                    
                    pbar.update(1)
                
                # Small delay between batches if we have more to process
                if remaining_items:
                    time.sleep(0.2)  # Small delay to prevent overwhelming
    
    # Final summary statistics
    error_count = sum(1 for result in results if str(result).startswith("ERROR:"))
    success_count = len(results) - error_count
    
    print(f"\n📈 Llama 4 Transcription Summary:")
    print(f"   ✅ Successful: {success_count}/{len(results)}")
    print(f"   ❌ Failed: {error_count}/{len(results)}")
    print(f"   📊 Success rate: {(success_count/len(results)*100):.1f}%")
    print(f"   🔧 Final worker count: {rate_tracker.current_workers}")
    print(f"   ⚠️  Total rate limit events: {len(rate_tracker.rate_limit_events)}")
    
    return results

Tweak your prompt based on document content as needed.

In [0]:

    # Define the prompt
PROMPT = """
Instructions: Transcribe only the visible text from this PDF page. 
Rules:
- Use markdown formatting only for text that appears formatted in the original
- Do not add document titles, page headers, or section headings unless explicitly visible
- Do not add introductory text like 'This page contains...' or 'The document shows...' or '# Transcription of PDF Page'
- Preserve exact wording and technical terminology
- For images/diagrams: describe content within <figure></figure> tags
- For tables: use markdown table format if present
- Start transcription immediately without preamble
For visual elements, follow these rules:
**TABLES**: If the content is clearly a structured table, provide BOTH:
1. A detailed caption in <figure></figure> tags describing the table structure and content
2. The actual table recreated in markdown format with proper alignment
**FLOWCHARTS/DECISION TREES**: Provide detailed caption in <figure></figure> tags including:
- Starting point and decision criteria
- All pathways and decision branches
- Specific thresholds, values, and conditions
- Final outcomes and recommendations
- Flow direction and logical connections
**CHARTS/DIAGRAMS**: Provide detailed caption in <figure></figure> tags including:
- Chart type and title
- All categories, sections, and color coding
- Specific values, ranges, and criteria
- Evidence levels and recommendations
- Visual organization and groupings
**FORMS/CHECKLISTS**: Transcribe structure using markdown formatting, preserving:
- Section headers and numbering
- Checkbox options and rating scales
- Please bold the Key in Key-Value Pairs in the form, e.g. **Name **: John Doe.
Preserve exact technical terminology, drug names, dosages, and clinical criteria for diagnostic accuracy.
This transcription will be used for technical diagnosis, so accuracy is critical.
"""

In [0]:
def process_multiple_pdfs(pdf_directory, output_catalog, prompt=PROMPT, processing_mode="combined", 
                         dpi=300, model="databricks-llama-4-maverick", initial_workers=5, 
                         min_workers=1, max_workers=10):
    """
    Process all PDFs in a directory.
    
    Args:
        pdf_directory: Directory containing PDF files
        output_catalog: Catalog.schema for output tables
        processing_mode: "combined" or "separate"
        dpi: Image resolution
        model: LLM model to use
    """
    
    # Discover PDF files
    pdf_files = get_pdf_files(pdf_directory)
    
    if not pdf_files:
        print("No PDF files found. Exiting.")
        return
    
    print(f"\n🚀 Starting batch processing of {len(pdf_files)} PDFs")
    print(f"📊 Processing mode: {processing_mode}")
    print(f"🎯 Output catalog: {output_catalog}")
    
    # Initialize tracking variables
    total_files = len(pdf_files)
    successful_files = 0
    failed_files = 0
    total_pages_processed = 0
    all_results = []
    processing_log = []
    
    # Process each PDF
    for file_idx, pdf_path in enumerate(pdf_files, 1):
        file_name = os.path.basename(pdf_path)
        clean_doc_name = get_clean_doc_name(pdf_path)
        
        print(f"\n{'='*60}")
        print(f"📄 Processing file {file_idx}/{total_files}: {file_name}")
        print(f"{'='*60}")
        
        file_start_time = time.time()
        
        try:
            # Convert PDF to base64 images
            df, success, error = convert_pdf_to_base64(pdf_path, dpi=dpi)
            
            if not success:
                failed_files += 1
                processing_log.append({
                    'file_name': file_name,
                    'status': 'FAILED_CONVERSION',
                    'error': error,
                    'pages_processed': 0,
                    'processing_time': time.time() - file_start_time
                })
                continue
            
            # Save intermediate results
            if processing_mode == "combined":
                intermediate_table = f"{output_catalog}.all_pdfs_parsed_intermediate"
                save_mode = "append" if file_idx > 1 else "overwrite"
            else:
                intermediate_table = f"{output_catalog}.{clean_doc_name}_parsed_intermediate"
                save_mode = "overwrite"
                
            save_to_unity_catalog(df, intermediate_table, mode=save_mode)
            
            # Process images with LLM
            print(f"🤖 Starting LLM processing for {len(df)} pages...")
            
            # Process with adaptive rate limiting
            results_series = process_images_adaptive(
                prompt=prompt,
                images=df['base64_img'],
                databricks_token=DATABRICKS_TOKEN,
                databricks_url=DATABRICKS_BASE_URL,
                model=model,
                initial_workers=initial_workers,
                min_workers=min_workers,
                max_workers=max_workers
            )
            
            # Add transcription results to dataframe
            df['transcription'] = results_series
            
            # Count successful transcriptions
            error_count = sum(1 for result in results_series if str(result).startswith("ERROR:"))
            success_count = len(results_series) - error_count
            
            # Save final results
            if processing_mode == "combined":
                final_table = f"{output_catalog}.all_pdfs_parsed"
                save_mode = "append" if file_idx > 1 else "overwrite"
            else:
                final_table = f"{output_catalog}.{clean_doc_name}_parsed"
                save_mode = "overwrite"
                
            save_success = save_to_unity_catalog(df, final_table, mode=save_mode)
            
            if save_success:
                successful_files += 1
                total_pages_processed += len(df)
                all_results.append(df)
                
                file_processing_time = time.time() - file_start_time
                
                processing_log.append({
                    'file_name': file_name,
                    'status': 'SUCCESS',
                    'pages_processed': len(df),
                    'successful_transcriptions': success_count,
                    'failed_transcriptions': error_count,
                    'processing_time': file_processing_time,
                    'final_table': final_table
                })
                
                print(f"✅ File completed successfully:")
                print(f"   📊 Pages: {len(df)}")
                print(f"   ✅ Successful transcriptions: {success_count}")
                print(f"   ❌ Failed transcriptions: {error_count}")
                print(f"   ⏱️  Processing time: {file_processing_time:.1f}s")
                print(f"   💾 Saved to: {final_table}")
            else:
                failed_files += 1
                processing_log.append({
                    'file_name': file_name,
                    'status': 'FAILED_SAVE',
                    'pages_processed': len(df),
                    'processing_time': time.time() - file_start_time
                })
                
        except Exception as e:
            failed_files += 1
            file_processing_time = time.time() - file_start_time
            error_msg = str(e)
            
            processing_log.append({
                'file_name': file_name,
                'status': 'FAILED_EXCEPTION',
                'error': error_msg,
                'pages_processed': 0,
                'processing_time': file_processing_time
            })
            
            print(f"❌ Failed to process {file_name}: {error_msg}")
    
    # Final summary
    print(f"\n{'='*80}")
    print(f"🎊 BATCH PROCESSING COMPLETE")
    print(f"{'='*80}")
    print(f"📊 Files processed: {successful_files}/{total_files}")
    print(f"📄 Total pages processed: {total_pages_processed}")
    print(f"✅ Successful files: {successful_files}")
    print(f"❌ Failed files: {failed_files}")
    
    if processing_mode == "combined" and successful_files > 0:
        print(f"💾 All results combined in: {output_catalog}.all_pdfs_parsed")
    
    # Show processing log
    print(f"\n📋 PROCESSING LOG:")
    for log_entry in processing_log:
        status_emoji = "✅" if log_entry['status'] == 'SUCCESS' else "❌"
        print(f"   {status_emoji} {log_entry['file_name']}: {log_entry['status']} "
              f"({log_entry['pages_processed']} pages, {log_entry['processing_time']:.1f}s)")
        
        if 'error' in log_entry:
            print(f"      Error: {log_entry['error']}")
    
    return processing_log, all_results

In [0]:
# Run the batch processing
processing_log, all_results = process_multiple_pdfs(
    pdf_directory=PDF_DIRECTORY,
    output_catalog=OUTPUT_CATALOG,
    prompt = PROMPT,
    processing_mode=PROCESSING_MODE,
    dpi=150,
    model="databricks-llama-4-maverick", #default databricks-llama-4-maverick, change to your own provisioned throughput endpoint for more speed
    initial_workers=3, #update if you have a provisioned throughput endpoint
    min_workers=1, #default 1
    max_workers=3 #update if you have a provisioned throughput endpoint
)

Found 6 PDF files in /Volumes/tsfrt/gsa/performance
  - FY 2024 GSA Annual Performance Plan FY 2022 Report_Final_508.pdf
    Path: /Volumes/tsfrt/gsa/performance/FY 2024 GSA Annual Performance Plan FY 2022 Report_Final_508.pdf
  - FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf
    Path: /Volumes/tsfrt/gsa/performance/FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf
  - FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf
    Path: /Volumes/tsfrt/gsa/performance/FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf
  - GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf
    Path: /Volumes/tsfrt/gsa/performance/GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf
  - GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf
    Path: /Volumes/tsfrt/gsa/performance/GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf
  - executive-documents.pdf
    Path: /Volumes/tsfrt/gsa/performance/executive-documents.pdf

🚀 Starting batch processing of 6 PDFs
📊 Processing mode: combined
🎯 Out

Processing images:   0%|          | 0/91 [00:00<?, ?img/s]Processing images:   0%|          | 0/91 [00:00<?, ?img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   1%|          | 1/91 [00:00<01:23,  1.07img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   1%|          | 1/91 [00:01<01:23,  1.07img/s, Last=❌ 2, Workers=3, Rate Limits=0]

❌ ERROR: Image 1 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 2 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   2%|▏         | 2/91 [00:01<01:22,  1.07img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 3/91 [00:01<00:28,  3.12img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   3%|▎         | 3/91 [00:02<00:28,  3.12img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:   4%|▍         | 4/91 [00:02<00:50,  1.72img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:   4%|▍         | 4/91 [00:02<00:50,  1.72img/s, Last=❌ 4, Workers=3, Rate Limits=0]Processing images:   5%|▌         | 5/91 [00:02<00:50,  1.72img/s, Last=❌ 5, Workers=3, Rate Limits=0]

❌ ERROR: Image 3 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 4 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 5 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   7%|▋         | 6/91 [00:03<00:49,  1.72img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:   8%|▊         | 7/91 [00:03<00:36,  2.30img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:   8%|▊         | 7/91 [00:03<00:36,  2.30img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:   9%|▉         | 8/91 [00:03<00:30,  2.70img/s, Last=❌ 6, Workers=3, Rate Limits=0]

❌ ERROR: Image 7 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 6 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   9%|▉         | 8/91 [00:03<00:30,  2.70img/s, Last=❌ 8, Workers=3, Rate Limits=0]Processing images:  10%|▉         | 9/91 [00:03<00:27,  3.02img/s, Last=❌ 8, Workers=3, Rate Limits=0]

❌ ERROR: Image 8 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  10%|▉         | 9/91 [00:04<00:27,  3.02img/s, Last=❌ 11, Workers=3, Rate Limits=0]Processing images:  11%|█         | 10/91 [00:04<00:40,  2.00img/s, Last=❌ 11, Workers=3, Rate Limits=0]Processing images:  11%|█         | 10/91 [00:04<00:40,  2.00img/s, Last=❌ 9, Workers=3, Rate Limits=0] Processing images:  12%|█▏        | 11/91 [00:04<00:33,  2.38img/s, Last=❌ 9, Workers=3, Rate Limits=0]

❌ ERROR: Image 11 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 9 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  12%|█▏        | 11/91 [00:04<00:33,  2.38img/s, Last=❌ 10, Workers=3, Rate Limits=0]

❌ ERROR: Image 10 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  13%|█▎        | 12/91 [00:05<00:33,  2.38img/s, Last=❌ 14, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 13/91 [00:05<00:37,  2.08img/s, Last=❌ 14, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 13/91 [00:06<00:37,  2.08img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  15%|█▌        | 14/91 [00:06<00:31,  2.48img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  15%|█▌        | 14/91 [00:06<00:31,  2.48img/s, Last=❌ 13, Workers=3, Rate Limits=0]

❌ ERROR: Image 14 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 12 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 13 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  16%|█▋        | 15/91 [00:07<00:30,  2.48img/s, Last=❌ 17, Workers=3, Rate Limits=0]Processing images:  18%|█▊        | 16/91 [00:07<00:33,  2.25img/s, Last=❌ 17, Workers=3, Rate Limits=0]

❌ ERROR: Image 17 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  18%|█▊        | 16/91 [00:07<00:33,  2.25img/s, Last=❌ 16, Workers=3, Rate Limits=0]Processing images:  19%|█▊        | 17/91 [00:07<00:30,  2.41img/s, Last=❌ 16, Workers=3, Rate Limits=0]Processing images:  19%|█▊        | 17/91 [00:07<00:30,  2.41img/s, Last=❌ 15, Workers=3, Rate Limits=0]

❌ ERROR: Image 16 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 15 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  20%|█▉        | 18/91 [00:08<00:30,  2.41img/s, Last=❌ 20, Workers=3, Rate Limits=0]Processing images:  21%|██        | 19/91 [00:08<00:34,  2.08img/s, Last=❌ 20, Workers=3, Rate Limits=0]Processing images:  21%|██        | 19/91 [00:08<00:34,  2.08img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  22%|██▏       | 20/91 [00:08<00:34,  2.08img/s, Last=❌ 19, Workers=3, Rate Limits=0]

❌ ERROR: Image 20 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 18 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 19 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  23%|██▎       | 21/91 [00:09<00:33,  2.08img/s, Last=❌ 22, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 22/91 [00:09<00:27,  2.53img/s, Last=❌ 22, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 22/91 [00:09<00:27,  2.53img/s, Last=❌ 21, Workers=3, Rate Limits=0]Processing images:  25%|██▌       | 23/91 [00:09<00:26,  2.53img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  26%|██▋       | 24/91 [00:09<00:19,  3.42img/s, Last=❌ 23, Workers=3, Rate Limits=0]

❌ ERROR: Image 22 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 21 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 23 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  26%|██▋       | 24/91 [00:10<00:19,  3.42img/s, Last=❌ 24, Workers=3, Rate Limits=0]Processing images:  27%|██▋       | 25/91 [00:10<00:25,  2.61img/s, Last=❌ 24, Workers=3, Rate Limits=0]Processing images:  27%|██▋       | 25/91 [00:10<00:25,  2.61img/s, Last=❌ 25, Workers=3, Rate Limits=0]Processing images:  29%|██▊       | 26/91 [00:10<00:24,  2.61img/s, Last=❌ 26, Workers=3, Rate Limits=0]

❌ ERROR: Image 24 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 25 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 26 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  30%|██▉       | 27/91 [00:11<00:24,  2.61img/s, Last=❌ 28, Workers=3, Rate Limits=0]Processing images:  31%|███       | 28/91 [00:11<00:22,  2.86img/s, Last=❌ 28, Workers=3, Rate Limits=0]Processing images:  31%|███       | 28/91 [00:11<00:22,  2.86img/s, Last=❌ 27, Workers=3, Rate Limits=0]Processing images:  32%|███▏      | 29/91 [00:11<00:21,  2.86img/s, Last=❌ 29, Workers=3, Rate Limits=0]Processing images:  33%|███▎      | 30/91 [00:11<00:16,  3.75img/s, Last=❌ 29, Workers=3, Rate Limits=0]

❌ ERROR: Image 28 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 27 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 29 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  33%|███▎      | 30/91 [00:12<00:16,  3.75img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 31/91 [00:12<00:21,  2.78img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 31/91 [00:12<00:21,  2.78img/s, Last=❌ 31, Workers=3, Rate Limits=0]Processing images:  35%|███▌      | 32/91 [00:12<00:21,  2.78img/s, Last=❌ 32, Workers=3, Rate Limits=0]

❌ ERROR: Image 30 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 31 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 32 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  36%|███▋      | 33/91 [00:13<00:20,  2.78img/s, Last=❌ 33, Workers=3, Rate Limits=0]Processing images:  37%|███▋      | 34/91 [00:13<00:18,  3.03img/s, Last=❌ 33, Workers=3, Rate Limits=0]Processing images:  37%|███▋      | 34/91 [00:13<00:18,  3.03img/s, Last=❌ 34, Workers=3, Rate Limits=0]Processing images:  38%|███▊      | 35/91 [00:13<00:18,  3.03img/s, Last=❌ 35, Workers=3, Rate Limits=0]

❌ ERROR: Image 33 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 34 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 35 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  40%|███▉      | 36/91 [00:13<00:18,  3.03img/s, Last=❌ 36, Workers=3, Rate Limits=0]Processing images:  41%|████      | 37/91 [00:13<00:16,  3.24img/s, Last=❌ 36, Workers=3, Rate Limits=0]Processing images:  41%|████      | 37/91 [00:13<00:16,  3.24img/s, Last=❌ 38, Workers=3, Rate Limits=0]Processing images:  42%|████▏     | 38/91 [00:13<00:16,  3.24img/s, Last=❌ 37, Workers=3, Rate Limits=0]

❌ ERROR: Image 36 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 38 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 37 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  43%|████▎     | 39/91 [00:14<00:16,  3.24img/s, Last=❌ 39, Workers=3, Rate Limits=0]Processing images:  44%|████▍     | 40/91 [00:14<00:15,  3.31img/s, Last=❌ 39, Workers=3, Rate Limits=0]Processing images:  44%|████▍     | 40/91 [00:14<00:15,  3.31img/s, Last=❌ 40, Workers=3, Rate Limits=0]Processing images:  45%|████▌     | 41/91 [00:14<00:15,  3.31img/s, Last=❌ 41, Workers=3, Rate Limits=0]Processing images:  46%|████▌     | 42/91 [00:14<00:11,  4.16img/s, Last=❌ 41, Workers=3, Rate Limits=0]

❌ ERROR: Image 39 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 40 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 41 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  46%|████▌     | 42/91 [00:15<00:11,  4.16img/s, Last=❌ 43, Workers=3, Rate Limits=0]Processing images:  47%|████▋     | 43/91 [00:15<00:15,  3.06img/s, Last=❌ 43, Workers=3, Rate Limits=0]Processing images:  47%|████▋     | 43/91 [00:15<00:15,  3.06img/s, Last=❌ 42, Workers=3, Rate Limits=0]Processing images:  48%|████▊     | 44/91 [00:15<00:15,  3.06img/s, Last=❌ 44, Workers=3, Rate Limits=0]

❌ ERROR: Image 43 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 42 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 44 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  49%|████▉     | 45/91 [00:16<00:15,  3.06img/s, Last=❌ 47, Workers=3, Rate Limits=0]Processing images:  51%|█████     | 46/91 [00:16<00:14,  3.21img/s, Last=❌ 47, Workers=3, Rate Limits=0]Processing images:  51%|█████     | 46/91 [00:16<00:14,  3.21img/s, Last=❌ 45, Workers=3, Rate Limits=0]Processing images:  52%|█████▏    | 47/91 [00:16<00:13,  3.21img/s, Last=❌ 46, Workers=3, Rate Limits=0]

❌ ERROR: Image 47 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 45 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 46 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  53%|█████▎    | 48/91 [00:17<00:13,  3.21img/s, Last=❌ 48, Workers=3, Rate Limits=0]Processing images:  54%|█████▍    | 49/91 [00:17<00:12,  3.37img/s, Last=❌ 48, Workers=3, Rate Limits=0]

❌ ERROR: Image 48 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  54%|█████▍    | 49/91 [00:17<00:12,  3.37img/s, Last=❌ 50, Workers=3, Rate Limits=0]Processing images:  55%|█████▍    | 50/91 [00:17<00:13,  3.04img/s, Last=❌ 50, Workers=3, Rate Limits=0]Processing images:  55%|█████▍    | 50/91 [00:17<00:13,  3.04img/s, Last=❌ 49, Workers=3, Rate Limits=0]Processing images:  56%|█████▌    | 51/91 [00:17<00:11,  3.45img/s, Last=❌ 49, Workers=3, Rate Limits=0]

❌ ERROR: Image 50 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 49 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  56%|█████▌    | 51/91 [00:18<00:11,  3.45img/s, Last=❌ 52, Workers=3, Rate Limits=0]Processing images:  57%|█████▋    | 52/91 [00:18<00:15,  2.53img/s, Last=❌ 52, Workers=3, Rate Limits=0]Processing images:  57%|█████▋    | 52/91 [00:18<00:15,  2.53img/s, Last=❌ 51, Workers=3, Rate Limits=0]Processing images:  58%|█████▊    | 53/91 [00:18<00:14,  2.53img/s, Last=❌ 53, Workers=3, Rate Limits=0]

❌ ERROR: Image 52 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 51 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 53 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  59%|█████▉    | 54/91 [00:19<00:14,  2.53img/s, Last=❌ 56, Workers=3, Rate Limits=0]Processing images:  60%|██████    | 55/91 [00:19<00:12,  2.90img/s, Last=❌ 56, Workers=3, Rate Limits=0]Processing images:  60%|██████    | 55/91 [00:19<00:12,  2.90img/s, Last=❌ 55, Workers=3, Rate Limits=0]Processing images:  62%|██████▏   | 56/91 [00:19<00:12,  2.90img/s, Last=❌ 54, Workers=3, Rate Limits=0]

❌ ERROR: Image 56 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 55 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 54 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  63%|██████▎   | 57/91 [00:20<00:11,  2.90img/s, Last=❌ 57, Workers=3, Rate Limits=0]Processing images:  64%|██████▎   | 58/91 [00:20<00:10,  3.11img/s, Last=❌ 57, Workers=3, Rate Limits=0]Processing images:  64%|██████▎   | 58/91 [00:20<00:10,  3.11img/s, Last=❌ 59, Workers=3, Rate Limits=0]Processing images:  65%|██████▍   | 59/91 [00:20<00:10,  3.11img/s, Last=❌ 58, Workers=3, Rate Limits=0]

❌ ERROR: Image 57 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 59 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 58 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  66%|██████▌   | 60/91 [00:21<00:09,  3.11img/s, Last=❌ 61, Workers=3, Rate Limits=0]Processing images:  67%|██████▋   | 61/91 [00:21<00:09,  3.15img/s, Last=❌ 61, Workers=3, Rate Limits=0]

❌ ERROR: Image 61 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  67%|██████▋   | 61/91 [00:21<00:09,  3.15img/s, Last=❌ 60, Workers=3, Rate Limits=0]Processing images:  68%|██████▊   | 62/91 [00:21<00:09,  2.98img/s, Last=❌ 60, Workers=3, Rate Limits=0]Processing images:  68%|██████▊   | 62/91 [00:21<00:09,  2.98img/s, Last=❌ 62, Workers=3, Rate Limits=0]

❌ ERROR: Image 60 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 62 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  69%|██████▉   | 63/91 [00:22<00:09,  2.98img/s, Last=❌ 64, Workers=3, Rate Limits=0]Processing images:  70%|███████   | 64/91 [00:22<00:09,  2.82img/s, Last=❌ 64, Workers=3, Rate Limits=0]Processing images:  70%|███████   | 64/91 [00:22<00:09,  2.82img/s, Last=❌ 65, Workers=3, Rate Limits=0]Processing images:  71%|███████▏  | 65/91 [00:22<00:09,  2.82img/s, Last=❌ 63, Workers=3, Rate Limits=0]

❌ ERROR: Image 64 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 65 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 63 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  73%|███████▎  | 66/91 [00:23<00:08,  2.82img/s, Last=❌ 66, Workers=3, Rate Limits=0]Processing images:  74%|███████▎  | 67/91 [00:23<00:07,  3.11img/s, Last=❌ 66, Workers=3, Rate Limits=0]Processing images:  74%|███████▎  | 67/91 [00:23<00:07,  3.11img/s, Last=❌ 67, Workers=3, Rate Limits=0]Processing images:  75%|███████▍  | 68/91 [00:23<00:07,  3.11img/s, Last=❌ 68, Workers=3, Rate Limits=0]Processing images:  76%|███████▌  | 69/91 [00:23<00:05,  3.93img/s, Last=❌ 68, Workers=3, Rate Limits=0]

❌ ERROR: Image 66 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 67 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 68 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  76%|███████▌  | 69/91 [00:24<00:05,  3.93img/s, Last=❌ 70, Workers=3, Rate Limits=0]Processing images:  77%|███████▋  | 70/91 [00:24<00:07,  2.86img/s, Last=❌ 70, Workers=3, Rate Limits=0]Processing images:  77%|███████▋  | 70/91 [00:24<00:07,  2.86img/s, Last=❌ 71, Workers=3, Rate Limits=0]Processing images:  78%|███████▊  | 71/91 [00:24<00:06,  2.86img/s, Last=❌ 69, Workers=3, Rate Limits=0]

❌ ERROR: Image 70 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 71 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 69 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  79%|███████▉  | 72/91 [00:25<00:06,  2.86img/s, Last=❌ 72, Workers=3, Rate Limits=0]Processing images:  80%|████████  | 73/91 [00:25<00:05,  3.14img/s, Last=❌ 72, Workers=3, Rate Limits=0]

❌ ERROR: Image 72 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  80%|████████  | 73/91 [00:25<00:05,  3.14img/s, Last=❌ 74, Workers=3, Rate Limits=0]Processing images:  81%|████████▏ | 74/91 [00:25<00:05,  3.01img/s, Last=❌ 74, Workers=3, Rate Limits=0]Processing images:  81%|████████▏ | 74/91 [00:25<00:05,  3.01img/s, Last=❌ 73, Workers=3, Rate Limits=0]

❌ ERROR: Image 74 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 73 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  82%|████████▏ | 75/91 [00:26<00:05,  3.01img/s, Last=❌ 77, Workers=3, Rate Limits=0]Processing images:  84%|████████▎ | 76/91 [00:26<00:05,  2.67img/s, Last=❌ 77, Workers=3, Rate Limits=0]Processing images:  84%|████████▎ | 76/91 [00:26<00:05,  2.67img/s, Last=❌ 76, Workers=3, Rate Limits=0]Processing images:  85%|████████▍ | 77/91 [00:26<00:05,  2.67img/s, Last=❌ 75, Workers=3, Rate Limits=0]

❌ ERROR: Image 77 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 76 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 75 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  86%|████████▌ | 78/91 [00:27<00:04,  2.67img/s, Last=❌ 79, Workers=3, Rate Limits=0]Processing images:  87%|████████▋ | 79/91 [00:27<00:03,  3.01img/s, Last=❌ 79, Workers=3, Rate Limits=0]Processing images:  87%|████████▋ | 79/91 [00:27<00:03,  3.01img/s, Last=❌ 80, Workers=3, Rate Limits=0]Processing images:  88%|████████▊ | 80/91 [00:27<00:03,  3.01img/s, Last=❌ 78, Workers=3, Rate Limits=0]

❌ ERROR: Image 79 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 80 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 78 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  89%|████████▉ | 81/91 [00:28<00:03,  3.01img/s, Last=❌ 81, Workers=3, Rate Limits=0]Processing images:  90%|█████████ | 82/91 [00:28<00:02,  3.24img/s, Last=❌ 81, Workers=3, Rate Limits=0]Processing images:  90%|█████████ | 82/91 [00:28<00:02,  3.24img/s, Last=❌ 82, Workers=3, Rate Limits=0]

❌ ERROR: Image 81 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 82 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  91%|█████████ | 83/91 [00:28<00:02,  3.24img/s, Last=❌ 83, Workers=3, Rate Limits=0]Processing images:  92%|█████████▏| 84/91 [00:28<00:02,  3.33img/s, Last=❌ 83, Workers=3, Rate Limits=0]

❌ ERROR: Image 83 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  92%|█████████▏| 84/91 [00:29<00:02,  3.33img/s, Last=❌ 85, Workers=3, Rate Limits=0]Processing images:  93%|█████████▎| 85/91 [00:29<00:02,  2.61img/s, Last=❌ 85, Workers=3, Rate Limits=0]Processing images:  93%|█████████▎| 85/91 [00:29<00:02,  2.61img/s, Last=❌ 84, Workers=3, Rate Limits=0]Processing images:  95%|█████████▍| 86/91 [00:29<00:01,  2.61img/s, Last=❌ 86, Workers=3, Rate Limits=0]

❌ ERROR: Image 85 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 84 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 86 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  96%|█████████▌| 87/91 [00:30<00:01,  2.61img/s, Last=❌ 89, Workers=3, Rate Limits=0]Processing images:  97%|█████████▋| 88/91 [00:30<00:01,  2.86img/s, Last=❌ 89, Workers=3, Rate Limits=0]Processing images:  97%|█████████▋| 88/91 [00:30<00:01,  2.86img/s, Last=❌ 87, Workers=3, Rate Limits=0]Processing images:  98%|█████████▊| 89/91 [00:30<00:00,  2.86img/s, Last=❌ 88, Workers=3, Rate Limits=0]

❌ ERROR: Image 89 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 87 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 88 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  99%|█████████▉| 90/91 [00:31<00:00,  2.86img/s, Last=❌ 90, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 91/91 [00:31<00:00,  2.99img/s, Last=❌ 90, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 91/91 [00:31<00:00,  2.89img/s, Last=❌ 90, Workers=3, Rate Limits=0]

❌ ERROR: Image 90 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/91
   ❌ Failed: 91/91
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 91 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 91
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 91
   ⏱️  Processing time: 42.6s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

📄 Processing file 2/6: FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf
Converting FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf to base64: 32 pages at 150 DPI...
  Converting page 1/32 to base64
  Converting page 26/32 to base64
  Conversion complete: 32 pages in 2.0s
✅ Saved 32 records to: tsfrt.gsa.all_pdfs_parsed_intermediate
🤖 Starting LLM processing for 32 pages...
🚀 Starting transcription of 32 images...
📊 Model: databricks-llama-4-maverick
⚙️  Workers: 3 (range: 1-3)


Processing images:   0%|          | 0/32 [00:00<?, ?img/s]Processing images:   0%|          | 0/32 [00:00<?, ?img/s, Last=❌ 2, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 1/32 [00:00<00:22,  1.38img/s, Last=❌ 2, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 1/32 [00:00<00:22,  1.38img/s, Last=❌ 1, Workers=3, Rate Limits=0]

❌ ERROR: Image 2 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 1 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   6%|▋         | 2/32 [00:00<00:21,  1.38img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images:   9%|▉         | 3/32 [00:00<00:07,  3.76img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   9%|▉         | 3/32 [00:01<00:07,  3.76img/s, Last=❌ 4, Workers=3, Rate Limits=0]Processing images:  12%|█▎        | 4/32 [00:01<00:13,  2.13img/s, Last=❌ 4, Workers=3, Rate Limits=0]Processing images:  12%|█▎        | 4/32 [00:01<00:13,  2.13img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:  16%|█▌        | 5/32 [00:01<00:12,  2.13img/s, Last=❌ 3, Workers=3, Rate Limits=0]

❌ ERROR: Image 4 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 5 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 3 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  19%|█▉        | 6/32 [00:02<00:12,  2.13img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:  22%|██▏       | 7/32 [00:02<00:09,  2.70img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:  22%|██▏       | 7/32 [00:02<00:09,  2.70img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:  25%|██▌       | 8/32 [00:02<00:08,  2.70img/s, Last=❌ 8, Workers=3, Rate Limits=0]

❌ ERROR: Image 6 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 7 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 8 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  28%|██▊       | 9/32 [00:03<00:08,  2.70img/s, Last=❌ 11, Workers=3, Rate Limits=0]Processing images:  31%|███▏      | 10/32 [00:03<00:07,  2.93img/s, Last=❌ 11, Workers=3, Rate Limits=0]Processing images:  31%|███▏      | 10/32 [00:03<00:07,  2.93img/s, Last=❌ 10, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 11/32 [00:03<00:07,  2.93img/s, Last=❌ 9, Workers=3, Rate Limits=0] Processing images:  38%|███▊      | 12/32 [00:03<00:05,  3.96img/s, Last=❌ 9, Workers=3, Rate Limits=0]

❌ ERROR: Image 11 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 10 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 9 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  38%|███▊      | 12/32 [00:04<00:05,  3.96img/s, Last=❌ 13, Workers=3, Rate Limits=0]Processing images:  41%|████      | 13/32 [00:04<00:06,  2.81img/s, Last=❌ 13, Workers=3, Rate Limits=0]Processing images:  41%|████      | 13/32 [00:04<00:06,  2.81img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  44%|████▍     | 14/32 [00:04<00:06,  2.81img/s, Last=❌ 14, Workers=3, Rate Limits=0]

❌ ERROR: Image 13 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 12 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 14 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  47%|████▋     | 15/32 [00:05<00:06,  2.81img/s, Last=❌ 15, Workers=3, Rate Limits=0]Processing images:  50%|█████     | 16/32 [00:05<00:05,  3.04img/s, Last=❌ 15, Workers=3, Rate Limits=0]Processing images:  50%|█████     | 16/32 [00:05<00:05,  3.04img/s, Last=❌ 16, Workers=3, Rate Limits=0]Processing images:  53%|█████▎    | 17/32 [00:05<00:04,  3.04img/s, Last=❌ 17, Workers=3, Rate Limits=0]

❌ ERROR: Image 15 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 16 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 17 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  56%|█████▋    | 18/32 [00:06<00:04,  3.04img/s, Last=❌ 19, Workers=3, Rate Limits=0]Processing images:  59%|█████▉    | 19/32 [00:06<00:04,  3.13img/s, Last=❌ 19, Workers=3, Rate Limits=0]Processing images:  59%|█████▉    | 19/32 [00:06<00:04,  3.13img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  62%|██████▎   | 20/32 [00:06<00:03,  3.13img/s, Last=❌ 20, Workers=3, Rate Limits=0]

❌ ERROR: Image 19 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 18 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 20 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  66%|██████▌   | 21/32 [00:07<00:03,  3.13img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  69%|██████▉   | 22/32 [00:07<00:03,  3.25img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  69%|██████▉   | 22/32 [00:07<00:03,  3.25img/s, Last=❌ 21, Workers=3, Rate Limits=0]Processing images:  72%|███████▏  | 23/32 [00:07<00:02,  3.25img/s, Last=❌ 22, Workers=3, Rate Limits=0]

❌ ERROR: Image 23 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 21 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 22 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  75%|███████▌  | 24/32 [00:07<00:02,  3.25img/s, Last=❌ 25, Workers=3, Rate Limits=0]Processing images:  78%|███████▊  | 25/32 [00:08<00:02,  3.41img/s, Last=❌ 25, Workers=3, Rate Limits=0]Processing images:  78%|███████▊  | 25/32 [00:08<00:02,  3.41img/s, Last=❌ 24, Workers=3, Rate Limits=0]Processing images:  81%|████████▏ | 26/32 [00:08<00:01,  3.41img/s, Last=❌ 26, Workers=3, Rate Limits=0]

❌ ERROR: Image 25 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 24 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 26 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  84%|████████▍ | 27/32 [00:08<00:01,  3.41img/s, Last=❌ 29, Workers=3, Rate Limits=0]Processing images:  88%|████████▊ | 28/32 [00:08<00:01,  3.42img/s, Last=❌ 29, Workers=3, Rate Limits=0]Processing images:  88%|████████▊ | 28/32 [00:08<00:01,  3.42img/s, Last=❌ 27, Workers=3, Rate Limits=0]Processing images:  91%|█████████ | 29/32 [00:08<00:00,  3.42img/s, Last=❌ 28, Workers=3, Rate Limits=0]

❌ ERROR: Image 29 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 27 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 28 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  94%|█████████▍| 30/32 [00:09<00:00,  3.42img/s, Last=❌ 31, Workers=3, Rate Limits=0]Processing images:  97%|█████████▋| 31/32 [00:09<00:00,  3.47img/s, Last=❌ 31, Workers=3, Rate Limits=0]

❌ ERROR: Image 31 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  97%|█████████▋| 31/32 [00:10<00:00,  3.47img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 32/32 [00:10<00:00,  3.03img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 32/32 [00:10<00:00,  3.10img/s, Last=❌ 30, Workers=3, Rate Limits=0]

❌ ERROR: Image 30 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/32
   ❌ Failed: 32/32
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 32 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 32
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 32
   ⏱️  Processing time: 20.7s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

📄 Processing file 3/6: FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf
Converting FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf to base64: 10 pages at 150 DPI...
  Converting page 1/10 to base64
  Conversion complete: 10 pages in 0.9s
✅ Saved 10 records to: tsfrt.gsa.all_pdfs_parsed_intermediate
🤖 Starting LLM processing for 10 pages...
🚀 Starting transcription of 10 images...
📊 Model: databricks-llama-4-maverick
⚙️  Workers: 3 (range: 1-3)


Processing images:   0%|          | 0/10 [00:00<?, ?img/s]Processing images:   0%|          | 0/10 [00:00<?, ?img/s, Last=❌ 2, Workers=3, Rate Limits=0]Processing images:  10%|█         | 1/10 [00:00<00:07,  1.26img/s, Last=❌ 2, Workers=3, Rate Limits=0]Processing images:  10%|█         | 1/10 [00:00<00:07,  1.26img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:  20%|██        | 2/10 [00:00<00:06,  1.26img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images:  30%|███       | 3/10 [00:00<00:01,  3.76img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 2 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 1 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Request size cannot exceed 4194304 bytes. Please shorten the request.'}


Processing images:  30%|███       | 3/10 [00:01<00:01,  3.76img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:  40%|████      | 4/10 [00:01<00:02,  2.17img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:  40%|████      | 4/10 [00:01<00:02,  2.17img/s, Last=❌ 4, Workers=3, Rate Limits=0]

❌ ERROR: Image 5 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 4 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  50%|█████     | 5/10 [00:02<00:02,  2.17img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:  60%|██████    | 6/10 [00:02<00:01,  3.41img/s, Last=❌ 3, Workers=3, Rate Limits=0]

❌ ERROR: Image 3 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  60%|██████    | 6/10 [00:02<00:01,  3.41img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:  70%|███████   | 7/10 [00:02<00:01,  2.28img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:  70%|███████   | 7/10 [00:02<00:01,  2.28img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:  80%|████████  | 8/10 [00:02<00:00,  2.28img/s, Last=❌ 8, Workers=3, Rate Limits=0]Processing images:  90%|█████████ | 9/10 [00:02<00:00,  3.66img/s, Last=❌ 8, Workers=3, Rate Limits=0]

❌ ERROR: Image 7 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 6 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 8 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  90%|█████████ | 9/10 [00:03<00:00,  3.66img/s, Last=❌ 9, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 10/10 [00:03<00:00,  2.54img/s, Last=❌ 9, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 10/10 [00:03<00:00,  2.65img/s, Last=❌ 9, Workers=3, Rate Limits=0]

❌ ERROR: Image 9 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/10
   ❌ Failed: 10/10
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 10 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 10
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 10
   ⏱️  Processing time: 7.9s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

📄 Processing file 4/6: GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf
Converting GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf to base64: 29 pages at 150 DPI...
  Converting page 1/29 to base64
  Converting page 26/29 to base64
  Conversion complete: 29 pages in 1.9s
✅ Saved 29 records to: tsfrt.gsa.all_pdfs_parsed_intermediate
🤖 Starting LLM processing for 29 pages...
🚀 Starting transcription of 29 images...
📊 Model: databricks-llama-4-maverick
⚙️  Workers: 3 (range: 1-3)


Processing images:   0%|          | 0/29 [00:00<?, ?img/s]Processing images:   0%|          | 0/29 [00:00<?, ?img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 1/29 [00:00<00:18,  1.50img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 1/29 [00:00<00:18,  1.50img/s, Last=❌ 2, Workers=3, Rate Limits=0]

❌ ERROR: Image 1 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 2 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   7%|▋         | 2/29 [00:01<00:18,  1.50img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images:  10%|█         | 3/29 [00:01<00:08,  3.22img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  10%|█         | 3/29 [00:01<00:08,  3.22img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 4/29 [00:01<00:13,  1.91img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 4/29 [00:01<00:13,  1.91img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:  17%|█▋        | 5/29 [00:01<00:12,  1.91img/s, Last=❌ 4, Workers=3, Rate Limits=0]

❌ ERROR: Image 3 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 5 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 4 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  21%|██        | 6/29 [00:02<00:12,  1.91img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 7/29 [00:02<00:08,  2.66img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 7/29 [00:02<00:08,  2.66img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:  28%|██▊       | 8/29 [00:02<00:07,  2.66img/s, Last=❌ 8, Workers=3, Rate Limits=0]

❌ ERROR: Image 6 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 7 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 8 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  31%|███       | 9/29 [00:03<00:07,  2.66img/s, Last=❌ 10, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 10/29 [00:03<00:06,  2.93img/s, Last=❌ 10, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 10/29 [00:03<00:06,  2.93img/s, Last=❌ 9, Workers=3, Rate Limits=0] Processing images:  38%|███▊      | 11/29 [00:03<00:06,  2.93img/s, Last=❌ 11, Workers=3, Rate Limits=0]

❌ ERROR: Image 10 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 9 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 11 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  41%|████▏     | 12/29 [00:04<00:05,  2.93img/s, Last=❌ 13, Workers=3, Rate Limits=0]Processing images:  45%|████▍     | 13/29 [00:04<00:05,  3.07img/s, Last=❌ 13, Workers=3, Rate Limits=0]Processing images:  45%|████▍     | 13/29 [00:04<00:05,  3.07img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  48%|████▊     | 14/29 [00:04<00:04,  3.07img/s, Last=❌ 14, Workers=3, Rate Limits=0]

❌ ERROR: Image 13 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 12 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 14 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  52%|█████▏    | 15/29 [00:05<00:04,  3.07img/s, Last=❌ 17, Workers=3, Rate Limits=0]Processing images:  55%|█████▌    | 16/29 [00:05<00:04,  2.94img/s, Last=❌ 17, Workers=3, Rate Limits=0]Processing images:  55%|█████▌    | 16/29 [00:05<00:04,  2.94img/s, Last=❌ 15, Workers=3, Rate Limits=0]Processing images:  59%|█████▊    | 17/29 [00:05<00:04,  2.94img/s, Last=❌ 16, Workers=3, Rate Limits=0]

❌ ERROR: Image 17 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 15 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 16 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  62%|██████▏   | 18/29 [00:06<00:03,  2.94img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  66%|██████▌   | 19/29 [00:06<00:03,  3.03img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  66%|██████▌   | 19/29 [00:06<00:03,  3.03img/s, Last=❌ 19, Workers=3, Rate Limits=0]Processing images:  69%|██████▉   | 20/29 [00:06<00:02,  3.03img/s, Last=❌ 20, Workers=3, Rate Limits=0]Processing images:  72%|███████▏  | 21/29 [00:06<00:02,  3.67img/s, Last=❌ 20, Workers=3, Rate Limits=0]

❌ ERROR: Image 18 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 19 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 20 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  72%|███████▏  | 21/29 [00:07<00:02,  3.67img/s, Last=❌ 21, Workers=3, Rate Limits=0]Processing images:  76%|███████▌  | 22/29 [00:07<00:02,  2.67img/s, Last=❌ 21, Workers=3, Rate Limits=0]

❌ ERROR: Image 21 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  76%|███████▌  | 22/29 [00:08<00:02,  2.67img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  79%|███████▉  | 23/29 [00:08<00:02,  2.87img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  79%|███████▉  | 23/29 [00:08<00:02,  2.87img/s, Last=❌ 22, Workers=3, Rate Limits=0]

❌ ERROR: Image 23 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 22 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  83%|████████▎ | 24/29 [00:09<00:01,  2.87img/s, Last=❌ 26, Workers=3, Rate Limits=0]Processing images:  86%|████████▌ | 25/29 [00:09<00:01,  2.42img/s, Last=❌ 26, Workers=3, Rate Limits=0]Processing images:  86%|████████▌ | 25/29 [00:09<00:01,  2.42img/s, Last=❌ 25, Workers=3, Rate Limits=0]Processing images:  90%|████████▉ | 26/29 [00:09<00:01,  2.42img/s, Last=❌ 24, Workers=3, Rate Limits=0]

❌ ERROR: Image 26 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 25 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 24 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  93%|█████████▎| 27/29 [00:10<00:00,  2.42img/s, Last=❌ 28, Workers=3, Rate Limits=0]Processing images:  97%|█████████▋| 28/29 [00:10<00:00,  2.72img/s, Last=❌ 28, Workers=3, Rate Limits=0]Processing images:  97%|█████████▋| 28/29 [00:10<00:00,  2.72img/s, Last=❌ 27, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 29/29 [00:10<00:00,  2.87img/s, Last=❌ 27, Workers=3, Rate Limits=0]

❌ ERROR: Image 28 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 27 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/29
   ❌ Failed: 29/29
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 29 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 29
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 29
   ⏱️  Processing time: 16.3s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

📄 Processing file 5/6: GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf
Converting GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf to base64: 90 pages at 150 DPI...
  Converting page 1/90 to base64
  Converting page 26/90 to base64
  Converting page 51/90 to base64
  Converting page 76/90 to base64
  Conversion complete: 90 pages in 5.0s
✅ Saved 90 records to: tsfrt.gsa.all_pdfs_parsed_intermediate
🤖 Starting LLM processing for 90 pages...
🚀 Starting transcription of 90 images...
📊 Model: databricks-llama-4-maverick
⚙️  Workers: 3 (range: 1-3)


Processing images:   0%|          | 0/90 [00:00<?, ?img/s]Processing images:   0%|          | 0/90 [00:00<?, ?img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   1%|          | 1/90 [00:00<01:18,  1.14img/s, Last=❌ 1, Workers=3, Rate Limits=0]Processing images:   1%|          | 1/90 [00:00<01:18,  1.14img/s, Last=❌ 2, Workers=3, Rate Limits=0]

❌ ERROR: Image 1 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 2 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   2%|▏         | 2/90 [00:01<01:17,  1.14img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images:   3%|▎         | 3/90 [00:01<00:28,  3.06img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   3%|▎         | 3/90 [00:02<00:28,  3.06img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:   4%|▍         | 4/90 [00:02<00:52,  1.63img/s, Last=❌ 5, Workers=3, Rate Limits=0]Processing images:   4%|▍         | 4/90 [00:02<00:52,  1.63img/s, Last=❌ 4, Workers=3, Rate Limits=0]

❌ ERROR: Image 5 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 4 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   6%|▌         | 5/90 [00:02<00:52,  1.63img/s, Last=❌ 3, Workers=3, Rate Limits=0]Processing images:   7%|▋         | 6/90 [00:02<00:31,  2.70img/s, Last=❌ 3, Workers=3, Rate Limits=0]

❌ ERROR: Image 3 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:   7%|▋         | 6/90 [00:03<00:31,  2.70img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:   8%|▊         | 7/90 [00:03<00:45,  1.83img/s, Last=❌ 7, Workers=3, Rate Limits=0]Processing images:   8%|▊         | 7/90 [00:03<00:45,  1.83img/s, Last=❌ 6, Workers=3, Rate Limits=0]Processing images:   9%|▉         | 8/90 [00:03<00:44,  1.83img/s, Last=❌ 8, Workers=3, Rate Limits=0]Processing images:  10%|█         | 9/90 [00:03<00:28,  2.89img/s, Last=❌ 8, Workers=3, Rate Limits=0]

❌ ERROR: Image 7 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 6 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 8 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  10%|█         | 9/90 [00:04<00:28,  2.89img/s, Last=❌ 9, Workers=3, Rate Limits=0]Processing images:  11%|█         | 10/90 [00:04<00:42,  1.90img/s, Last=❌ 9, Workers=3, Rate Limits=0]Processing images:  11%|█         | 10/90 [00:04<00:42,  1.90img/s, Last=❌ 11, Workers=3, Rate Limits=0]Processing images:  12%|█▏        | 11/90 [00:04<00:41,  1.90img/s, Last=❌ 10, Workers=3, Rate Limits=0]

❌ ERROR: Image 9 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 11 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 10 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  13%|█▎        | 12/90 [00:06<00:41,  1.90img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 13/90 [00:06<00:35,  2.17img/s, Last=❌ 12, Workers=3, Rate Limits=0]Processing images:  14%|█▍        | 13/90 [00:06<00:35,  2.17img/s, Last=❌ 13, Workers=3, Rate Limits=0]Processing images:  16%|█▌        | 14/90 [00:06<00:35,  2.17img/s, Last=❌ 14, Workers=3, Rate Limits=0]

❌ ERROR: Image 12 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 13 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 14 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  17%|█▋        | 15/90 [00:06<00:34,  2.17img/s, Last=❌ 16, Workers=3, Rate Limits=0]Processing images:  18%|█▊        | 16/90 [00:06<00:29,  2.53img/s, Last=❌ 16, Workers=3, Rate Limits=0]Processing images:  18%|█▊        | 16/90 [00:07<00:29,  2.53img/s, Last=❌ 17, Workers=3, Rate Limits=0]

❌ ERROR: Image 16 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 17 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  19%|█▉        | 17/90 [00:07<00:28,  2.53img/s, Last=❌ 15, Workers=3, Rate Limits=0]Processing images:  20%|██        | 18/90 [00:07<00:22,  3.20img/s, Last=❌ 15, Workers=3, Rate Limits=0]

❌ ERROR: Image 15 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  20%|██        | 18/90 [00:08<00:22,  3.20img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  21%|██        | 19/90 [00:08<00:29,  2.44img/s, Last=❌ 18, Workers=3, Rate Limits=0]Processing images:  21%|██        | 19/90 [00:08<00:29,  2.44img/s, Last=❌ 20, Workers=3, Rate Limits=0]Processing images:  22%|██▏       | 20/90 [00:08<00:24,  2.85img/s, Last=❌ 20, Workers=3, Rate Limits=0]

❌ ERROR: Image 18 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 20 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  22%|██▏       | 20/90 [00:08<00:24,  2.85img/s, Last=❌ 19, Workers=3, Rate Limits=0]Processing images:  23%|██▎       | 21/90 [00:08<00:20,  3.34img/s, Last=❌ 19, Workers=3, Rate Limits=0]

❌ ERROR: Image 19 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  23%|██▎       | 21/90 [00:09<00:20,  3.34img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 22/90 [00:09<00:31,  2.17img/s, Last=❌ 23, Workers=3, Rate Limits=0]Processing images:  24%|██▍       | 22/90 [00:09<00:31,  2.17img/s, Last=❌ 22, Workers=3, Rate Limits=0]Processing images:  26%|██▌       | 23/90 [00:09<00:24,  2.70img/s, Last=❌ 22, Workers=3, Rate Limits=0]Processing images:  26%|██▌       | 23/90 [00:09<00:24,  2.70img/s, Last=❌ 21, Workers=3, Rate Limits=0]

❌ ERROR: Image 23 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 22 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 21 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  27%|██▋       | 24/90 [00:10<00:24,  2.70img/s, Last=❌ 24, Workers=3, Rate Limits=0]Processing images:  28%|██▊       | 25/90 [00:10<00:28,  2.30img/s, Last=❌ 24, Workers=3, Rate Limits=0]Processing images:  28%|██▊       | 25/90 [00:10<00:28,  2.30img/s, Last=❌ 26, Workers=3, Rate Limits=0]Processing images:  29%|██▉       | 26/90 [00:10<00:27,  2.30img/s, Last=❌ 25, Workers=3, Rate Limits=0]Processing images:  30%|███       | 27/90 [00:10<00:18,  3.36img/s, Last=❌ 25, Workers=3, Rate Limits=0]

❌ ERROR: Image 24 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 26 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 25 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  30%|███       | 27/90 [00:11<00:18,  3.36img/s, Last=❌ 29, Workers=3, Rate Limits=0]Processing images:  31%|███       | 28/90 [00:11<00:25,  2.43img/s, Last=❌ 29, Workers=3, Rate Limits=0]Processing images:  31%|███       | 28/90 [00:11<00:25,  2.43img/s, Last=❌ 28, Workers=3, Rate Limits=0]

❌ ERROR: Image 29 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 28 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  32%|███▏      | 29/90 [00:11<00:25,  2.43img/s, Last=❌ 27, Workers=3, Rate Limits=0]Processing images:  33%|███▎      | 30/90 [00:11<00:19,  3.02img/s, Last=❌ 27, Workers=3, Rate Limits=0]

❌ ERROR: Image 27 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  33%|███▎      | 30/90 [00:12<00:19,  3.02img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 31/90 [00:12<00:28,  2.10img/s, Last=❌ 30, Workers=3, Rate Limits=0]Processing images:  34%|███▍      | 31/90 [00:12<00:28,  2.10img/s, Last=❌ 32, Workers=3, Rate Limits=0]Processing images:  36%|███▌      | 32/90 [00:12<00:23,  2.47img/s, Last=❌ 32, Workers=3, Rate Limits=0]Processing images:  36%|███▌      | 32/90 [00:12<00:23,  2.47img/s, Last=❌ 31, Workers=3, Rate Limits=0]

❌ ERROR: Image 30 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 32 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 31 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  37%|███▋      | 33/90 [00:14<00:23,  2.47img/s, Last=❌ 35, Workers=3, Rate Limits=0]Processing images:  38%|███▊      | 34/90 [00:14<00:25,  2.18img/s, Last=❌ 35, Workers=3, Rate Limits=0]Processing images:  38%|███▊      | 34/90 [00:14<00:25,  2.18img/s, Last=❌ 34, Workers=3, Rate Limits=0]Processing images:  39%|███▉      | 35/90 [00:14<00:25,  2.18img/s, Last=❌ 33, Workers=3, Rate Limits=0]Processing images:  40%|████      | 36/90 [00:14<00:16,  3.22img/s, Last=❌ 33, Workers=3, Rate Limits=0]

❌ ERROR: Image 35 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 34 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 33 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  40%|████      | 36/90 [00:15<00:16,  3.22img/s, Last=❌ 38, Workers=3, Rate Limits=0]Processing images:  41%|████      | 37/90 [00:15<00:24,  2.20img/s, Last=❌ 38, Workers=3, Rate Limits=0]Processing images:  41%|████      | 37/90 [00:15<00:24,  2.20img/s, Last=❌ 36, Workers=3, Rate Limits=0]Processing images:  42%|████▏     | 38/90 [00:15<00:23,  2.20img/s, Last=❌ 37, Workers=3, Rate Limits=0]Processing images:  43%|████▎     | 39/90 [00:15<00:16,  3.19img/s, Last=❌ 37, Workers=3, Rate Limits=0]

❌ ERROR: Image 38 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 36 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 37 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  43%|████▎     | 39/90 [00:16<00:16,  3.19img/s, Last=❌ 40, Workers=3, Rate Limits=0]Processing images:  44%|████▍     | 40/90 [00:16<00:20,  2.38img/s, Last=❌ 40, Workers=3, Rate Limits=0]Processing images:  44%|████▍     | 40/90 [00:16<00:20,  2.38img/s, Last=❌ 39, Workers=3, Rate Limits=0]Processing images:  46%|████▌     | 41/90 [00:16<00:20,  2.38img/s, Last=❌ 41, Workers=3, Rate Limits=0]

❌ ERROR: Image 40 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 39 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 41 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  47%|████▋     | 42/90 [00:17<00:20,  2.38img/s, Last=❌ 44, Workers=3, Rate Limits=0]Processing images:  48%|████▊     | 43/90 [00:17<00:18,  2.57img/s, Last=❌ 44, Workers=3, Rate Limits=0]Processing images:  48%|████▊     | 43/90 [00:17<00:18,  2.57img/s, Last=❌ 42, Workers=3, Rate Limits=0]Processing images:  49%|████▉     | 44/90 [00:17<00:17,  2.57img/s, Last=❌ 43, Workers=3, Rate Limits=0]

❌ ERROR: Image 44 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 42 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 43 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  50%|█████     | 45/90 [00:18<00:17,  2.57img/s, Last=❌ 47, Workers=3, Rate Limits=0]Processing images:  51%|█████     | 46/90 [00:18<00:17,  2.57img/s, Last=❌ 47, Workers=3, Rate Limits=0]Processing images:  51%|█████     | 46/90 [00:18<00:17,  2.57img/s, Last=❌ 46, Workers=3, Rate Limits=0]Processing images:  52%|█████▏    | 47/90 [00:18<00:16,  2.57img/s, Last=❌ 45, Workers=3, Rate Limits=0]Processing images:  53%|█████▎    | 48/90 [00:18<00:12,  3.35img/s, Last=❌ 45, Workers=3, Rate Limits=0]

❌ ERROR: Image 47 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 46 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 45 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  53%|█████▎    | 48/90 [00:19<00:12,  3.35img/s, Last=❌ 48, Workers=3, Rate Limits=0]Processing images:  54%|█████▍    | 49/90 [00:19<00:17,  2.40img/s, Last=❌ 48, Workers=3, Rate Limits=0]Processing images:  54%|█████▍    | 49/90 [00:19<00:17,  2.40img/s, Last=❌ 50, Workers=3, Rate Limits=0]Processing images:  56%|█████▌    | 50/90 [00:19<00:14,  2.73img/s, Last=❌ 50, Workers=3, Rate Limits=0]

❌ ERROR: Image 48 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 50 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  56%|█████▌    | 50/90 [00:19<00:14,  2.73img/s, Last=❌ 49, Workers=3, Rate Limits=0]

❌ ERROR: Image 49 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  57%|█████▋    | 51/90 [00:20<00:14,  2.73img/s, Last=❌ 51, Workers=3, Rate Limits=0]Processing images:  58%|█████▊    | 52/90 [00:20<00:16,  2.25img/s, Last=❌ 51, Workers=3, Rate Limits=0]Processing images:  58%|█████▊    | 52/90 [00:20<00:16,  2.25img/s, Last=❌ 53, Workers=3, Rate Limits=0]Processing images:  59%|█████▉    | 53/90 [00:20<00:16,  2.25img/s, Last=❌ 52, Workers=3, Rate Limits=0]

❌ ERROR: Image 51 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 53 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 52 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  60%|██████    | 54/90 [00:21<00:16,  2.25img/s, Last=❌ 56, Workers=3, Rate Limits=0]Processing images:  61%|██████    | 55/90 [00:22<00:14,  2.37img/s, Last=❌ 56, Workers=3, Rate Limits=0]Processing images:  61%|██████    | 55/90 [00:22<00:14,  2.37img/s, Last=❌ 54, Workers=3, Rate Limits=0]

❌ ERROR: Image 54 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}❌ ERROR: Image 56 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}



Processing images:  62%|██████▏   | 56/90 [00:22<00:14,  2.37img/s, Last=❌ 55, Workers=3, Rate Limits=0]Processing images:  63%|██████▎   | 57/90 [00:22<00:11,  2.93img/s, Last=❌ 55, Workers=3, Rate Limits=0]

❌ ERROR: Image 55 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  63%|██████▎   | 57/90 [00:23<00:11,  2.93img/s, Last=❌ 58, Workers=3, Rate Limits=0]Processing images:  64%|██████▍   | 58/90 [00:23<00:14,  2.14img/s, Last=❌ 58, Workers=3, Rate Limits=0]

❌ ERROR: Image 58 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  64%|██████▍   | 58/90 [00:23<00:14,  2.14img/s, Last=❌ 59, Workers=3, Rate Limits=0]Processing images:  66%|██████▌   | 59/90 [00:23<00:13,  2.32img/s, Last=❌ 59, Workers=3, Rate Limits=0]Processing images:  66%|██████▌   | 59/90 [00:23<00:13,  2.32img/s, Last=❌ 57, Workers=3, Rate Limits=0]

❌ ERROR: Image 59 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 57 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  67%|██████▋   | 60/90 [00:24<00:12,  2.32img/s, Last=❌ 62, Workers=3, Rate Limits=0]Processing images:  68%|██████▊   | 61/90 [00:24<00:12,  2.24img/s, Last=❌ 62, Workers=3, Rate Limits=0]Processing images:  68%|██████▊   | 61/90 [00:24<00:12,  2.24img/s, Last=❌ 61, Workers=3, Rate Limits=0]

❌ ERROR: Image 62 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 61 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  69%|██████▉   | 62/90 [00:24<00:12,  2.24img/s, Last=❌ 60, Workers=3, Rate Limits=0]Processing images:  70%|███████   | 63/90 [00:24<00:09,  2.96img/s, Last=❌ 60, Workers=3, Rate Limits=0]

❌ ERROR: Image 60 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  70%|███████   | 63/90 [00:25<00:09,  2.96img/s, Last=❌ 63, Workers=3, Rate Limits=0]Processing images:  71%|███████   | 64/90 [00:25<00:12,  2.12img/s, Last=❌ 63, Workers=3, Rate Limits=0]Processing images:  71%|███████   | 64/90 [00:25<00:12,  2.12img/s, Last=❌ 64, Workers=3, Rate Limits=0]Processing images:  72%|███████▏  | 65/90 [00:25<00:11,  2.12img/s, Last=❌ 65, Workers=3, Rate Limits=0]

❌ ERROR: Image 63 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 64 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 65 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  73%|███████▎  | 66/90 [00:26<00:11,  2.12img/s, Last=❌ 66, Workers=3, Rate Limits=0]Processing images:  74%|███████▍  | 67/90 [00:26<00:08,  2.67img/s, Last=❌ 66, Workers=3, Rate Limits=0]Processing images:  74%|███████▍  | 67/90 [00:26<00:08,  2.67img/s, Last=❌ 68, Workers=3, Rate Limits=0]Processing images:  76%|███████▌  | 68/90 [00:26<00:08,  2.67img/s, Last=❌ 67, Workers=3, Rate Limits=0]

❌ ERROR: Image 66 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 68 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 67 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  77%|███████▋  | 69/90 [00:27<00:07,  2.67img/s, Last=❌ 70, Workers=3, Rate Limits=0]Processing images:  78%|███████▊  | 70/90 [00:27<00:07,  2.68img/s, Last=❌ 70, Workers=3, Rate Limits=0]Processing images:  78%|███████▊  | 70/90 [00:27<00:07,  2.68img/s, Last=❌ 69, Workers=3, Rate Limits=0]Processing images:  79%|███████▉  | 71/90 [00:27<00:07,  2.68img/s, Last=❌ 71, Workers=3, Rate Limits=0]

❌ ERROR: Image 70 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 69 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 71 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  80%|████████  | 72/90 [00:28<00:06,  2.68img/s, Last=❌ 74, Workers=3, Rate Limits=0]Processing images:  81%|████████  | 73/90 [00:28<00:05,  2.87img/s, Last=❌ 74, Workers=3, Rate Limits=0]Processing images:  81%|████████  | 73/90 [00:28<00:05,  2.87img/s, Last=❌ 72, Workers=3, Rate Limits=0]Processing images:  82%|████████▏ | 74/90 [00:28<00:05,  2.87img/s, Last=❌ 73, Workers=3, Rate Limits=0]

❌ ERROR: Image 72 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}❌ ERROR: Image 74 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

❌ ERROR: Image 73 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  83%|████████▎ | 75/90 [00:29<00:05,  2.87img/s, Last=❌ 76, Workers=3, Rate Limits=0]Processing images:  84%|████████▍ | 76/90 [00:29<00:05,  2.79img/s, Last=❌ 76, Workers=3, Rate Limits=0]Processing images:  84%|████████▍ | 76/90 [00:29<00:05,  2.79img/s, Last=❌ 77, Workers=3, Rate Limits=0]Processing images:  86%|████████▌ | 77/90 [00:29<00:04,  2.79img/s, Last=❌ 75, Workers=3, Rate Limits=0]Processing images:  87%|████████▋ | 78/90 [00:29<00:03,  3.47img/s, Last=❌ 75, Workers=3, Rate Limits=0]

❌ ERROR: Image 76 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 77 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 75 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  87%|████████▋ | 78/90 [00:30<00:03,  3.47img/s, Last=❌ 79, Workers=3, Rate Limits=0]Processing images:  88%|████████▊ | 79/90 [00:30<00:04,  2.59img/s, Last=❌ 79, Workers=3, Rate Limits=0]Processing images:  88%|████████▊ | 79/90 [00:31<00:04,  2.59img/s, Last=❌ 78, Workers=3, Rate Limits=0]Processing images:  89%|████████▉ | 80/90 [00:31<00:03,  2.88img/s, Last=❌ 78, Workers=3, Rate Limits=0]Processing images:  89%|████████▉ | 80/90 [00:31<00:03,  2.88img/s, Last=❌ 80, Workers=3, Rate Limits=0]

❌ ERROR: Image 79 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 78 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 80 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  90%|█████████ | 81/90 [00:32<00:03,  2.88img/s, Last=❌ 81, Workers=3, Rate Limits=0]Processing images:  91%|█████████ | 82/90 [00:32<00:03,  2.38img/s, Last=❌ 81, Workers=3, Rate Limits=0]Processing images:  91%|█████████ | 82/90 [00:32<00:03,  2.38img/s, Last=❌ 83, Workers=3, Rate Limits=0]Processing images:  92%|█████████▏| 83/90 [00:32<00:02,  2.38img/s, Last=❌ 82, Workers=3, Rate Limits=0]

❌ ERROR: Image 81 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 83 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 82 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  93%|█████████▎| 84/90 [00:33<00:02,  2.38img/s, Last=❌ 86, Workers=3, Rate Limits=0]Processing images:  94%|█████████▍| 85/90 [00:33<00:02,  2.44img/s, Last=❌ 86, Workers=3, Rate Limits=0]Processing images:  94%|█████████▍| 85/90 [00:33<00:02,  2.44img/s, Last=❌ 84, Workers=3, Rate Limits=0]Processing images:  96%|█████████▌| 86/90 [00:33<00:01,  2.79img/s, Last=❌ 84, Workers=3, Rate Limits=0]Processing images:  96%|█████████▌| 86/90 [00:33<00:01,  2.79img/s, Last=❌ 85, Workers=3, Rate Limits=0]

❌ ERROR: Image 86 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 84 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 85 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}


Processing images:  97%|█████████▋| 87/90 [00:34<00:01,  2.79img/s, Last=❌ 88, Workers=3, Rate Limits=0]Processing images:  98%|█████████▊| 88/90 [00:34<00:00,  2.39img/s, Last=❌ 88, Workers=3, Rate Limits=0]Processing images:  98%|█████████▊| 88/90 [00:34<00:00,  2.39img/s, Last=❌ 89, Workers=3, Rate Limits=0]Processing images:  99%|█████████▉| 89/90 [00:34<00:00,  2.39img/s, Last=❌ 87, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 90/90 [00:34<00:00,  3.30img/s, Last=❌ 87, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 90/90 [00:34<00:00,  2.60img/s, Last=❌ 87, Workers=3, Rate Limits=0]

❌ ERROR: Image 88 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 89 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}
❌ ERROR: Image 87 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/90
   ❌ Failed: 90/90
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 90 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 90
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 90
   ⏱️  Processing time: 43.8s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

📄 Processing file 6/6: executive-documents.pdf
Converting executive-documents.pdf to base64: 1 pages at 150 DPI...
  Converting page 1/1 to base64
  Conversion complete: 1 pages in 0.0s
✅ Saved 1 records to: tsfrt.gsa.all_pdfs_parsed_intermediate
🤖 Starting LLM processing for 1 pages...
🚀 Starting transcription of 1 images...
📊 Model: databricks-llama-4-maverick
⚙️  Workers: 3 (range: 1-3)


Processing images:   0%|          | 0/1 [00:00<?, ?img/s]Processing images:   0%|          | 0/1 [00:00<?, ?img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 1/1 [00:00<00:00,  2.30img/s, Last=❌ 0, Workers=3, Rate Limits=0]Processing images: 100%|██████████| 1/1 [00:00<00:00,  2.29img/s, Last=❌ 0, Workers=3, Rate Limits=0]

❌ ERROR: Image 0 failed with non-retryable error: Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Invalid base64 string for image\n'}

📈 Llama 4 Transcription Summary:
   ✅ Successful: 0/1
   ❌ Failed: 1/1
   📊 Success rate: 0.0%
   🔧 Final worker count: 3
   ⚠️  Total rate limit events: 0





✅ Saved 1 records to: tsfrt.gsa.all_pdfs_parsed
✅ File completed successfully:
   📊 Pages: 1
   ✅ Successful transcriptions: 0
   ❌ Failed transcriptions: 1
   ⏱️  Processing time: 3.7s
   💾 Saved to: tsfrt.gsa.all_pdfs_parsed

🎊 BATCH PROCESSING COMPLETE
📊 Files processed: 6/6
📄 Total pages processed: 253
✅ Successful files: 6
❌ Failed files: 0
💾 All results combined in: tsfrt.gsa.all_pdfs_parsed

📋 PROCESSING LOG:
   ✅ FY 2024 GSA Annual Performance Plan FY 2022 Report_Final_508.pdf: SUCCESS (91 pages, 42.6s)
   ✅ FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf: SUCCESS (32 pages, 20.7s)
   ✅ FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf: SUCCESS (10 pages, 7.9s)
   ✅ GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf: SUCCESS (29 pages, 16.3s)
   ✅ GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf: SUCCESS (90 pages, 43.8s)
   ✅ executive-documents.pdf: SUCCESS (1 pages, 3.7s)


[Trace(request_id=tr-58d00ec5d0d44bf18daffb169a1b8005), Trace(request_id=tr-af4f56b9004d44419aba5e60e4327d82), Trace(request_id=tr-e723ad506b3b44258017c0e4795d4d63), Trace(request_id=tr-99fbce238c5441cea188f856a38c9039), Trace(request_id=tr-29fad26fede54477809c0add1849a8eb), Trace(request_id=tr-68109773fba943de8e692fe7f601e26e), Trace(request_id=tr-cb127c658a4f489d817df6fd82e0bfb4), Trace(request_id=tr-1be5acbe81b545e88d603e92ad63bc29), Trace(request_id=tr-4272d7a2a4684400849c38f062131b22), Trace(request_id=tr-46ec44ecfc4143dd84f20018421fad20)]

In [0]:
# If using combined mode, show summary statistics
if PROCESSING_MODE == "combined":
    summary_df = spark.sql(f"""
        SELECT 
            file_name,
            doc_name,
            COUNT(*) as total_pages,
            SUM(CASE WHEN transcription NOT LIKE 'ERROR:%' THEN 1 ELSE 0 END) as successful_pages,
            SUM(CASE WHEN transcription LIKE 'ERROR:%' THEN 1 ELSE 0 END) as failed_pages,
            AVG(page_text_length) as avg_page_text_length,
            MIN(processed_timestamp) as first_processed,
            MAX(processed_timestamp) as last_processed
        FROM {FINAL_TABLE}
        GROUP BY file_name, doc_name
        ORDER BY file_name
    """)
    
    print("📊 PROCESSING SUMMARY BY FILE:")
display(summary_df)

📊 PROCESSING SUMMARY BY FILE:


file_name,doc_name,total_pages,successful_pages,failed_pages,avg_page_text_length,first_processed,last_processed
FY 2024 GSA Annual Performance Plan FY 2022 Report_Final_508.pdf,fy_2024_gsa_annual_performance_plan_fy_2022_report_final_508,91,0,91,2615.318681318681,2025-08-05T01:57:28.973886Z,2025-08-05T01:57:34.21837Z
FY-2023-Annual-Performance-Report_FINAL_508-1282025.pdf,fy_2023_annual_performance_report_final_508_1282025,32,0,32,2601.03125,2025-08-05T01:58:11.377245Z,2025-08-05T01:58:13.176927Z
FY-2026-GSA-Annual-Performance-Plan_5-28-25.pdf,fy_2026_gsa_annual_performance_plan_5_28_25,10,0,10,2194.1,2025-08-05T01:58:32.228746Z,2025-08-05T01:58:32.791388Z
GSA-FY2024-Annual-Performance-Report-Final-508-Jan-2025.pdf,gsa_fy2024_annual_performance_report_final_508_jan_2025,29,0,29,2631.793103448276,2025-08-05T01:58:40.018158Z,2025-08-05T01:58:41.724466Z
GSA_Annual_Performance_Plan_FY_2023_FINAL_508.pdf,gsa_annual_performance_plan_fy_2023_final_508,90,0,90,2137.3555555555554,2025-08-05T01:58:56.294563Z,2025-08-05T01:59:01.027461Z
executive-documents.pdf,executive_documents,1,0,1,117.0,2025-08-05T01:59:39.899189Z,2025-08-05T01:59:39.899189Z


In [0]:
spark.sql(f"DROP TABLE IF EXISTS {OUTPUT_CTLG}.{OUTPUT_SCHEMA}.{OUTPUT_TABLE}")

DataFrame[]

In [0]:
embedding_model = dbutils.widgets.get("embedding_model")

spark.sql(f"""
CREATE TABLE {OUTPUT_CTLG}.{OUTPUT_SCHEMA}.{OUTPUT_TABLE} as SELECT
  ROW_NUMBER() OVER (ORDER BY transcription) as id,
  doc_id,
  transcription,
  ai_query("{embedding_model}", subquery.transcription) as embedding
FROM
  (
    SELECT
      doc_id,
      transcription
    FROM
      {OUTPUT_CTLG}.{OUTPUT_SCHEMA}.all_pdfs_parsed
  ) AS subquery
""")

DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]