In [None]:
from ultralytics import YOLO
from PIL import Image
from pdf2image import convert_from_path
from io import BytesIO
import pathlib
import os

# Detect document-layout regions on every page of a PDF.
#
# Pipeline: rasterize the PDF pages, encode each page as PNG, run the
# layout-detection model on the pages batch by batch, and collect the
# bounding boxes of each page in `mydict`, keyed by the page's temporary
# file path.

# Path to the input PDF
pdf_path = 'STANDARD GLASS_Price Band Ad_Material.pdf'

# Convert PDF pages to images with lower dpi to reduce memory usage
print("Converting PDF to images...")
pdf_images = convert_from_path(pdf_path, dpi=150)

# Encode every page as PNG bytes held in memory; the bytes are written to
# short-lived files right before inference (see the batch loop below).
temp_images = []
for page in pdf_images:
    img_bytes = BytesIO()
    page.save(img_bytes, format='PNG')
    temp_images.append(img_bytes.getvalue())

# Load the document segmentation model and pin it to the CPU
print("Loading YOLO model...")
docseg_model = YOLO('yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt').to('cpu')

# Maps each page's temporary file path (str) to its detected box coordinates
mydict = {}

# Process images in batches to avoid memory overflow
batch_size = 1
print("Processing images with YOLO...")
for i in range(0, len(temp_images), batch_size):
    batch = temp_images[i:i + batch_size]

    # Paths of the temporary files belonging to this batch.
    batch_paths = []
    try:
        # Write the batch to temporary files; the model is given file paths
        # so that each result carries a per-page path used as the key below.
        for idx, img_data in enumerate(batch):
            img_path = f'temp_page_{i + idx + 1}.png'
            # Register the path before writing so that a partially written
            # file is also cleaned up if the write fails.
            batch_paths.append(img_path)
            with open(img_path, 'wb') as f:
                f.write(img_data)

        # Process batch with YOLO
        results = docseg_model(source=batch_paths, save=True, show_labels=True, show_conf=True, boxes=True)

        # Extract and store results (xyxy corner coordinates per box; an
        # empty list when the page has no detections)
        for entry in results:
            thepath = pathlib.Path(entry.path)
            thecoords = entry.boxes.xyxy.numpy() if entry.boxes else []
            mydict.update({str(thepath): thecoords})
    finally:
        # Always remove the temporary files, even when inference or result
        # extraction fails, so no stray PNGs are left in the working directory.
        for img_path in batch_paths:
            pathlib.Path(img_path).unlink(missing_ok=True)

# Output the results
print("Processed results:")
for path, coords in mydict.items():
    print(f"Path: {path}")
    print("Coordinates:", coords)


Converting PDF to images...
