In [1]:
import PIL.Image
import os
import google.generativeai as genai
from pdf2image import convert_from_path

# Read the API key from the environment so the secret never has to be pasted
# into (and possibly shared or committed with) this notebook. Export
# GOOGLE_API_KEY before running; when it is unset the key is an empty string.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

genai.configure(api_key=GOOGLE_API_KEY)

# Source PDF; its base name (without extension) is used to name output files.
pdf_path = "STANDARD GLASS_Price Band Ad_Material.pdf"
pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]

# Create the output directory if it doesn't exist
output_dir = "GeminiVisionResult"
os.makedirs(output_dir, exist_ok=True)

# Choose a Gemini model.
model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# Instruction text sent to the model together with each page image. It asks
# for text blocks and tables in reading order, with tables rendered as CSV.
# Note: this is a regular (non-raw) string, so "\\n\\n" below reaches the
# model as the literal characters \n\n, not as actual blank lines.
prompt = """
    Extract all text content and tabular data from this image, strictly preserving the original reading order as they appear on the page.

    1. **Reading Order:** Process the content strictly based on the reading order within the image. Do not rearrange or reorder blocks or tables.

    2. **Text Blocks:** Extract distinct blocks of text and represent each block as a separate entity, separated by double newlines ("\\n\\n").

    3. **Tables:** Identify any tables present in the image. For each table, output it in a structured, comma-separated format (.csv). Each row of the table should be on a new line, with commas separating column values.
        - Include the header row, if present.
        - Ensure that all columns of each row are comma separated values.

    4. **Output Format:**
        - Output text blocks and tables in the order they are read on the page. When a table is encountered while reading the page, output it in CSV format at that point in the output.

    5. If there are no text or no tables return empty string.

     If the table contains only one row, then return text of that row separated by comma.
    """

# Run the extraction: render every PDF page to an image, send each image to
# the model together with the prompt, and save one text file per page.
try:
    # Check up front so that a missing PDF reaches the FileNotFoundError
    # handler below and is reported with its path.
    if not os.path.isfile(pdf_path):
        raise FileNotFoundError(pdf_path)

    # Convert all pages of the PDF to PIL image objects
    images = convert_from_path(pdf_path)

    # An empty result is a conversion problem, not a missing file, so it is
    # routed to the generic handler rather than reported as "file not found".
    if not images:
        raise ValueError("Could not convert the PDF to images")

    for page_number, img in enumerate(images, start=1):
        output_file_path = os.path.join(output_dir, f"{pdf_name}_{page_number}.txt")

        try:
            response = model.generate_content(
                [prompt, img],
                generation_config={"max_output_tokens": 4096},
            )
            response.resolve()

            with open(output_file_path, "w", encoding="utf-8") as f:
                f.write(response.text)

            print(f"Processed page {page_number} and saved to {output_file_path}")

        except Exception as page_err:
            # A failure on one page must not stop the remaining pages: report
            # it and record the error text in that page's output file.
            print(f"Error processing page {page_number}: {page_err}")
            with open(output_file_path, "w", encoding="utf-8") as f:
                f.write(f"Error: An error occurred during processing of page {page_number} : {page_err}")

except FileNotFoundError as e:
    print(f"Error: Could not find file: {e}")
except Exception as e:
    print(f"Error: An error occurred during processing: {e}")

  from .autonotebook import tqdm as notebook_tqdm


Processed page 1 and saved to GeminiVisionResult/STANDARD GLASS_Price Band Ad_Material_1.txt
Processed page 2 and saved to GeminiVisionResult/STANDARD GLASS_Price Band Ad_Material_2.txt
Processed page 3 and saved to GeminiVisionResult/STANDARD GLASS_Price Band Ad_Material_3.txt
Processed page 4 and saved to GeminiVisionResult/STANDARD GLASS_Price Band Ad_Material_4.txt
Processed page 5 and saved to GeminiVisionResult/STANDARD GLASS_Price Band Ad_Material_5.txt
