In [None]:
# 1. Path Setup
# 1a. Input data source: assumes a dataset named "ocr-main" has already been uploaded to Kaggle.
base_path = '/kaggle/input/ocr-main' 


In [None]:
#1b. Writable output directory for the processed files
import os

output_dir = '/kaggle/working/processed_files'
# exist_ok=True keeps a re-run of this cell from failing once the folder exists
os.makedirs(output_dir, exist_ok=True)


In [None]:
#1c. List input files (optional; handy for checking dataset paths while debugging)
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


In [None]:
#2. Get system dependencies (Tesseract language packs, Ghostscript, qpdf)
# Refresh the apt package index, then install every system package in one apt-get call.
# NOTE(review): `tesseract-ocr` itself and the `eng` data used by the OCR step are not
# listed explicitly; presumably they come in as dependencies of the language packs - verify.
!apt-get update
!apt-get install -y ghostscript tesseract-ocr-chi-sim tesseract-ocr-chi-tra qpdf
# Print tool versions as a quick check that the install succeeded
!gs --version 
!qpdf --version


In [None]:
#3. Install OCRmyPDF
# Install the Python wrapper
!pip install ocrmypdf

import ocrmypdf
import logging


In [None]:
#4. Optional: configure logging to see progress in your console
# NOTE(review): verbosity=1 is presumably one step more detailed than OCRmyPDF's default -
# confirm the level meanings against the ocrmypdf documentation.
ocrmypdf.configure_logging(verbosity=1)


In [None]:
#5. Process Input Files using OCRmyPDF (AMENDED SCRIPT)
def make_pdf_searchable(input_file_path, output_file_path):
    """Run OCRmyPDF on one PDF and write the result to ``output_file_path``.

    Progress is reported with ``print``. Any ``Exception`` raised while
    processing is caught and printed rather than re-raised, so a single bad
    file does not stop a batch. The function always returns ``None``.

    Args:
        input_file_path: Path of the PDF to read.
        output_file_path: Path the processed PDF is written to.
    """
    print(f"Processing: {input_file_path}")

    # Fixed OCR settings forwarded as keyword arguments to ocrmypdf.ocr
    ocr_options = dict(
        language=['chi_sim', 'chi_tra', 'eng'],
        tesseract_timeout=600,
        output_type='pdf',
        oversample=300,
        tesseract_pagesegmode=6,
        deskew=True,
        skip_text=True,
    )

    try:
        ocrmypdf.ocr(input_file_path, output_file_path, **ocr_options)
        print(f"‚úÖ OCR complete for {os.path.basename(input_file_path)}")
    except Exception as err:
        print(f"‚ùå An error occurred processing {os.path.basename(input_file_path)}: {err}")


if __name__ == '__main__':
    # Batch driver: OCR every PDF found directly inside the input dataset folder.
    # `base_path` and `output_dir` come from the path-setup cells above.
    input_dir = base_path

    # os.listdir returns names in arbitrary order; sorting makes the processing
    # order (and therefore the log output) the same on every run.
    for filename in sorted(os.listdir(input_dir)):
        input_file_path = os.path.join(input_dir, filename)

        # Only regular files with a .pdf extension (case-insensitive) are processed;
        # a sub-directory that happens to end in ".pdf" is skipped.
        if not filename.lower().endswith('.pdf') or not os.path.isfile(input_file_path):
            continue

        # The output keeps the input file name, written into the working directory
        output_file_path = os.path.join(output_dir, filename)
        make_pdf_searchable(input_file_path, output_file_path)


In [None]:
#6. Configure your Gemini API Key
import os
from kaggle_secrets import UserSecretsClient

# Fetch the key from Kaggle secrets and export it through the environment.
# A failure at any step is reported with a hint instead of raising.
try:
    GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
    print("‚úÖ Gemini API key setup complete.")
except Exception as err:
    print(f"üîë Authentication Error: Please make sure you have added 'GOOGLE_API_KEY' to your Kaggle secrets. Details: {err}")

In [None]:
#7. Import ADK components
from google.adk.runners import InMemoryRunner
from google.adk.tools import google_search
from google.genai import types
from typing import Any, Dict
from google.adk.agents import Agent, LlmAgent
from google.adk.apps.app import App, EventsCompactionConfig
from google.adk.models.google_llm import Gemini
from google.adk.sessions import DatabaseSessionService
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.adk.tools.tool_context import ToolContext
from google.genai import types
print("‚úÖ ADK components imported successfully.")


In [None]:
#8. Define helper functions that will be reused throughout the notebook
# Define Run Session
async def run_session(
    runner_instance: Runner,
    user_queries: list[str] | str = None,
    session_name: str = "default",
):
    print(f"\n ### Session: {session_name}")

    # Get app name from the Runner
    app_name = runner_instance.app_name

    # Attempt to create a new session or retrieve an existing one
    try:
        session = await session_service.create_session(
            app_name=app_name, user_id=USER_ID, session_id=session_name
        )
    except:
        session = await session_service.get_session(
            app_name=app_name, user_id=USER_ID, session_id=session_name
        )

    # Process queries if provided
    if user_queries:
        # Convert single query to list for uniform processing
        if type(user_queries) == str:
            user_queries = [user_queries]

        # Process each query in the list sequentially
        for query in user_queries:
            print(f"\nUser > {query}")

            # Convert the query string to the ADK Content format
            query = types.Content(role="user", parts=[types.Part(text=query)])

            # Stream the agent's response asynchronously
            async for event in runner_instance.run_async(
                user_id=USER_ID, session_id=session.id, new_message=query
            ):
                # Check if the event contains valid content
                if event.content and event.content.parts:
                    # Filter out empty or "None" responses before printing
                    if (
                        event.content.parts[0].text != "None"
                        and event.content.parts[0].text
                    ):
                        print(f"{MODEL_NAME} > ", event.content.parts[0].text)
    else:
        print("No queries!")

from IPython.core.display import display, HTML
from jupyter_server.serverapp import list_running_servers


In [None]:
#9. Gets the proxied URL in the Kaggle Notebooks environment
def get_adk_proxy_url():
    """Build the proxied ADK web UI address and show a link to it.

    The kernel id and token are taken from the ``base_url`` of the first
    running Jupyter server. An HTML notice containing the full link is
    displayed, and the URL prefix (path only, without the proxy host) is
    returned.

    Returns:
        str: Path prefix of the form ``/k/<kernel>/<token>/proxy/proxy/8000``.

    Raises:
        Exception: If no Jupyter server is running, or if the base URL has
            too few ``/``-separated segments to contain kernel and token.
    """
    proxy_host = "https://kkb-production.jupyter-proxy.kaggle.net"
    adk_port = "8000"

    running = list(list_running_servers())
    if len(running) == 0:
        raise Exception("No running Jupyter servers found.")

    base_url = running[0]["base_url"]
    segments = base_url.split("/")

    # Segments 2 and 3 of the split base URL hold the kernel id and the token
    try:
        kernel, token = segments[2], segments[3]
    except IndexError:
        raise Exception(f"Could not parse kernel/token from base URL: {base_url}")

    url_prefix = f"/k/{kernel}/{token}/proxy/proxy/{adk_port}"
    url = proxy_host + url_prefix

    styled_html = f"""
    <div style="padding: 15px; border: 2px solid #f0ad4e; border-radius: 8px; background-color: #fef9f0; margin: 20px 0;">
        <div style="font-family: sans-serif; margin-bottom: 12px; color: #333; font-size: 1.1em;">
            <strong>‚ö†Ô∏è IMPORTANT: Action Required</strong>
        </div>
        <div style="font-family: sans-serif; margin-bottom: 15px; color: #333; line-height: 1.5;">
            The ADK web UI is <strong>not running yet</strong>. You must start it in the next cell.
            <ol style="margin-top: 10px; padding-left: 20px;">
                <li style="margin-bottom: 5px;"><strong>Run the next cell</strong> (the one with <code>!adk web ...</code>) to start the ADK web UI.</li>
                <li style="margin-bottom: 5px;">Wait for that cell to show it is "Running" (it will not "complete").</li>
                <li>Once it's running, <strong>return to this button</strong> and click it to open the UI.</li>
            </ol>
            <em style="font-size: 0.9em; color: #555;">(If you click the button before running the next cell, you will get a 500 error.)</em>
        </div>
        <a href='{url}' target='_blank' style="
            display: inline-block; background-color: #1a73e8; color: white; padding: 10px 20px;
            text-decoration: none; border-radius: 25px; font-family: sans-serif; font-weight: 500;
            box-shadow: 0 2px 5px rgba(0,0,0,0.2); transition: all 0.2s ease;">
            Open ADK Web UI (after running cell below) ‚Üó
        </a>
    </div>
    """
    display(HTML(styled_html))
    return url_prefix
print("‚úÖ Helper functions defined.")


In [None]:
#10. Configure Retry Options
# Retry policy passed to the Gemini model when the root agent is defined.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],  # Retry on these HTTP errors
    attempts=5,       # Maximum retry attempts
    initial_delay=1,  # Initial delay before first retry (in seconds)
    exp_base=7,       # Delay multiplier
)


In [None]:
#11. Define Root Agent
# General-purpose assistant backed by Gemini, with Google Search as its only tool.
root_agent = Agent(
    name="helpful_assistant",
    description="A simple agent that can answer general questions.",
    instruction="You are a helpful assistant. Use Google Search for current info or if unsure.",
    model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
    tools=[google_search],
)
print("‚úÖ Root Agent defined.")


In [None]:
#12. Create the runner
# Wrap root_agent in an InMemoryRunner; the next cell uses `runner` to send a prompt to the agent.
runner = InMemoryRunner(agent=root_agent)
print("‚úÖ Runner created.")


In [None]:
#13. Trial test on Runner
# Send one sample prompt through runner.run_debug and keep the result in `response`.
# Uses top-level `await`, so this cell only works where the notebook kernel allows it.
response = await runner.run_debug(
    "What is Architects use Geographic Information Systems (GIS) primarily for? How to do with ArcGIS GeoServices REST APIs for fetching data?"
)


In [None]:
#14. ADK Web Interface
# Create a sample agent project with the ADK CLI.
# NOTE(review): the API key is passed as a command-line argument here - confirm that is acceptable.
!adk create sample-agent --model gemini-2.5-flash-lite --api_key $GOOGLE_API_KEY
# Display the "Open ADK Web UI" notice and get the proxy path prefix for the server.
# NOTE(review): the displayed notice tells the reader to run "the next cell", but the
# `adk web` command is started in this same cell.
url_prefix = get_adk_proxy_url()
# Start the ADK web UI under that prefix
!adk web --url_prefix {url_prefix}
