# Step 3: Define Answer Bounding Boxes (Enhanced)
1. Convert the exam PDF into page images.
2. Auto-detect bounding boxes with AI.
3. Manually review and adjust each answer region.

**Enhancements:**
- ✅ Comprehensive validation of input files and setup
- ✅ Enhanced OCR processing with retry logic and caching
- ✅ Progress tracking for multi-page processing
- ✅ Coordinate validation and scaling
- ✅ Robust error handling and recovery
- ✅ Detailed processing reports and validation summaries

In [1]:
from grading_utils import (
    setup_paths, create_directories, init_gemini_client, 
    validate_required_files, print_validation_summary
)
import logging
import time
import json
import os
import base64
from tqdm import tqdm
from pydantic import BaseModel, Field
from typing import List
from google import genai
from google.genai import types
from pdf2image import convert_from_path
from PIL import Image

# Configure logging: INFO level, each record shows timestamp, level and message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Exam identifier handed to setup_paths. The returned mapping is read in this
# notebook through the keys "pdf_file", "file_name", "base_path",
# "base_path_images" and "base_path_annotations".
prefix = "VTC Test"
paths = setup_paths(prefix, "sample")

# Validate required files exist; stop the notebook here if any are missing.
missing_files = validate_required_files(paths)
if missing_files:
    print("‚ùå Setup validation failed!")
    for file in missing_files:
        print(f"  Missing: {file}")
    raise FileNotFoundError("Please ensure all required files are present.")

pdf_file = paths["pdf_file"]

# Configuration - can be adjusted for testing
# Pages 0 .. number_of_pages-1 are the ones sent to AI detection and scaling.
number_of_pages = 2  # Set to specific number for testing, or use len(pages) after conversion

print("‚úÖ Setup validation passed")

‚úÖ Setup validation passed


In [2]:
# Enhanced directory creation and PDF conversion
# Any failure in this cell is logged and re-raised, so later cells never run
# against a partially prepared workspace.
try:
    # Extract paths from setup
    file_name = paths["file_name"]
    base_path = paths["base_path"]
    base_path_images = paths["base_path_images"]
    base_path_annotations = paths["base_path_annotations"]

    # Create directories with error handling
    create_directories(paths)
    logger.info("‚úì Created all necessary directories")

    # Convert PDF to images with progress tracking
    logger.info("Converting PDF to images...")
    start_time = time.time()
    
    # The whole PDF is converted in one call; `pages` holds one image per page.
    pages = convert_from_path(pdf_file, fmt='jpeg')
    conversion_time = time.time() - start_time
    
    logger.info(f"‚úì Converted PDF to {len(pages)} images in {conversion_time:.2f}s")
    
    # Save images with progress tracking
    # Files are named by zero-based page index (0.jpg, 1.jpg, ...).
    # base_path_images is used as a plain string prefix, so it has to end with
    # a path separator already.
    for count, page in enumerate(tqdm(pages, desc="Saving images")):
        image_path = f'{base_path_images}{count}.jpg'
        page.save(image_path, 'JPEG')
    
    logger.info(f"‚úì Saved {len(pages)} images to {base_path_images}")
    
except Exception as e:
    logger.error(f"Failed to convert PDF or create directories: {e}")
    raise

2026-01-04 20:07:32,784 - INFO - ‚úì Created all necessary directories
2026-01-04 20:07:32,785 - INFO - Converting PDF to images...
2026-01-04 20:07:33,471 - INFO - ‚úì Converted PDF to 8 images in 0.69s
Saving images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:00<00:00, 61.37it/s]
2026-01-04 20:07:33,606 - INFO - ‚úì Saved 8 images to ../marking_form/VTC Test/images/


In [3]:
# Enhanced utility functions with error handling
def update_json_file(annotations, path):
    """Write ``annotations`` to ``path`` as JSON indented by four spaces.

    The parent directory of ``path`` is created when missing. The payload is
    serialized in memory before the destination is opened, so a value that
    cannot be encoded as JSON never truncates an existing annotations file.

    Args:
        annotations: JSON-serializable object to persist.
        path: Destination file. A bare filename (no directory component) is
            written relative to the current working directory.

    Raises:
        Exception: Any error from directory creation, serialization or the
            write itself is logged and re-raised unchanged.
    """
    try:
        # os.makedirs("") raises FileNotFoundError, so only create a directory
        # when the path actually has one.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Serialize first: opening with "w" truncates the file immediately, and
        # a failure in the middle of json.dump would leave it corrupt.
        payload = json.dumps(annotations, indent=4)
        with open(path, "w") as f:
            f.write(payload)
        logger.info(f"‚úì Updated annotations file: {path}")
    except Exception as e:
        logger.error(f"Failed to update JSON file {path}: {e}")
        raise

def image_to_data_url(filename):
    """Return the contents of ``filename`` as a base64 ``data:image/...`` URL.

    The image subtype is the lower-cased text after the last ``.`` of
    ``filename``; ``jpg`` is reported as ``jpeg``. Any failure is logged and
    re-raised.
    """
    try:
        suffix = filename.rsplit(".", 1)[-1].lower()
        subtype = "jpeg" if suffix == "jpg" else suffix

        with open(filename, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        return f"data:image/{subtype};base64," + encoded
    except Exception as e:
        logger.error(f"Failed to convert image to data URL {filename}: {e}")
        raise

print("‚úì Utility functions defined")

‚úì Utility functions defined


In [4]:
# Enhanced Gemini client initialization
# `client` is a notebook-level global; ocr_structured_enhanced reads it when it
# calls client.models.generate_content. Initialization failures are logged and
# re-raised so the OCR cells are never run without a client.
try:
    client = init_gemini_client()
    logger.info("‚úÖ Gemini client initialized successfully")
except Exception as e:
    logger.error(f"Failed to initialize Gemini client: {e}")
    raise

2026-01-04 20:07:33,676 - INFO - ‚úÖ Gemini client initialized successfully


‚úì Vertex AI Express Mode initialized


In [None]:
# Enhanced Pydantic models with validation
# NOTE(review): pydantic normally exposes the class docstring and the Field
# descriptions in the generated JSON schema, and this model is passed to the
# AI call as response_schema - treat those strings as runtime text, not as
# ordinary comments.
class BoundingBox(BaseModel):
    """Represents a single bounding box annotation with validation."""
    # Top-left corner of the box; ge=0 rejects negative coordinates.
    x: int = Field(description="X coordinate of the top-left corner", ge=0)
    y: int = Field(description="Y coordinate of the top-left corner", ge=0)
    # Box size; gt=0 rejects empty or inverted boxes.
    width: int = Field(description="Width of the bounding box", gt=0)
    height: int = Field(description="Height of the bounding box", gt=0)
    # Non-empty label. The extraction prompt in this notebook also asks for
    # the field names NAME, ID and CLASS, not only question numbers.
    label: str = Field(description="Question number (e.g., '1', '2', '3')", min_length=1)

# Top-level response model handed to the OCR call. The OCR helper builds its
# empty fallback with `response_schema(boxes=[])`, so the field name `boxes`
# is relied on elsewhere in this notebook.
class BoundingBoxResponse(BaseModel):
    """Wrapper class for list of bounding boxes with validation."""
    # All boxes detected on one page image.
    boxes: List[BoundingBox] = Field(description="List of bounding boxes for question cells")

logger.info("‚úì Pydantic models defined for structured output")

‚úì Pydantic models defined for structured output


In [6]:
# Enhanced OCR function with retry logic and caching
from grading_utils import get_cache_key, get_from_cache, save_to_cache, create_gemini_config

def ocr_structured_enhanced(prompt: str, filePath: str, response_schema: BaseModel, max_retries: int = 3):
    """
    Run structured OCR on one image, with caching, retries and backoff.

    The image is sent together with ``prompt`` to the notebook-level Gemini
    ``client`` and the answer is returned as an instance of
    ``response_schema``. Successful results are stored through
    ``save_to_cache`` and reused on later calls with the same file and prompt.

    Args:
        prompt: The prompt describing what to extract
        filePath: Path to the image file (sent with MIME type ``image/jpeg``)
        response_schema: Pydantic BaseModel class defining the expected response
            structure. It must accept ``boxes=[]``, which is used to build the
            empty fallback result.
        max_retries: Maximum number of attempts

    Returns:
        Parsed response as the specified Pydantic model. When the image cannot
        be read or every attempt fails, an empty ``response_schema(boxes=[])``
        is returned instead of raising.
    """
    # Function-scope import so this cell does not depend on an edit elsewhere.
    import hashlib

    # The built-in hash() of a str is salted per interpreter process, so it
    # yields a different key after every kernel restart. A SHA-256 digest of
    # the prompt is stable across sessions.
    # NOTE(review): assumes get_cache_key accepts a str for prompt_hash - confirm.
    prompt_digest = hashlib.sha256(prompt.encode("utf-8")).hexdigest()
    cache_key = get_cache_key("ocr_structured", file=filePath, prompt_hash=prompt_digest)

    def _remember(result):
        """Best-effort cache write; a cache failure must not discard a good result."""
        try:
            save_to_cache(cache_key, result.model_dump())
        except Exception as cache_error:
            logger.warning(f"Failed to cache OCR result for {filePath}: {cache_error}")

    # Try to get from cache first
    cached_result = get_from_cache(cache_key)
    if cached_result:
        logger.info(f"‚úì Using cached result for {filePath}")
        try:
            return response_schema(**cached_result)
        except Exception as e:
            # A stale or incompatible cache entry falls through to a fresh call.
            logger.warning(f"Failed to parse cached result: {e}")

    # Read the image file
    try:
        with open(filePath, "rb") as f:
            data = f.read()
    except Exception as e:
        logger.error(f"Failed to read image file {filePath}: {e}")
        return response_schema(boxes=[])

    # Create configuration with structured output
    config = create_gemini_config(
        temperature=0,
        top_p=0.5,
        max_output_tokens=65535,
        response_mime_type="application/json",
        response_schema=response_schema
    )

    # Retry logic
    for attempt in range(max_retries):
        try:
            logger.info(f"OCR attempt {attempt + 1}/{max_retries} for {filePath}")

            # Generate content with structured output
            response = client.models.generate_content(
                model="gemini-3-flash-preview",
                contents=[
                    {
                        "role": "user",
                        "parts": [
                            {"inline_data": {"mime_type": "image/jpeg", "data": data}},
                            {"text": prompt}
                        ]
                    }
                ],
                config=config,
            )

            # Try to use parsed property first
            parsed = getattr(response, 'parsed', None)
            if parsed is not None:
                logger.info(f"‚úì Response parsed successfully - found {len(parsed.boxes)} boxes")
                _remember(parsed)
                return parsed

            # Fall back to text-based parsing
            if response.text:
                result = response_schema(**json.loads(response.text))
                logger.info(f"‚úì Successfully parsed {len(result.boxes)} boxes from text")
                _remember(result)
                return result

            logger.warning(f"Empty response received for {filePath} on attempt {attempt + 1}")

        except Exception as e:
            logger.error(f"OCR attempt {attempt + 1} failed for {filePath}: {e}")

        # Back off before the next attempt, whether this one raised or came
        # back empty; there is nothing to wait for after the last attempt.
        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)  # Exponential backoff

    logger.error(f"All OCR attempts failed for {filePath}")
    return response_schema(boxes=[])

logger.info("‚úì Enhanced OCR function defined")

2026-01-04 20:07:33,741 - INFO - ‚úì Enhanced OCR function defined


In [7]:
# Enhanced bounding box extraction with comprehensive processing
import json
import copy

# Enhanced prompt for better extraction
prompt = """Extract the coordinates of bounding boxes for each question/answer cell from the table in the image.

Instructions:
- Identify all table cells that contain question numbers (like "1", "2", "3", "4", "5", etc.)
- Question numbers are typically located in the top-left corner or top area of each cell
- Each bounding box should cover the entire cell area where a student would write their answer
- Include cells with sub-questions (like 22a, 22b, 22c, etc.) as separate bounding boxes
- Do NOT include cells that only contain "XXXXXXX" or are marked as non-answer areas
- Bounding boxes may be adjacent but should not overlap
- For merged cells spanning multiple rows/columns, create one bounding box covering the entire merged area
- Also identify and mark special fields: NAME, ID, CLASS (student information fields)

For each bounding box, provide:
- x: X coordinate of the top-left corner of the cell
- y: Y coordinate of the top-left corner of the cell
- width: Width of the entire cell (including answer space)
- height: Height of the entire cell (including answer space)
- label: The question number or field name (e.g., "1", "2", "3", "NAME", "ID", "CLASS")

Important: 
- Extract the question number text exactly as shown (including letters like "a", "b", "c" for sub-questions)
- Do not include the period after the question number in the label
- Focus on cells where students write answers, not header cells or instruction text
- Ensure NAME, ID, and CLASS fields are properly identified for student information
"""

print("üîç Starting enhanced bounding box extraction...")
print(f"Processing {number_of_pages} pages with enhanced OCR")

aiAnnotation = {}
processing_stats = {
    'total_pages': number_of_pages,
    'successful_pages': 0,
    'failed_pages': 0,
    'total_boxes': 0,
    'processing_time': 0
}

start_time = time.time()

# Process each page with progress tracking
for i in tqdm(range(number_of_pages), desc="Processing pages"):
    image_path = base_path_images + f"{i}.jpg"
    
    print(f"\n{'='*60}")
    print(f"Processing page {i} ({image_path})")
    print(f"{'='*60}")
    
    try:
        # Validate image exists
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        
        # Use enhanced OCR with retry logic
        result = ocr_structured_enhanced(prompt, image_path, BoundingBoxResponse)
        
        # Convert Pydantic model to dict and extract boxes
        boxes_dict = [box.model_dump() for box in result.boxes]
        aiAnnotation[str(i)] = boxes_dict
        
        processing_stats['successful_pages'] += 1
        processing_stats['total_boxes'] += len(boxes_dict)
        
        print(f"‚úì Page {i}: Found {len(boxes_dict)} bounding boxes")
        if boxes_dict:
            print(json.dumps(boxes_dict, indent=2))
            
            # Validate bounding boxes
            for box in boxes_dict:
                if box['width'] <= 0 or box['height'] <= 0:
                    logger.warning(f"Invalid box dimensions on page {i}: {box}")
        else:
            print("  (No bounding boxes detected)")
            logger.warning(f"No bounding boxes found on page {i}")
            
    except Exception as e:
        logger.error(f"Failed to process page {i}: {type(e).__name__}: {e}")
        aiAnnotation[str(i)] = []
        processing_stats['failed_pages'] += 1

processing_stats['processing_time'] = time.time() - start_time

print(f"\n{'='*60}")
print("‚úÖ ENHANCED BOUNDING BOX EXTRACTION COMPLETED!")
print(f"{'='*60}")
print(f"üìä Processing Statistics:")
print(f"   Total pages: {processing_stats['total_pages']}")
print(f"   Successful: {processing_stats['successful_pages']}")
print(f"   Failed: {processing_stats['failed_pages']}")
print(f"   Total boxes found: {processing_stats['total_boxes']}")
print(f"   Processing time: {processing_stats['processing_time']:.2f}s")
print(f"   Average per page: {processing_stats['processing_time']/number_of_pages:.2f}s")
print(f"{'='*60}")

backup = copy.deepcopy(aiAnnotation)

üîç Starting enhanced bounding box extraction...
Processing 2 pages with enhanced OCR


Processing pages:   0%|          | 0/2 [00:00<?, ?it/s]2026-01-04 20:07:33,766 - INFO - OCR attempt 1/3 for ../marking_form/VTC Test/images/0.jpg
2026-01-04 20:07:33,767 - INFO - AFC is enabled with max remote calls: 10.



Processing page 0 (../marking_form/VTC Test/images/0.jpg)


2026-01-04 20:08:04,579 - INFO - HTTP Request: POST https://aiplatform.googleapis.com/v1beta1/publishers/google/models/gemini-3-flash-preview:generateContent "HTTP/1.1 200 OK"
2026-01-04 20:08:04,581 - INFO - ‚úì Response parsed successfully - found 6 boxes
Processing pages:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1/2 [00:30<00:30, 30.82s/it]2026-01-04 20:08:04,583 - INFO - OCR attempt 1/3 for ../marking_form/VTC Test/images/1.jpg
2026-01-04 20:08:04,584 - INFO - AFC is enabled with max remote calls: 10.


‚úì Page 0: Found 6 bounding boxes
[
  {
    "x": 133,
    "y": 260,
    "width": 719,
    "height": 110,
    "label": "Q1"
  },
  {
    "x": 133,
    "y": 370,
    "width": 719,
    "height": 124,
    "label": "Q2"
  },
  {
    "x": 133,
    "y": 494,
    "width": 719,
    "height": 108,
    "label": "Q3"
  },
  {
    "x": 205,
    "y": 205,
    "width": 370,
    "height": 20,
    "label": "NAME"
  },
  {
    "x": 705,
    "y": 205,
    "width": 120,
    "height": 20,
    "label": "ID"
  },
  {
    "x": 205,
    "y": 235,
    "width": 370,
    "height": 20,
    "label": "CLASS"
  }
]

Processing page 1 (../marking_form/VTC Test/images/1.jpg)


2026-01-04 20:08:32,514 - INFO - HTTP Request: POST https://aiplatform.googleapis.com/v1beta1/publishers/google/models/gemini-3-flash-preview:generateContent "HTTP/1.1 200 OK"
2026-01-04 20:08:32,516 - INFO - ‚úì Response parsed successfully - found 5 boxes
Processing pages: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:58<00:00, 29.38s/it]

‚úì Page 1: Found 5 bounding boxes
[
  {
    "x": 135,
    "y": 178,
    "width": 132,
    "height": 20,
    "label": "NAME"
  },
  {
    "x": 584,
    "y": 178,
    "width": 241,
    "height": 20,
    "label": "ID"
  },
  {
    "x": 135,
    "y": 205,
    "width": 83,
    "height": 20,
    "label": "CLASS"
  },
  {
    "x": 135,
    "y": 234,
    "width": 719,
    "height": 111,
    "label": "4"
  },
  {
    "x": 135,
    "y": 345,
    "width": 719,
    "height": 123,
    "label": "5"
  }
]

‚úÖ ENHANCED BOUNDING BOX EXTRACTION COMPLETED!
üìä Processing Statistics:
   Total pages: 2
   Successful: 2
   Failed: 0
   Total boxes found: 11
   Processing time: 58.75s
   Average per page: 29.38s





In [11]:
# Enhanced coordinate scaling and validation
from PIL import Image

# NOTE(review): the scaling below treats the model's coordinates as normalized
# to a 0-1000 grid on both axes. The extraction prompt does not request that
# explicitly - confirm against the model's actual output convention.

# Get image dimensions for scaling (page 0 is the reference / fallback size)
sample_image_path = base_path_images + "0.jpg"
try:
    with Image.open(sample_image_path) as img:
        width, height = img.size
    
    logger.info(f"‚úì Image dimensions: {width}x{height}")
    print(f"Image dimensions: Width: {width}, Height: {height}")
    
    # Calculate scaling factors
    x_scale = width / 1000.0
    y_scale = height / 1000.0
    
    logger.info(f"Scaling factors: x={x_scale:.3f}, y={y_scale:.3f}")
    
    # Apply scaling with validation.
    # Always start from the unscaled backup so re-running this cell does not
    # scale the same boxes twice.
    aiAnnotation = copy.deepcopy(backup)
    scaling_stats = {'scaled_boxes': 0, 'invalid_boxes': 0}
    
    for i in range(number_of_pages):
        # .get avoids a KeyError when number_of_pages was changed after the
        # extraction cell ran.
        page_boxes = aiAnnotation.get(str(i), [])
        if not page_boxes:
            continue
        
        # Each page is scaled by its own pixel size: pages of one PDF can
        # differ in size or orientation, and page 0's size would misplace
        # every box on such a page.
        page_width, page_height = width, height
        page_image_path = base_path_images + f"{i}.jpg"
        try:
            with Image.open(page_image_path) as page_img:
                page_width, page_height = page_img.size
        except OSError as e:
            logger.warning(f"Could not read {page_image_path} ({e}); using page 0 dimensions")
        
        if (page_width, page_height) != (width, height):
            logger.info(f"Page {i} dimensions differ from page 0: {page_width}x{page_height}")
        
        page_x_scale = page_width / 1000.0
        page_y_scale = page_height / 1000.0
        
        for item in page_boxes:
            # Apply scaling
            item['x'] = int(round(item['x'] * page_x_scale))
            item['y'] = int(round(item['y'] * page_y_scale))
            item['width'] = int(round(item['width'] * page_x_scale))
            item['height'] = int(round(item['height'] * page_y_scale))
            
            # Validate scaled coordinates (out-of-bounds boxes are reported
            # but still saved, so they can be corrected in the review widget)
            if (item['x'] < 0 or item['y'] < 0 or 
                item['x'] + item['width'] > page_width or 
                item['y'] + item['height'] > page_height):
                logger.warning(f"Scaled box out of bounds on page {i}: {item}")
                scaling_stats['invalid_boxes'] += 1
            else:
                scaling_stats['scaled_boxes'] += 1
    
    print(f"\nüìê Coordinate Scaling Results:")
    print(f"   Successfully scaled: {scaling_stats['scaled_boxes']} boxes")
    print(f"   Invalid after scaling: {scaling_stats['invalid_boxes']} boxes")
    
    # Save AI annotations
    ai_annotations_path = base_path_annotations + "ai_annotations.json"
    
    with open(ai_annotations_path, "w") as f:
        json.dump(aiAnnotation, f, indent=2)
    
    logger.info(f"‚úì Saved AI annotations to: {ai_annotations_path}")
    print(f"‚úì AI annotations saved to: {ai_annotations_path}")
    
except Exception as e:
    logger.error(f"Failed to process image dimensions or scaling: {e}")
    raise

2026-01-04 20:09:18,215 - INFO - ‚úì Image dimensions: 1654x2338
2026-01-04 20:09:18,216 - INFO - Scaling factors: x=1.654, y=2.338
2026-01-04 20:09:18,217 - INFO - ‚úì Saved AI annotations to: ../marking_form/VTC Test/annotations/ai_annotations.json


Image dimensions: Width: 1654, Height: 2338

üìê Coordinate Scaling Results:
   Successfully scaled: 11 boxes
   Invalid after scaling: 0 boxes
‚úì AI annotations saved to: ../marking_form/VTC Test/annotations/ai_annotations.json


## Manual Annotation Review and Adjustment

Please ensure the following are clearly marked on each page before grading:
- **ID**: Student identification number
- **NAME**: Student name field
- **CLASS**: Student class/section

Use the interactive widget below to review and adjust the AI-generated bounding boxes.

In [12]:
# Enhanced interactive annotation widget with comprehensive features
from jupyter_bbox_widget import BBoxWidget
import ipywidgets as widgets
import glob


def _page_sort_key(path):
    """Sort key that orders page images by numeric file stem (2.jpg before 10.jpg)."""
    stem = os.path.splitext(os.path.basename(path))[0]
    # Files whose stem is not a number sort after the numbered pages.
    return (0, int(stem), "") if stem.isdecimal() else (1, 0, stem)


# Initialize widget state
page = 1
pageAndBoundingBoxes = {}

# Get all image files.
# The list position is used as the page number (files[n] is shown together
# with annotations[str(n)]), so the order must be numeric: a plain string sort
# puts "10.jpg" before "2.jpg" and pairs images with the wrong boxes once
# there are more than ten pages. glob.escape keeps characters such as "[" in
# the directory name from being read as wildcard syntax.
files = sorted(glob.glob(glob.escape(base_path_images) + "*.jpg"), key=_page_sort_key)
logger.info(f"Found {len(files)} image files for annotation")

# Create progress widget; its value doubles as the current page index
w_progress = widgets.IntProgress(
    value=0, 
    max=len(files), 
    description="Progress",
    style={'description_width': 'initial'}
)

# File paths
annotations_path = base_path_annotations + "annotations.json"
ai_annotations_path = base_path_annotations + "ai_annotations.json"

# Load existing annotations with priority: manual > AI
annotations = {}

# Load AI annotations first (as base)
if os.path.exists(ai_annotations_path):
    try:
        with open(ai_annotations_path, "r") as f: 
            annotations = json.load(f)
        logger.info(f"‚úì Loaded AI annotations for {len(annotations)} pages")
        print(f"‚úì Loaded AI annotations for {len(annotations)} pages")
    except Exception as e:
        logger.error(f"Failed to load AI annotations: {e}")

# Then merge/override with manual annotations if they exist.
# The merge is per page: a manually saved page replaces the AI boxes for that
# page as a whole.
if os.path.exists(annotations_path):
    try:
        with open(annotations_path, "r") as f: 
            manual_annotations = json.load(f)
            annotations.update(manual_annotations)  # Manual annotations take priority
        logger.info(f"‚úì Merged manual annotations for {len(manual_annotations)} pages")
        print(f"‚úì Merged manual annotations for {len(manual_annotations)} pages")
    except Exception as e:
        logger.error(f"Failed to load manual annotations: {e}")

print(f"Total pages with annotations: {list(annotations.keys())}")

# Create question input widget
question_widget = widgets.Text(
    value="", 
    placeholder="Enter question label (e.g., '1', '2', 'NAME', 'ID')", 
    description="Question:",
    style={'description_width': 'initial'}
)

# Create status widget
status_widget = widgets.HTML(
    value="<b>Status:</b> Ready to annotate",
    description=""
)

# Create bbox widget
w_bbox = BBoxWidget(
    image=image_to_data_url(files[0]) if files else None
)
w_bbox.attach(question_widget, name="label")

# Load initial bounding boxes
initial_page = str(w_progress.value)
if initial_page in annotations:
    w_bbox.bboxes = annotations[initial_page]
    status_widget.value = f"<b>Status:</b> Loaded {len(annotations[initial_page])} boxes for page {w_progress.value}"
else:
    w_bbox.bboxes = []
    status_widget.value = f"<b>Status:</b> No annotations found for page {w_progress.value}"
# Enhanced skip function
def on_skip():
    """Move the widget to the next page and show that page's saved boxes.

    When the current page is already the last one, only the status line and
    the log are updated. Errors while loading the next page are reported in
    the status line and logged rather than raised.
    """
    last_index = len(files) - 1
    if w_progress.value >= last_index:
        status_widget.value = f"<b>Status:</b> Already at the last page ({last_index})"
        logger.info(f"Already at the last page ({last_index})")
        return

    w_progress.value += 1
    page_key = str(w_progress.value)

    try:
        # Swap in the image for the page we just moved to
        w_bbox.image = image_to_data_url(files[w_progress.value])

        # Show the boxes stored for that page, or start with none
        if page_key not in annotations:
            w_bbox.bboxes = []
            status_widget.value = f"<b>Status:</b> No annotations found for page {w_progress.value}"
            logger.warning(f"‚ö†Ô∏è No annotations found for page {w_progress.value}")
        else:
            w_bbox.bboxes = annotations[page_key]
            box_count = len(annotations[page_key])
            status_widget.value = f"<b>Status:</b> Loaded {box_count} boxes for page {w_progress.value}"
            logger.info(f"‚úì Loaded {box_count} bounding boxes for page {w_progress.value}")

    except Exception as e:
        status_widget.value = f"<b>Status:</b> Error loading page {w_progress.value}: {e}"
        logger.error(f"Error loading page {w_progress.value}: {e}")

w_bbox.on_skip(on_skip)

# Enhanced submit function
def on_submit():
    """Store the boxes of the current page in the annotations file, then advance.

    The whole ``annotations`` mapping is rewritten to ``annotations_path`` on
    every submit. Failures are shown in the status line and logged, not raised.
    """
    try:
        page_key = str(w_progress.value)

        # Record this page's boxes and persist everything collected so far
        annotations[page_key] = w_bbox.bboxes
        update_json_file(annotations, annotations_path)

        box_count = len(w_bbox.bboxes)
        status_widget.value = f"<b>Status:</b> Saved {box_count} annotations for page {w_progress.value}"
        logger.info(f"‚úì Saved {box_count} annotations for page {w_progress.value}")

        # Continue with the following page
        on_skip()

    except Exception as e:
        status_widget.value = f"<b>Status:</b> Error saving annotations: {e}"
        logger.error(f"Error saving annotations: {e}")

w_bbox.on_submit(on_submit)

# Output widget for bbox changes
w_out = widgets.Output()

def on_bbox_change(change):
    """Show the current page's boxes in ``w_out`` and record them by page index.

    Registered as an observer of ``w_bbox.bboxes``; ``change["new"]`` is the
    updated list of boxes.
    """
    # wait=True delays clearing until new output arrives.
    w_out.clear_output(wait=True)
    # NOTE(review): everything below runs inside the Output context on purpose,
    # so prints (and presumably errors) surface in the widget area - keep the
    # statement order as is.
    with w_out:
        current_boxes = change["new"]
        print(f"Page {w_progress.value}: {len(current_boxes)} bounding boxes")
        if current_boxes:
            print(json.dumps(current_boxes, indent=2))
        # In-memory record keyed by the integer page index (not the str key
        # used in `annotations`).
        pageAndBoundingBoxes[w_progress.value] = current_boxes

w_bbox.observe(on_bbox_change, names=["bboxes"])

# Create comprehensive widget container
# Layout, top to bottom: title, status line, label input with tip, progress
# bar, the bounding-box editor, a live dump of the current boxes, and usage
# instructions.
w_container = widgets.VBox([
    widgets.HTML("<h3>üìù Enhanced Interactive Annotation Tool</h3>"),
    status_widget,
    widgets.HBox([
        question_widget,
        widgets.HTML("<i>Tip: Use 'NAME', 'ID', 'CLASS' for student info fields</i>")
    ]),
    w_progress,
    w_bbox,
    widgets.HTML("<b>Current Annotations:</b>"),
    w_out,
    widgets.HTML("""
    <div style='margin-top: 10px; padding: 10px; background-color: #f0f0f0; border-radius: 5px;'>
    <b>Instructions:</b><br>
    ‚Ä¢ Draw bounding boxes around answer areas<br>
    ‚Ä¢ Label each box with question number or field name<br>
    ‚Ä¢ Use 'Submit' to save and move to next page<br>
    ‚Ä¢ Use 'Skip' to move without saving<br>
    ‚Ä¢ Ensure NAME, ID, and CLASS fields are marked
    </div>
    """)
])

print("\nüéØ Interactive annotation widget ready!")
print("Use the widget below to review and adjust bounding boxes.")

# Last expression of the cell: the notebook renders the container.
w_container

2026-01-04 20:09:20,219 - INFO - Found 8 image files for annotation
2026-01-04 20:09:20,221 - INFO - ‚úì Loaded AI annotations for 2 pages


‚úì Loaded AI annotations for 2 pages
Total pages with annotations: ['0', '1']

üéØ Interactive annotation widget ready!
Use the widget below to review and adjust bounding boxes.


VBox(children=(HTML(value='<h3>üìù Enhanced Interactive Annotation Tool</h3>'), HTML(value='<b>Status:</b> Loade‚Ä¶

In [14]:
# Final enhanced summary and validation
def generate_annotation_summary():
    """Print a report on the manually saved annotations.

    Reads ``annotations_path`` when the file exists and prints: page and box
    totals, the number of boxes per label, how many pages carry each of the
    NAME, ID and CLASS labels, the generated file locations, and the suggested
    next steps. A required label counts as present when at least one page
    carries it.
    """
    rule = '=' * 70
    required_fields = ['NAME', 'ID', 'CLASS']

    print(f"\n{rule}")
    print("üéâ ENHANCED STEP 3: ANNOTATION EXTRACTION COMPLETED")
    print(f"{rule}")

    # Final annotations stay empty when nothing has been submitted yet
    final_annotations = {}
    if os.path.exists(annotations_path):
        with open(annotations_path, "r") as f:
            final_annotations = json.load(f)

    # Overall totals
    total_pages = len(final_annotations)
    total_boxes = sum(map(len, final_annotations.values()))

    # Boxes per label, and pages on which each required label appears
    label_counts = {}
    pages_with_required = dict.fromkeys(required_fields, 0)
    for boxes in final_annotations.values():
        labels_on_page = set()
        for box in boxes:
            label = box.get('label', 'Unknown')
            label_counts[label] = label_counts.get(label, 0) + 1
            labels_on_page.add(label)
        for field in labels_on_page.intersection(required_fields):
            pages_with_required[field] += 1

    print(f"üìä Annotation Statistics:")
    print(f"   Total pages annotated: {total_pages}")
    print(f"   Total bounding boxes: {total_boxes}")
    if total_pages > 0:
        print(f"   Average boxes per page: {total_boxes/total_pages:.1f}")
    else:
        print("   No pages annotated")

    print(f"\nüè∑Ô∏è Label Distribution:")
    for label in sorted(label_counts):
        print(f"   {label}: {label_counts[label]} boxes")

    print(f"\n‚úÖ Required Field Coverage:")
    for field in required_fields:
        coverage = pages_with_required[field]
        marker = "‚úì" if coverage > 0 else "‚ùå"
        print(f"   {marker} {field}: Found on {coverage}/{total_pages} pages")
    all_required_present = all(pages_with_required[field] > 0 for field in required_fields)

    print(f"\nüìÅ Generated Files:")
    print(f"   ‚úÖ AI annotations: {ai_annotations_path}")
    print(f"   ‚úÖ Final annotations: {annotations_path}")
    print(f"   ‚úÖ Page images: {base_path_images} ({len(files)} files)")

    print(f"\nüéØ Next Steps:")
    if all_required_present:
        next_steps = [
            "   ‚úÖ All required fields present - ready for Step 4",
            "   1. Proceed to Step 4: Scoring Preprocessing",
            "   2. The annotations will be used for answer extraction",
        ]
    else:
        next_steps = [
            "   ‚ö†Ô∏è Missing required fields - please review annotations",
            "   1. Use the annotation widget to add missing NAME/ID/CLASS fields",
            "   2. Ensure all pages have student identification fields",
            "   3. Then proceed to Step 4",
        ]
    for line in next_steps:
        print(line)

    print(f"\nüí° Quality Assurance:")
    for line in (
        "   ‚Ä¢ Enhanced OCR with retry logic and caching",
        "   ‚Ä¢ Comprehensive coordinate validation and scaling",
        "   ‚Ä¢ Interactive review and adjustment capability",
        "   ‚Ä¢ Detailed processing statistics and error handling",
    ):
        print(line)

    print(f"\n{rule}")
    print(f"‚úÖ Enhanced Step 3 completed successfully!")
    print("Ready for answer extraction and grading!")
    print(f"{rule}")

# Generate the summary
generate_annotation_summary()


üéâ ENHANCED STEP 3: ANNOTATION EXTRACTION COMPLETED
üìä Annotation Statistics:
   Total pages annotated: 2
   Total bounding boxes: 8
   Average boxes per page: 4.0

üè∑Ô∏è Label Distribution:
   CLASS: 1 boxes
   ID: 1 boxes
   NAME: 1 boxes
   Q1: 1 boxes
   Q2: 1 boxes
   Q3: 1 boxes
   Q4: 1 boxes
   Q5: 1 boxes

‚úÖ Required Field Coverage:
   ‚úì NAME: Found on 1/2 pages
   ‚úì ID: Found on 1/2 pages
   ‚úì CLASS: Found on 1/2 pages

üìÅ Generated Files:
   ‚úÖ AI annotations: ../marking_form/VTC Test/annotations/ai_annotations.json
   ‚úÖ Final annotations: ../marking_form/VTC Test/annotations/annotations.json
   ‚úÖ Page images: ../marking_form/VTC Test/images/ (8 files)

üéØ Next Steps:
   ‚úÖ All required fields present - ready for Step 4
   1. Proceed to Step 4: Scoring Preprocessing
   2. The annotations will be used for answer extraction

üí° Quality Assurance:
   ‚Ä¢ Enhanced OCR with retry logic and caching
   ‚Ä¢ Comprehensive coordinate validation and scaling
 