# Create Marking Form
1. Convert PDF into images.
2. Highlight the student's answers.

Install the required Linux tools. This is only needed on the first run.

In [None]:
# Refresh the apt package index, then install poppler-utils non-interactively (-y).
# NOTE(review): presumably needed by pdf2image (used below) to render PDF pages — confirm.
!sudo apt-get update
!sudo apt-get -y install poppler-utils

## Define the student script

In [29]:
# Path to the student script PDF that the rest of this notebook processes.
pdf_file = "../data/TestScript.pdf"

In [30]:
# Derive the output folders from the PDF's file name (without its extension).
import os

file_name = os.path.splitext(os.path.basename(pdf_file))[0]
base_path = f"../marking_form/{file_name}"
# Both folders keep a trailing slash because later cells append file names directly.
base_path_images = f"{base_path}/images/"
base_path_annotations = f"{base_path}/annotations/"
# Create the whole directory tree; existing folders are left untouched.
for directory in (base_path_images, base_path_annotations):
    os.makedirs(directory, exist_ok=True)

## Convert PDF to JPG

In [31]:
# read pdf and convert to images
# https://stackoverflow.com/questions/46184239/how-to-convert-pdf-to-image-using-python
from pdf2image import convert_from_path
import os

# Render every PDF page as an image.
pages = convert_from_path(pdf_file, fmt='jpeg')
# Extract the file name (without extension) from pdf_file.
file_name = os.path.splitext(os.path.basename(pdf_file))[0]

# Store each page as <zero-based page index>.jpg in the images folder.
for count, page in enumerate(pages):
    page.save(f'{base_path_images}{count}.jpg', format='JPEG')

In [32]:
import base64
import json

def update_json_file(annotations, path):
    """Overwrite the file at ``path`` with ``annotations`` as JSON indented by 4 spaces.

    Args:
        annotations: JSON-serialisable object to store.
        path: Destination file path; opened in text write mode, so any
            existing content is replaced.
    """
    with open(path, "w") as json_file:
        json.dump(obj=annotations, fp=json_file, indent=4)

def image_to_data_url(filename):
    """Return the image file at ``filename`` encoded as a base64 ``data:`` URL.

    The media type is derived from the file extension. Extensions that differ
    from the registered MIME subtype (e.g. ``jpg`` -> ``jpeg``) are normalised
    so the URL carries a valid ``image/*`` type, and the extension is
    lower-cased so ``.JPG`` and ``.jpg`` yield the same prefix.

    Args:
        filename: Path to the image file; its extension selects the subtype.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # File extensions whose registered MIME subtype has a different spelling.
    subtype_aliases = {"jpg": "jpeg", "jpe": "jpeg", "svg": "svg+xml", "tif": "tiff"}
    ext = filename.rsplit(".", 1)[-1].lower()
    subtype = subtype_aliases.get(ext, ext)
    prefix = f"data:image/{subtype};base64,"
    with open(filename, "rb") as f:
        img = f.read()
    return prefix + base64.b64encode(img).decode("utf-8")

In [None]:
# Google Cloud project ID; reused by the aiplatform/vertexai initialisation cells below.
project_id = 'cyrus-testing-2023'
# Point the gcloud CLI at this project and use it as the quota project for
# application-default credentials.
!gcloud config set project {project_id}
!gcloud auth application-default set-quota-project {project_id}

In [7]:
from google.cloud import aiplatform

aiplatform.init(
    # your Google Cloud Project ID or number
    # the environment default is used if not set
    project=project_id
)

In [11]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part
import vertexai.preview.generative_models as generative_models

# Initialise the Vertex AI SDK for this project in the us-central1 region.
vertexai.init(project=project_id, location="us-central1")
# Shared Gemini model handle used by ocr() below.
model = GenerativeModel("gemini-1.5-pro-002") 

def ocr(prompt:str, filePath:str) -> str:
    """Send one image together with a text prompt to the model and return the reply.

    The image's media type is derived from the file extension, so JPEG files
    are declared as ``image/jpeg``. Files whose type cannot be guessed, or is
    not an image type, fall back to ``image/png``.

    Args:
        prompt: Instruction text sent along with the image.
        filePath: Path to the image file to analyse.

    Returns:
        The concatenated text of all streamed response chunks. The text is
        also printed as a side effect.

    Raises:
        OSError: If the image file cannot be read.
    """
    import mimetypes

    with open(filePath, "rb") as f:
        data = f.read()

    # Declare the actual media type rather than assuming PNG: the images
    # produced earlier in this notebook are saved as .jpg files.
    mime_type, _ = mimetypes.guess_type(filePath)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"
    image1 = Part.from_data(mime_type=mime_type, data=data)

    # temperature 0 with a low top_p to keep the output as stable as possible.
    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 0,
        "top_p": 0.5,
    }
    # Only block content rated HIGH in each harm category.
    safety_settings = {
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    }
    responses = model.generate_content(
        [image1, prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )
    # The reply is streamed in chunks; join them into a single string.
    text = ""
    for response in responses:
        text += response.text
    print(text)
    return text

In [None]:
import json

# Prompt sent with each page image: asks the model for the bounding box of every
# answer cell as a bare JSON array of {x, y, width, height, label} objects.
prompt = """Extract the coordinate of restangle cells bound box from the image.
Label is the question number such as Q1, Q2, Q3, etc. and it is the text inside the top right of the cell.
Cells are not in the form of table, they are just rectangle boxes.
Bound box is no overlap with other bound box.
Output should be in the following JSON Array format:
[
    {
        "x": 152,
        "y": 313,
        "width": 702,
        "height": 243,
        "label": "1"
    },
    {
        "x": 152,
        "y": 313,
        "width": 702,
        "height": 243,
        "label": "2"
    },
    .....
]
Do not include any explanations, only provide a RFC8259 compliant JSON response following this format without deviation in the following format.
Output:

"""

import re


def _strip_code_fence(reply):
    """Return ``reply`` without surrounding whitespace or a Markdown code fence.

    Removes an opening fence such as ```` ```json ```` and a closing ```` ``` ````
    when present. ``str.strip("```json\\n")`` is not used because it strips a
    *set of characters*, so a reply with leading spaces or trailing whitespace
    other than a newline would keep its fence and fail to parse.
    """
    text = reply.strip()
    text = re.sub(r"^```[A-Za-z0-9_-]*\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    return text


# Model-proposed bounding boxes, keyed by the zero-based page index as a string.
aiAnnoation = {}
# Only the first page is sent to the model; raise this to process more pages.
number_of_pages = 1
for i in range(number_of_pages):
    image_path = base_path_images + f"{i}.jpg"
    result = ocr(prompt, image_path)
    # Parse the reply as JSON once any code fence has been removed.
    result = json.loads(_strip_code_fence(result))
    aiAnnoation[str(i)] = result

print(aiAnnoation)

In [61]:
import copy
# Keep an independent deep copy of the annotations as they are at this point.
backup = copy.deepcopy(aiAnnoation)

In [None]:
from PIL import Image

# Read the pixel dimensions of the image currently referenced by image_path.
with Image.open(image_path) as img:
    width = img.width
    height = img.height

print(f"Width: {width}, Height: {height}")

In [67]:
import json
import copy
# Start again from the saved copy so that re-running this cell never scales twice.
aiAnnoation = copy.deepcopy(backup)

# Factors that map a 0-1000 range onto the image's pixel dimensions.
# NOTE(review): assumes the model reports coordinates on a 0-1000 grid — confirm.
x_scale = width / 1000.0
y_scale = height / 1000.0

# Each box field paired with the scale of its axis, in the order they are updated.
field_scales = (
    ("x", x_scale),
    ("y", y_scale),
    ("width", x_scale),
    ("height", y_scale),
)
for i in range(number_of_pages):
    for item in aiAnnoation[str(i)]:
        for field, scale in field_scales:
            item[field] = int(round(item[field] * scale))


ai_annotations_path = base_path_annotations + "ai_annotations.json"

# Save the scaled annotations to a JSON file.
with open(ai_annotations_path, "w") as f:
    json.dump(aiAnnoation, fp=f)


In [None]:
from jupyter_bbox_widget import BBoxWidget
import ipywidgets as widgets
import json
import glob

page = 1
# Latest boxes seen for each page index, updated whenever the widget's boxes change.
pageAndBoxingBoxes={}


def _page_sort_key(path):
    """Sort key that orders page images by numeric page index.

    Page images are named ``<index>.jpg``; a plain lexicographic sort would
    put ``10.jpg`` before ``2.jpg`` and break the match between a file's
    position in the list and the page-index keys used in the annotation
    files. Files whose stem is not a number are placed last, by name.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.isdecimal():
        return (0, int(stem), "")
    return (1, 0, stem)


files = sorted(glob.glob(base_path_images + "*.jpg"), key=_page_sort_key)

w_progress = widgets.IntProgress(value=0, max=len(files), description="Progress")
annotations_path = base_path_annotations + "annotations.json"
ai_annotations_path = base_path_annotations + "ai_annotations.json"

annotations = {}
# Start from the AI-generated annotations when they exist...
if os.path.exists(ai_annotations_path):
    with open(ai_annotations_path, "r") as f:
        annotations = json.load(f)

# ...but prefer previously saved manual annotations, which replace them entirely.
if os.path.exists(annotations_path):
    with open(annotations_path, "r") as f:
        annotations = json.load(f)

question_widget = widgets.Text(value="", placeholder="", description="Question:")

w_bbox = BBoxWidget(
    image=image_to_data_url(files[0])
)
w_bbox.attach(question_widget, name="label")
w_bbox.bboxes = annotations.get(str(w_progress.value), [])


# when Skip button is pressed we move on to the next file
def on_skip():
    """Advance to the next page image, or just complete the bar after the last one."""
    next_index = w_progress.value + 1
    # Always advance the bar so it reads as complete after the final page.
    w_progress.value = next_index
    if next_index >= len(files):
        # No image left to show; stop here instead of raising IndexError.
        return
    # open new image in the widget
    w_bbox.image = image_to_data_url(files[next_index])
    w_bbox.bboxes = annotations.get(str(next_index), [])


w_bbox.on_skip(on_skip)


# when Submit button is pressed we save current annotations
# and then move on to the next file
def on_submit():
    """Save the boxes of the current page to disk, then advance to the next page."""
    current_index = w_progress.value
    if current_index >= len(files):
        # Every page has been handled already; there is nothing to save.
        return
    # save annotations for current image
    annotations[str(current_index)] = w_bbox.bboxes
    update_json_file(annotations, annotations_path)
    # move on to the next file
    on_skip()


w_bbox.on_submit(on_submit)
w_out = widgets.Output()


def on_bbox_change(change):
    """Show the current boxes as JSON and remember them for the current page."""
    w_out.clear_output(wait=True)
    with w_out:
        print(json.dumps(change["new"], indent=4))
        pageAndBoxingBoxes[w_progress.value] = change["new"]


w_bbox.observe(on_bbox_change, names=["bboxes"])

w_container = widgets.VBox(
    [
        widgets.HBox(
            [
                question_widget
            ]
        ),
        w_progress,
        w_bbox,
        w_out,
    ]
)
w_container