In [26]:
import boto3
import json
from botocore.exceptions import ClientError
import re
import base64



# --- Configuration ---------------------------------------------------------
# Region, model, input image and generation budget for the OMR request.
REGION_NAME = "us-east-1"
model_id = "anthropic.claude-3-haiku-20240307-v1:0"
IMAGE_PATH = 'sample-omr-3.jpg'
# NOTE(review): Claude 3 Haiku's documented output limit is believed to be
# 4096 tokens — confirm how Bedrock treats a larger max_tokens value.
MAX_TOKENS = 10000
ANTHROPIC_VERSION = "bedrock-2023-05-31"
OMR_PROMPT = "You are an intelligent system designed to evaluate OMR (Optical Mark Recognition) answer sheets. An OMR sheet contains questions, each with multiple choice options/Responses labeled 1, 2, 3, and 4. The student fills in one of these options for each question by marking the corresponding bubble. You will be provided with an image of such an OMR sheet. Your task is to analyze the image and extract the information for each question, specifically identifying which option (1, 2, 3, or 4) the student has marked. Please return the results in a clear, structured format, listing each question number alongside the marked option. If a question has multiple marked options or is left blank, please note that as well. There are 200 questions in total"


def encode_image(path: str) -> str:
    """Return the contents of the file at *path* as a base64 text string."""
    with open(path, 'rb') as image_file:
        raw_bytes = image_file.read()
    # The file is closed before encoding; only the bytes are needed from here.
    return base64.b64encode(raw_bytes).decode()


def build_payload(encoded_image: str, prompt: str = OMR_PROMPT,
                  max_tokens: int = MAX_TOKENS) -> dict:
    """Build the Messages-API request body: one user turn with image + text.

    Args:
        encoded_image: Base64-encoded JPEG data.
        prompt: Instruction text sent alongside the image.
        max_tokens: Upper bound on generated tokens requested from the model.

    Returns:
        A JSON-serialisable dict suitable for ``json.dumps``.
    """
    return {
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": encoded_image,
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
        "anthropic_version": ANTHROPIC_VERSION,
    }


def extract_text(output_json: dict) -> str:
    """Return the concatenated text of every text block in a model response.

    Raises:
        ValueError: If the response carries no text content block.
    """
    text_parts = [
        block["text"]
        for block in output_json.get("content", [])
        if block.get("type") == "text"
    ]
    if not text_parts:
        raise ValueError("Model response contains no text content block")
    return "".join(text_parts)


def is_truncated(output_json: dict) -> bool:
    """Return True when the response reports it stopped at the token limit."""
    return output_json.get("stop_reason") == "max_tokens"


# Everything below performs I/O (file read + network call), so it only runs
# when executed as a script / notebook cell, not when imported.
if __name__ == "__main__":
    # Initialize the Bedrock client
    client = boto3.client("bedrock-runtime", region_name=REGION_NAME)

    # Read the image as binary data and wrap it in the request body.
    encoded_image = encode_image(IMAGE_PATH)
    payload = build_payload(encoded_image)

    # Invoke the model; report the service error code before re-raising so a
    # failure is easy to diagnose from the cell output.
    try:
        response = client.invoke_model(
            modelId=model_id,
            contentType="application/json",
            body=json.dumps(payload)
        )
    except ClientError as err:
        error_info = err.response.get("Error", {})
        print(
            f"Bedrock invocation failed [{error_info.get('Code')}]: "
            f"{error_info.get('Message')}"
        )
        raise

    # The body is a byte stream: read it fully, parse the JSON, then pull out
    # just the generated text without the surrounding metadata.
    output_binary = response["body"].read()
    output_json = json.loads(output_binary)
    output = extract_text(output_json)
    print(output)

    # A reply cut off at the token budget would otherwise look complete.
    if is_truncated(output_json):
        print("[warning] Response stopped at max_tokens; the answer list may be incomplete.")

Here is the analysis of the OMR answer sheet:

001: 4
002: 3
003: 1
004: 3
005: 2
006: 4
007: 4
008: 3
009: 2
010: 3
011: 3
012: 3
013: 3
014: 4
015: 3
016: 3
017: 3
018: 4
019: 4
020: 3
021: 3
022: 4
023: 4
024: 3
025: 3
026: 3
027: 4
028: 4
029: 3
030: 4
031: 3
032: 1
033: 3
034: 4
035: 3

036: 3
037: 3
038: 4
039: 2
040: 3
041: 3
042: 3
043: 3
044: 3
045: 3
046: 4
047: 3
048: 3
049: 3
050: 3

051: 4
052: 3
053: 4
054: 1
055: 4
056: 4
057: 2
058: 3
059: 3
060: 3
061: 4
062: 3
063: 4
064: 3
065: 1
066: 4
067: 2
068: 3
069: 3
070: 3
071: 3
072: 3
073: 4
074: 2
075: 3
076: 1
077: 1
078: 3
079: 3
080: 3
081: 2
082: 3
083: 4
084: 4
085: 4

101: 3
102: 4
103: 1
104: 2
105: 2
106: 4
107: 4
108: 3
109: 3
110: 3
111: 3
112: 3
113: 2
114: 1
115: 2
116: 4
117: 3
118: 4
119: 2
120: 3
121: 2
122: 3
123: 2
124: 3
125: 3
126: 1
127: 1
128: 3
129: 4
130: 4
131: 1
132: 2
133: 3
134: 3
135: 4

151: 4
152: 4
153: 4
154: 3
155: 4
156: 2
157: 4
158: 1
159: 3
160: 1
161: 3
162: 3
163: 2
164: 3
165: 2
166: