### Extract document using Textract Synchronous Call

In [112]:
import boto3
import re

def detect_words_in_document(document_path):
    # Initialize the AWS Textract client
    textract = boto3.client('textract')

    # Check if the document_path is an S3 path
    if document_path.startswith('s3://'):
        # Parse the S3 path
        s3_pattern = r's3://(?P<bucket>[^/]+)/(?P<key>.+)'
        match = re.match(s3_pattern, document_path)
        if not match:
            raise ValueError("Invalid S3 path format. Use 's3://bucket-name/path/to/document'")

        bucket_name = match.group('bucket')
        object_key = match.group('key')

        # Prepare S3 document for Textract
        document = {'S3Object': {'Bucket': bucket_name, 'Name': object_key}}
    else:
        # Handle local file
        with open(document_path, 'rb') as file:
            file_bytes = file.read()
        document = {'Bytes': file_bytes}

    # Call the Detect Document Text API
    response = textract.detect_document_text(Document=document)

    # Initialize lists to store words, bounding boxes, and confidence scores
    words = []
    bboxes = []
    confidence_scores = []

    # Process the results
    for block in response['Blocks']:
        if block['BlockType'] == 'WORD':
            words.append(block['Text'])

            # Extract and format bounding box
            bbox = block['Geometry']['BoundingBox']
            left = int(bbox['Left'] * 1000)
            top = int(bbox['Top'] * 1000)
            right = int((bbox['Left'] + bbox['Width']) * 1000)
            bottom = int((bbox['Top'] + bbox['Height']) * 1000)
            bboxes.append([left, top, right, bottom])

            confidence_scores.append(block['Confidence'])

    return words, bboxes, confidence_scores

# Example usage
document_path = file
words, bboxes, confidence_scores = detect_words_in_document(document_path)


## Create SageMaker Serverless Endpoint

In [67]:
import sagemaker
import boto3
from sagemaker.pytorch import PyTorchModel
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
# Get the execution role
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Model artifacts and code configuration
model_data = 's3://fairstone/output/churn-train-2024-05-01-01-57-13-054/output/model.tar.gz'  # Replace with your model artifacts path
entry_point = 'inference.py'

# Create PyTorch Model
pytorch_model = PyTorchModel(
    model_data=model_data,
    role=role,
    source_dir='source',
    entry_point=entry_point,
    framework_version='2.1.0',
    py_version='py310',
    
)

# Specify MemorySizeInMB and MaxConcurrency in the serverless config object
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=4096,
    max_concurrency=10,
)

# Deploy the endpoint
predictor = pytorch_model.deploy(
    serverless_inference_config=serverless_config,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
)

-----!

## Invoke the endpoint

In [110]:
%%time
input_data = {
    "image": encoded_image,
    "question": "Question answering. What is the date on the form?",
   "words" : words,
"boxes":bboxes
}
predictor.predict(input_data)

CPU times: user 6.4 ms, sys: 149 µs, total: 6.55 ms
Wall time: 4.76 s


{'result': '12/10/98'}

# Bedrock Image Few Shot Examples

In [174]:
# Create the bedrock runtime to invoke LLM
from botocore.config import Config
import json
import base64
import time
config = Config(
    connect_timeout= 600, # Connection timeout parameter in seconds
    read_timeout=600, # Read timeout parameter in seconds
    retries = dict(
        max_attempts = 10 ## Handle retries
    )
)


import boto3
bedrock_runtime = boto3.client(service_name='bedrock-runtime',region_name="us-west-2",config=config) # change to your default region (us-west-2 is faster for Claude models)

In [175]:
def bedrock_streemer(response,model, stream=False):
    if stream:
        text=''
        for chunk in response['stream']:       

            if 'contentBlockDelta' in chunk:
                delta = chunk['contentBlockDelta']['delta']       
                if 'text' in delta:
                    text += delta['text']               
                    print(delta['text'] , end="")

            elif "metadata" in chunk:
                input_tokens=chunk['metadata']['usage']["inputTokens"]
                output_tokens=chunk['metadata']['usage']["outputTokens"]
                latency=chunk['metadata']['metrics']["latencyMs"]        
                print(f"\nInput Tokens: {input_tokens}\nOutput Tokens: {output_tokens}\nLatency: {latency}ms")
    else:
        
        text=response['output']['message']['content'][0]['text']
        token_usage = response['usage']
        input_tokens=token_usage['inputTokens']
        output_tokens=token_usage['outputTokens']
        latency=response['metrics']['latencyMs'] 
        print(f"Response:\n{text}\nInput Tokens: {input_tokens}\nOutput Tokens: {output_tokens}\nLatency: {latency}ms")       
      
    return text, input_tokens,output_tokens, latency

def bedrock_claude_(chat_history,system_message, prompt,model_id,stream=False,image_path=None):
    chat_history_copy = chat_history[:]
    content=[]
    if image_path:
        if not isinstance(image_path, list):
            image_path = [image_path]

        for ids,img in enumerate(image_path):
            image_name = os.path.basename(img)
            _, ext = os.path.splitext(image_name)
            if "jpg" in ext: ext = ".jpeg"

            if img.startswith("s3://"):
                # Handle S3 images
                s3 = boto3.client('s3', region_name="us-east-1")
                match = re.match("s3://(.+?)/(.+)", img)
                bucket_name = match.group(1)
                key = match.group(2)
                obj = s3.get_object(Bucket=bucket_name, Key=key)
                bytes_image = obj['Body'].read()
            else:
                # Handle local images
                with open(img, 'rb') as image_file:
                    bytes_image = image_file.read()
            
            content.extend([{"text":f"IMAGE {ids+1}({image_name}):"},{
              "image": {
                "format": f"{ext.lower().replace('.','')}",
                "source": {"bytes":bytes_image}
              }
            }])

    content.append({       
        "text": prompt
            })
    chat_history_copy.append({"role": "user",
            "content": content})
    system_message=[{"text":system_message}]
    if stream:
        response = bedrock_runtime.converse_stream(messages=chat_history_copy, modelId=model_id,inferenceConfig={"maxTokens": 2000, "temperature": 0.5,},system=system_message)
    else:
        response = bedrock_runtime.converse(messages=chat_history_copy, modelId=model_id,inferenceConfig={"maxTokens": 2000, "temperature": 0.5,},system=system_message)

    answer,input_tokens,output_tokens, latency=bedrock_streemer(response, model_id, stream) 
    return answer,input_tokens,output_tokens, latency


def image_bytes_get(image_path):
    """
    Function to create the few shot image examples
    """
    content=[]
    if not isinstance(image_path,dict):
         raise ValueError("Input must be in Dictionary format")
    else:
        for key,img in image_path.items():
            image_name = os.path.basename(img)
            _, ext = os.path.splitext(image_name)
            if "jpg" in ext: ext = ".jpeg"

            if img.startswith("s3://"):
                # Handle S3 images
                s3 = boto3.client('s3', region_name="us-east-1")
                match = re.match("s3://(.+?)/(.+)", img)
                bucket_name = match.group(1)
                key = match.group(2)
                obj = s3.get_object(Bucket=bucket_name, Key=key)
                bytes_image = obj['Body'].read()
            else:
                # Handle local images
                with open(img, 'rb') as image_file:
                    bytes_image = image_file.read()
            content.extend([{
                "role":"user",
                "content":[
                        {
                          "image": {
                            "format": f"{ext.lower().replace('.','')}",
                            "source": {"bytes":bytes_image}
                                  }
                        },
                        {"text":"What is this image?"}
                    ]            
                },            

                {"role":"assistant",
                 "content":[{
                     "text":key
                 }]
                }])
    return content

In [176]:
# This should be a dictionary of the images (local or S3) and the label description {'label description':'image_path'}

few_shot={'This is a paystub':'images/5d945f376f89f101477294.jpg',
 'This is a drivers license':'images/john-doc1.png',
 'This is a bank statement':'images/Lab.com (1)0.PNG',
 'This is a paystub':'images/5acf72145500c.jpg',
 'This is a bank statement':'images/TemplateLab0.PNG',
 'This is a W2':'images/W2_XL_input_clean_1001.jpg',
 'This is a W2':'images/W2_XL_input_clean_1568.jpg'}

# Create few shot examples structure
few_shot_content=image_bytes_get(few_shot)

In [180]:
# Prompt template 
question="""I have provided you a list of labelled documents as examples. 
Analyze those images carefully. Then provide a label for the unlabeled image(s)

<labels>
Here is the possible lables:
1. paystub
2. bank statement
3. W2
4. drivers license
</labels>

Provide a lable for the unlabeled image(s) from the list of possible labels above. Each image must have a single label only. 
If an image does not fall into any of the possible lables provided, respond with 'other'
Always provide your reason for labeling each image
Format your response as:
<label>
image label
</label>
Reason for assigning the label
""" # user prompt



model_id="anthropic.claude-3-5-sonnet-20240620-v1:0" #
image_path=["test.png"] # list of images (local or s3), leave empty if not using images
stream=True # Stream response or not
system_message="Your are an expert at anlyzing images and pay attention to image details. Your task will be to classify images into a list of provided labels"
response,input_tokens, output_tokens, latency=bedrock_claude_(few_shot_content,system_message, question,model_id,stream,image_path)

<label>
other
</label>

Reason for assigning the label:
This image does not match any of the provided labels (paystub, bank statement, W2, or driver's license). The document shown is a confidential facsimile transmission cover sheet from the Office of the Attorney General. It contains details such as fax numbers, sender information, and a confidentiality notice, which are not characteristic of the listed document types. The letterhead and official seal indicate it's a government document, specifically from the Attorney General's office, making it distinct from the financial and identification documents in the given labels.
Input Tokens: 6163
Output Tokens: 133
Latency: 6046ms
