In [1]:
import json
import requests
import json
import base64
from PIL import Image
from io import BytesIO
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
from requests_aws4auth import AWS4Auth
from requests.auth import HTTPBasicAuth
from botocore.exceptions import ClientError
from os import listdir, walk
import boto3

In [2]:
# image to base64
def image_to_base64(image_path):
    """Load the image at ``image_path`` and return it as a Base64 string.

    The file is opened with Pillow, written into an in-memory buffer using
    the format Pillow reports for it (``format`` attribute of the opened
    image), and the buffer's bytes are Base64 encoded and decoded to text
    as UTF-8.
    """
    with Image.open(image_path) as source_image:
        # Serialise into memory instead of touching the disk again.
        memory_stream = BytesIO()
        source_image.save(memory_stream, format=source_image.format)
        encoded_bytes = base64.b64encode(memory_stream.getvalue())
        return encoded_bytes.decode('utf-8')

In [3]:
def image_resize(base64_image_data, width, height):
    """Resize a Base64-encoded image and return it Base64-encoded again.

    Args:
        base64_image_data: Base64 text (or bytes) of an encoded image.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The resized image, saved in the same format as the input image,
        as a Base64 string.

    Raises:
        RuntimeError: If decoding, opening, resizing or re-encoding fails.
            The original exception is attached as ``__cause__``.
    """
    try:
        image_data = base64.b64decode(base64_image_data)
        # Wrap the raw bytes so Pillow can read them like a file; the
        # context manager releases the image once we are done with it.
        with Image.open(BytesIO(image_data)) as image:
            # Resize to exactly (width, height); aspect ratio is not preserved.
            resized_image = image.resize((width, height))

            # Re-encode in memory. The format is taken from the source image
            # because it is the opened image that carries the detected format.
            buffer = BytesIO()
            resized_image.save(buffer, format=image.format)
        return base64.b64encode(buffer.getvalue()).decode()
    except Exception as e:
        # The original code raised HTTPException, which is neither imported
        # nor defined in this notebook, so every failure surfaced as a
        # NameError. Raise a built-in exception and keep the cause chained.
        raise RuntimeError(f"Error in resize image: {str(e)}") from e

# Generate description test

In [45]:
# Image description generation function
def _build_description_request(model_id, image_base64, user_message):
    """Return the JSON request body for ``invoke_model`` matching ``model_id``."""
    if "anthropic." in model_id:
        # Anthropic Messages format, used for Claude model IDs.
        # NOTE(review): max_tokens=5000 is carried over from the original
        # Sonnet branch; confirm it is within the limit of other Claude models.
        payload = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 5000,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": image_base64,
                            },
                        },
                        {"type": "text", "text": user_message},
                    ],
                }
            ],
        }
    else:
        # "messages-v1" schema, used with the default Nova model.
        payload = {
            "schemaVersion": "messages-v1",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "image": {
                                "format": "jpg",
                                "source": {"bytes": image_base64},
                            }
                        },
                        {"text": user_message},
                    ],
                }
            ],
        }
    return json.dumps(payload)


def enrich_image_desc(image_base64, model_id='amazon.nova-pro-v1:0'):
    """Ask a Bedrock model for Markdown tags and a short summary of an image.

    The image is resized to 320x320 with ``image_resize`` and sent, together
    with a fixed prompt, through the ``bedrock-runtime`` ``invoke_model`` call.

    Args:
        image_base64: Base64-encoded image data.
        model_id: Bedrock model identifier. IDs containing ``"anthropic."``
            (for example ``anthropic.claude-3-haiku-20240307-v1:0`` or
            ``anthropic.claude-3-5-sonnet-20240620-v1:0``) get the Anthropic
            Messages request body; any other ID gets the ``messages-v1``
            body used by the default ``amazon.nova-pro-v1:0``.

    Returns:
        The response body parsed from JSON. Its layout depends on the model;
        the notebook reads ``['output']['message']['content'][0]['text']``
        from the default model's response.

    Raises:
        Exception: Any error from the request or from parsing the response
            is printed and then re-raised unchanged. Errors from
            ``image_resize`` propagate as raised by that function.
    """
    client = boto3.client(
        "bedrock-runtime"
    )

    # Prompt sent alongside the image.
    user_message = """
        You will be analyzing an image and extracting its key features, including tags, and providing a brief summary of the image content.

        First, carefully examine the image provided in {$IMAGE}.

        Then, in Markdown format, provide the following:

        1. **Tags**: List the key tags that describe the main elements and subjects in the image.
        2. **Summary**: Write a concise 1-2 sentence summary describing the overall content and meaning of the image.

        Format your response as follows:

        # Image Analysis

        ## Tags
        - Tag 1
        - Tag 2
        - Tag 3

        ## Summary
        A brief 1-2 sentence summary of the image content.

        Provide your response within <result> tags.
    """

    # Shrink the image before sending it to the model.
    # NOTE(review): the request bodies declare the image as JPEG, while
    # image_resize keeps the source image's format — confirm inputs are JPEG.
    image_base64 = image_resize(image_base64, 320, 320)

    body = _build_description_request(model_id, image_base64, user_message)

    try:
        response = client.invoke_model(
            modelId=model_id,
            body=body
        )

        # Parse and echo the raw response so it is visible in the cell output.
        response_body = json.loads(response.get("body").read())
        print(response_body)
        return response_body

    except Exception as e:
        # Print the reason, then re-raise with the original traceback intact.
        print(str(e))
        raise

In [46]:
# Encode the sample query image and shrink it to 320x320 in one step; the
# resized Base64 string is what the following cells use as `img_base64`.
img_base64 = image_resize(
    image_to_base64("/Users/fangyili/Downloads/搜图10.19/元素图相同，款式不同/期望搜索图一.jpg"),
    320,
    320,
)

# Request the generated tags/summary for the image.
response_body = enrich_image_desc(img_base64)

# The generated text is the first content item of the returned message.
first_content_item = response_body['output']['message']['content'][0]
description = first_content_item['text']

in
{'output': {'message': {'content': [{'text': '<result>\n\n# Image Analysis\n\n## Tags\n- Mouse pad\n- Butterfly design\n- Computer desk\n- Wireless mouse\n- Wooden table\n\n## Summary\nA wooden desk with a black mouse pad featuring a butterfly design, along with a wireless mouse and a computer monitor.\n\n</result>'}], 'role': 'assistant'}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 1472, 'outputTokens': 51, 'totalTokens': 1523, 'cacheReadInputTokenCount': None, 'cacheWriteInputTokenCount': None}}


# Embedding test

In [41]:
# Text part of the embedding input: the description generated for the image.
description = response_body['output']['message']['content'][0]['text']

# Request body for `amazon.titan-embed-image-v1`: the description and the
# Base64 image are sent together, asking for a 1024-element embedding.
embedding_body = json.dumps(
    dict(
        inputText=description,
        inputImage=img_base64,
        embeddingConfig=dict(outputEmbeddingLength=1024),
    )
)

bedrock_runtime = boto3.client("bedrock-runtime")
response = bedrock_runtime.invoke_model(
    modelId='amazon.titan-embed-image-v1',
    body=embedding_body,
    contentType="application/json",
    accept="application/json",
)

# The response body is a stream; read it once and decode before parsing.
raw_embedding_payload = response['body'].read()
embedding_json = json.loads(raw_embedding_payload.decode('utf-8'))
print(embedding_json["embedding"])

[0.022301413, 0.0035067056, 0.018086024, 0.00959682, 0.03234542, 0.058135927, 0.020214416, -0.0034246678, 0.016905293, 0.029982366, 0.035818502, 0.02521018, 0.016572323, -0.0107966475, 0.012530772, 0.011392394, -0.05040139, 0.0012261372, -0.013391379, -0.03344916, 0.09347598, 0.05293136, -0.022963531, -0.033736676, -0.023541395, -0.04417833, 0.024782112, -0.029565696, 0.025829125, -0.011607123, 0.00045775343, 0.0017256367, 0.03773834, 0.017993758, 0.026050488, -0.012257401, 0.117787525, -0.07782483, 0.0069218106, -0.01718172, 0.055414036, -0.038364984, 0.0029730701, 0.0717794, -0.007841157, 0.023501232, 0.015675724, -0.01516925, 0.00853758, -0.012002407, 0.00993518, 0.034142572, 0.040343337, -0.017055701, 0.00092979986, 0.008875926, -0.0037212337, -0.022349209, -0.062538564, 0.0064480267, -0.009185627, -0.035841167, 0.008189464, -0.048606336, 0.0009892364, 0.027376018, 0.01996718, 0.05325485, -0.022753257, 0.021250335, 0.0017185542, -0.0051859496, -0.063635156, -0.058658727, -0.0295180

In [42]:
# Sanity check: the embedding length should equal the requested outputEmbeddingLength (1024).
len(embedding_json["embedding"])

1024