In [1]:
import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential

# Read the Computer Vision endpoint and key from the environment variables
# VISION_ENDPOINT_V4 and VISION_KEY_V4, so no credentials are hardcoded in
# this notebook.
try:
    endpoint = os.environ["VISION_ENDPOINT_V4"]
    key = os.environ["VISION_KEY_V4"]
except KeyError:
    # Name the exact variables looked up above so the user knows what to set.
    print("Missing environment variable 'VISION_ENDPOINT_V4' or 'VISION_KEY_V4'")
    print("Set them before running this sample.")
    exit()


In [2]:
# Build the Image Analysis client from the endpoint and key read earlier.
client = ImageAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Request a caption and OCR text for the sample image.
# This is a synchronous (blocking) call.
result = client.analyze_from_url(
    image_url="https://learn.microsoft.com/azure/ai-services/computer-vision/media/quickstarts/presentation.png",
    visual_features=[
        VisualFeatures.CAPTION,
        VisualFeatures.READ,
    ],
    # Optional; the default is False.
    gender_neutral_caption=True,
)

print("Image analysis results:")

# Caption: printed only when the result carries one.
print(" Caption:")
if result.caption is not None:
    print(f"   '{result.caption.text}', Confidence {result.caption.confidence:.4f}")

# Read (OCR): walk every returned text block instead of indexing blocks[0],
# so an empty block list prints nothing rather than raising IndexError.
print(" Read:")
if result.read is not None:
    for block in result.read.blocks:
        for line in block.lines:
            print(f"   Line: '{line.text}', Bounding box {line.bounding_polygon}")
            for word in line.words:
                print(f"     Word: '{word.text}', Bounding polygon {word.bounding_polygon}, Confidence {word.confidence:.4f}")

Image analysis results:
 Caption:
   'a person pointing at a screen', Confidence 0.7768
 Read:
   Line: '9:35 AM', Bounding box [{'x': 131, 'y': 130}, {'x': 214, 'y': 130}, {'x': 214, 'y': 148}, {'x': 131, 'y': 148}]
     Word: '9:35', Bounding polygon [{'x': 132, 'y': 130}, {'x': 172, 'y': 131}, {'x': 171, 'y': 149}, {'x': 131, 'y': 148}], Confidence 0.9770
     Word: 'AM', Bounding polygon [{'x': 180, 'y': 131}, {'x': 203, 'y': 131}, {'x': 202, 'y': 149}, {'x': 180, 'y': 149}], Confidence 0.9980
   Line: 'Conference room 154584354', Bounding box [{'x': 132, 'y': 153}, {'x': 224, 'y': 153}, {'x': 224, 'y': 161}, {'x': 132, 'y': 160}]
     Word: 'Conference', Bounding polygon [{'x': 143, 'y': 153}, {'x': 174, 'y': 154}, {'x': 174, 'y': 161}, {'x': 143, 'y': 161}], Confidence 0.6930
     Word: 'room', Bounding polygon [{'x': 176, 'y': 154}, {'x': 188, 'y': 154}, {'x': 188, 'y': 161}, {'x': 176, 'y': 161}], Confidence 0.9590
     Word: '154584354', Bounding polygon [{'x': 192, 'y': 154},