In [None]:
from dotenv import load_dotenv

load_dotenv('.env')

In [12]:
import os
import requests

# Load environment variables
endpoint = os.getenv('AZURE_VISION_ENDPOINT') + "computervision/"
key = os.getenv('AZURE_VISION_KEY')

def get_image_embedding(image):
    with open(image, "rb") as img:
        data = img.read()

    # Vectorize Image API
    version = "?api-version=2023-02-01-preview&modelVersion=latest"
    vectorize_img_url = endpoint + "retrieval:vectorizeImage" + version

    headers = {
        "Content-type": "application/octet-stream",
        "Ocp-Apim-Subscription-Key": key
    }

    try:
        r = requests.post(vectorize_img_url, data=data, headers=headers)

        if r.status_code == 200:
            image_vector = r.json()["vector"]
            return image_vector
        else:
            print(f"An error occurred while processing {image}. Error code: {r.status_code}.")
        
    except Exception as e:
        print(f"An error occurred while processing {image}: {e}")

    return None

image_filename = "extracted_images/image_cropped_1.png"
ref_image_vector = get_image_embedding(image_filename)

In [15]:
import os

image_folder = 'images/jerseys'
image_embeddings = {}

for filename in os.listdir(image_folder):
    if filename.endswith('.png') or filename.endswith('.jpg'):
        image_path = os.path.join(image_folder, filename)
        image_vector = get_image_embedding(image_path)
        image_embeddings[filename] = image_vector

print(image_embeddings)


{'golden-state-warriors-2023-24-city-jersey.jpg': [-1.2675781, 1.0039062, -2.7519531, -2.2246094, -0.12030029, -1.640625, 2.7207031, 2.4023438, -0.38867188, -1.4384766, 4.5546875, -0.81689453, 0.13122559, -2.1933594, -2.7636719, -1.1689453, -0.7705078, -1.0742188, -0.7529297, -0.31860352, -2.078125, -0.8208008, 0.7919922, 3.3730469, -3.5761719, -1.0673828, 0.32885742, 1.875, 2.65625, -3.0527344, 6.1640625, 0.41674805, -1.0195312, 0.9472656, 0.2578125, -3.9941406, -3.9003906, -1.2080078, 0.31884766, 1.5546875, 7.4453125, -2.9882812, 2.3632812, -2.21875, 1.1748047, -0.28295898, 3.6855469, -3.1230469, -0.79003906, -1.1777344, 2.5039062, 0.71875, -0.94628906, -1.3730469, -1.6484375, -1.8193359, -0.5136719, -3.421875, 1.640625, -1.3701172, -1.7753906, 0.32006836, -3.0644531, 3.5292969, 1.4375, -4.5507812, 1.9609375, 2.6855469, -8.078125, 1.4394531, 0.66552734, 3.2695312, 4.96875, -0.9550781, -5.8632812, 4.90625, 1.1982422, -2.4902344, 4.2070312, -1.3173828, -1.625, 3.9414062, 2.9316406, -0.

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

max_similarity = -1
most_similar_image = None

for image_filename, image_vector in image_embeddings.items():
    similarity = cosine_similarity([ref_image_vector], [image_vector])[0][0]
    print(f"{image_filename}: {similarity}")
    if similarity > max_similarity:
        max_similarity = similarity
        most_similar_image = image_filename

print("Most similar image:", most_similar_image)


golden-state-warriors-2023-24-city-jersey.jpg: 0.5662579821074665
miami-heat-2023-24-icon-jersey.jpg: 0.6038383407330046
san-antonio-spurs-2023-24-icon-jersey.jpg: 0.7256503793720345
Most similar image: san-antonio-spurs-2023-24-icon-jersey.jpg
