# Custom Vision SDK 사용하기

## 기본 환경다지기

In [None]:
# Endpoint and key of the Custom Vision *training* resource.
# NOTE(review): the key is intentionally blank in this notebook; it has to be
# filled in before the clients below can authenticate. Prefer loading it from
# an environment variable or secret store over committing it.
ENDPOINT = "https://6a026customvision.cognitiveservices.azure.com/"
training_key = ""

# Endpoint, key and resource ID of the Custom Vision *prediction* resource.
# The resource ID is the value handed to publish_iteration() further down.
prediction_endpoint = "https://6a026customvision-prediction.cognitiveservices.azure.com/"
prediction_key = ""
prediction_resource_id = "/subscriptions/b850d62a-25fe-4d3a-9697-ea40449528a9/resourceGroups/a026-vision-resource/providers/Microsoft.CognitiveServices/accounts/6a026CustomVision-Prediction"

In [5]:
from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials
import os, time, uuid

* `CustomVisionTrainingClient`: 이 클래스는 모델의 생성, 학습 및 게시를 처리합니다.
* `CustomVisionPredictionClient`: 이 클래스는 이미지 분류 예측에 대한 모델의 쿼리를 처리합니다.
* `ImagePrediction`: 이 클래스는 단일 이미지에서 단일 개체 예측을 정의합니다. 여기에는 개체 ID 및 이름, 개체의 경계 상자 위치 및 신뢰도 점수에 대한 속성이 포함됩니다.

## 클라이언트 인증

In [7]:
# One authenticated client per resource: the trainer talks to the training
# endpoint, the predictor to the prediction endpoint. Each client gets its
# API key through a dedicated request header.
training_headers = {"Training-key": training_key}
credentials = ApiKeyCredentials(in_headers=training_headers)
trainer = CustomVisionTrainingClient(ENDPOINT, credentials)

prediction_headers = {"Prediction-key": prediction_key}
prediction_credentials = ApiKeyCredentials(in_headers=prediction_headers)
predictor = CustomVisionPredictionClient(prediction_endpoint, prediction_credentials)

## 새 CustomVision 프로젝트 만들기

In [None]:
# Create a new Custom Vision project.
project_name = "6a026-CustomVisionModel1"
description = "가위와 포크를 감지하는 모델"

# Name under which the trained iteration is published later on.
publish_iteration_name = "detectModel"

# Find the "General" object detection domain. A default of None is supplied so
# a missing domain produces a clear error instead of a bare StopIteration.
obj_detection_domain = next(
    (
        domain
        for domain in trainer.get_domains()
        if domain.type == "ObjectDetection" and domain.name == "General"
    ),
    None,
)
if obj_detection_domain is None:
    raise RuntimeError(
        'No "General" ObjectDetection domain was returned by the training resource.'
    )

# Create the project. The project name is a fixed string (no uuid suffix is
# appended).
print("Creating project...")
project = trainer.create_project(
    project_name,
    description,
    domain_id=obj_detection_domain.id,
)

print(project.id, project.name)

Creating project...
b6da435e-f416-4840-8082-dcc92573b4dc 6a026-CustomVisionModel1


In [None]:
#새 CustomVision 프로젝트 만들기
# project_name = "6a026-CustomVisionModel1"

# publish_iteration_name = "classifyModel"

# credentials = ApiKeyCredentials(in_headers={"Training-key": training_key})
# trainer = CustomVisionTrainingClient(ENDPOINT, credentials)

# # Create a new project
# print ("Creating project...")
# project_name = uuid.uuid4()
# project = trainer.create_project(project_name)

## 프로젝트에 태그 추가

In [9]:
# Create the two tags ("fork" and "scissors") in the new project. The returned
# tag objects are kept because their ids are attached to the image regions
# when the training images are uploaded.
fork_tag = trainer.create_tag(project.id, "fork")
scissors_tag = trainer.create_tag(project.id, "scissors")

## 이미지 업로드 및 태그 지정

In [10]:
# Bounding-box table for the fork training images.
# Key: image file name without the ".jpg" extension.
# Value: [left, top, width, height] of the single tagged region, in the order
# the upload cell unpacks it.
# NOTE(review): values look like coordinates normalized to the 0-1 range
# relative to the image size — confirm against the labelling source.
fork_image_regions = {
    "fork_1": [ 0.145833328, 0.3509314, 0.5894608, 0.238562092 ],
    "fork_2": [ 0.294117659, 0.216944471, 0.534313738, 0.5980392 ],
    "fork_3": [ 0.09191177, 0.0682516545, 0.757352948, 0.6143791 ],
    "fork_4": [ 0.254901975, 0.185898721, 0.5232843, 0.594771266 ],
    "fork_5": [ 0.2365196, 0.128709182, 0.5845588, 0.71405226 ],
    "fork_6": [ 0.115196079, 0.133611143, 0.676470637, 0.6993464 ],
    "fork_7": [ 0.164215669, 0.31008172, 0.767156839, 0.410130739 ],
    "fork_8": [ 0.118872553, 0.318251669, 0.817401946, 0.225490168 ],
    "fork_9": [ 0.18259804, 0.2136765, 0.6335784, 0.643790841 ],
    "fork_10": [ 0.05269608, 0.282303959, 0.8088235, 0.452614367 ],
    "fork_11": [ 0.05759804, 0.0894935, 0.9007353, 0.3251634 ],
    "fork_12": [ 0.3345588, 0.07315363, 0.375, 0.9150327 ],
    "fork_13": [ 0.269607842, 0.194068655, 0.4093137, 0.6732026 ],
    "fork_14": [ 0.143382356, 0.218578458, 0.7977941, 0.295751631 ],
    "fork_15": [ 0.19240196, 0.0633497, 0.5710784, 0.8398692 ],
    "fork_16": [ 0.140931368, 0.480016381, 0.6838235, 0.240196079 ],
    "fork_17": [ 0.305147052, 0.2512582, 0.4791667, 0.5408496 ],
    "fork_18": [ 0.234068632, 0.445702642, 0.6127451, 0.344771236 ],
    "fork_19": [ 0.219362751, 0.141781077, 0.5919118, 0.6683006 ],
    "fork_20": [ 0.180147052, 0.239820287, 0.6887255, 0.235294119 ]
}

# Bounding-box table for the scissors training images; same layout as the
# fork table: file name (without ".jpg") -> [left, top, width, height].
scissors_image_regions = {
    "scissors_1": [ 0.4007353, 0.194068655, 0.259803921, 0.6617647 ],
    "scissors_2": [ 0.426470578, 0.185898721, 0.172794119, 0.5539216 ],
    "scissors_3": [ 0.289215684, 0.259428144, 0.403186262, 0.421568632 ],
    "scissors_4": [ 0.343137264, 0.105833367, 0.332107842, 0.8055556 ],
    "scissors_5": [ 0.3125, 0.09766343, 0.435049027, 0.71405226 ],
    "scissors_6": [ 0.379901975, 0.24308826, 0.32107842, 0.5718954 ],
    "scissors_7": [ 0.341911763, 0.20714055, 0.3137255, 0.6356209 ],
    "scissors_8": [ 0.231617644, 0.08459154, 0.504901946, 0.8480392 ],
    "scissors_9": [ 0.170343131, 0.332957536, 0.767156839, 0.403594762 ],
    "scissors_10": [ 0.204656869, 0.120539248, 0.5245098, 0.743464053 ],
    "scissors_11": [ 0.05514706, 0.159754932, 0.799019635, 0.730392158 ],
    "scissors_12": [ 0.265931368, 0.169558853, 0.5061275, 0.606209159 ],
    "scissors_13": [ 0.241421565, 0.184264734, 0.448529422, 0.6830065 ],
    "scissors_14": [ 0.05759804, 0.05027781, 0.75, 0.882352948 ],
    "scissors_15": [ 0.191176474, 0.169558853, 0.6936275, 0.6748366 ],
    "scissors_16": [ 0.1004902, 0.279036, 0.6911765, 0.477124184 ],
    "scissors_17": [ 0.2720588, 0.131977156, 0.4987745, 0.6911765 ],
    "scissors_18": [ 0.180147052, 0.112369314, 0.6262255, 0.6666667 ],
    "scissors_19": [ 0.333333343, 0.0274019931, 0.443627447, 0.852941155 ],
    "scissors_20": [ 0.158088237, 0.04047389, 0.6691176, 0.843137264 ]
}

In [None]:
# How to check the current working directory
import os
os.getcwd()

# os.path.dirname(__file__)

# __file__ = path of the current file, including the file name itself
# os.path.dirname(__file__) = only the directory that contains the current file (file name excluded)
# os.path.join() builds a path using the separator appropriate for the running OS

'/Users/laxdin24/Documents/GitHub/MS_AI_SCHOOL_6/Azure Custom Vision'

In [None]:
# Training images are expected under "<current working directory>/Images/<tag folder>/".
base_image_location = os.path.join(os.getcwd(), "Images")


def _build_tagged_entries(image_regions, tag, folder):
    """Build the upload entries for one tag.

    Args:
        image_regions: mapping of image file name (without ".jpg") to a
            four-item [left, top, width, height] sequence.
        tag: tag object whose ``id`` is attached to every region.
        folder: sub-folder of ``base_image_location`` holding the images.

    Returns:
        A list with one ImageFileCreateEntry per image, each carrying the raw
        file bytes and a single tagged Region.
    """
    entries = []
    for file_name, (x, y, w, h) in image_regions.items():
        regions = [Region(tag_id=tag.id, left=x, top=y, width=w, height=h)]
        image_path = os.path.join(base_image_location, folder, file_name + ".jpg")
        with open(image_path, mode="rb") as image_contents:
            entries.append(
                ImageFileCreateEntry(name=file_name, contents=image_contents.read(), regions=regions)
            )
    return entries


# Go through the region tables and create the tagged image entries.
print("Adding images...")
tagged_images_with_regions = (
    _build_tagged_entries(fork_image_regions, fork_tag, "fork")
    + _build_tagged_entries(scissors_image_regions, scissors_tag, "scissors")
)

# Upload everything as a single batch.
upload_result = trainer.create_images_from_files(project.id, ImageFileCreateBatch(images=tagged_images_with_regions))
if not upload_result.is_batch_successful:
    print("Image batch upload failed.")
    for image in upload_result.images:
        print("Image status: ", image.status)
    # Raise instead of calling the interactive `exit` helper so the cell stops
    # with a visible error and later cells are not run against a partially
    # populated project.
    raise RuntimeError("Image batch upload failed.")

Adding images...


## 프로젝트 학습

In [16]:
# Start training and poll the service until the iteration is finished.
print("Training...")
iteration = trainer.train_project(project.id)
while iteration.status != "Completed":
    # Pause between polls; without it the loop queries the service
    # back-to-back and floods the output with status lines.
    time.sleep(1)
    iteration = trainer.get_iteration(project.id, iteration.id)
    print("Training status: " + iteration.status)
    # NOTE(review): "Failed" is assumed to be the terminal error status
    # reported by the service — confirm against the Iteration model docs.
    # Without this check a failed training run would be polled forever.
    if iteration.status == "Failed":
        raise RuntimeError("Training failed for iteration " + str(iteration.id))

Training...
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training


In [17]:
# Fetch the evaluation metrics of the trained iteration.
performance = trainer.get_iteration_performance(project.id, iteration.id)

# Printed in this order: precision, recall, average precision.
print(performance.precision, performance.recall, performance.average_precision)

1.0 0.625 1.0


## 현재 반복 게시(디플로이)

In [18]:
# The iteration is now trained. Publish it to the project endpoint under
# publish_iteration_name, bound to the prediction resource identified by
# prediction_resource_id.
trainer.publish_iteration(project.id, iteration.id, publish_iteration_name, prediction_resource_id)
print ("Done!")

Done!


## 예측 엔드포인트 테스트

In [19]:
# The published iteration can now be queried for predictions.

# Send the local sample image to the prediction endpoint.
test_image_path = os.path.join(base_image_location, "test", "test_image.jpg")
with open(test_image_path, mode="rb") as test_data:
    results = predictor.detect_image(project.id, publish_iteration_name, test_data)

# Print one line per detection: tag name, probability as a percentage, and
# the bounding-box fields, all with two decimals.
for prediction in results.predictions:
    box = prediction.bounding_box
    print(
        f"\t{prediction.tag_name}: {prediction.probability * 100:.2f}% "
        f"bbox.left = {box.left:.2f}, bbox.top = {box.top:.2f}, "
        f"bbox.width = {box.width:.2f}, bbox.height = {box.height:.2f}"
    )

	fork: 81.24% bbox.left = 0.05, bbox.top = 0.21, bbox.width = 0.71, bbox.height = 0.66
	fork: 0.84% bbox.left = 0.57, bbox.top = 0.53, bbox.width = 0.18, bbox.height = 0.30
	fork: 0.77% bbox.left = 0.57, bbox.top = 0.49, bbox.width = 0.43, bbox.height = 0.38
	fork: 0.69% bbox.left = 0.95, bbox.top = 0.35, bbox.width = 0.05, bbox.height = 0.09
	fork: 0.67% bbox.left = 0.95, bbox.top = 0.64, bbox.width = 0.05, bbox.height = 0.08
	fork: 0.65% bbox.left = 0.95, bbox.top = 0.57, bbox.width = 0.05, bbox.height = 0.08
	fork: 0.65% bbox.left = 0.94, bbox.top = 0.21, bbox.width = 0.06, bbox.height = 0.09
	fork: 0.62% bbox.left = 0.95, bbox.top = 0.28, bbox.width = 0.05, bbox.height = 0.09
	fork: 0.61% bbox.left = 0.77, bbox.top = 0.52, bbox.width = 0.19, bbox.height = 0.32
	fork: 0.61% bbox.left = 0.95, bbox.top = 0.50, bbox.width = 0.05, bbox.height = 0.08
	fork: 0.60% bbox.left = 0.95, bbox.top = 0.42, bbox.width = 0.05, bbox.height = 0.09
	fork: 0.58% bbox.left = 0.66, bbox.top = 0.52, bbox.

## REST API로 예측을 호출하고 Gradio로 구현하기

In [None]:
import requests
import os
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import json

# REST prediction URL of the published object-detection iteration; the project
# id and the published iteration name ("detectModel") are embedded in the path.
url = "https://6a026customvision-prediction.cognitiveservices.azure.com/customvision/v3.0/Prediction/b6da435e-f416-4840-8082-dcc92573b4dc/detect/iterations/detectModel/image"
# Prediction key sent in the "Prediction-Key" request header.
# NOTE(review): left blank here; must be filled in before the request can succeed.
Prediction_Key = ""


def _load_annotation_font(size=20):
    """Return the font used for the probability labels.

    Tries the macOS AppleGothic font first and falls back to Pillow's
    built-in default font when that file cannot be opened, so the function
    also works on machines without that font.
    """
    font_path = "/Library/Fonts/AppleGothic.ttf"  # location differs per environment
    try:
        return ImageFont.truetype(font_path, size=size)
    except OSError:
        return ImageFont.load_default()


def CustomVisionSDK(image_file, threshold=0.8):
    """Detect objects in an image via the prediction REST endpoint and draw them.

    Args:
        image_file: path of a local image file. A falsy value (e.g. ``None``
            when nothing was uploaded) short-circuits the call.
        threshold: only predictions whose probability is strictly greater
            than this value are drawn.

    Returns:
        The PIL image with a red rectangle and a probability label for each
        retained prediction, or ``None`` when no image path was given.

    Raises:
        requests.HTTPError: when the prediction endpoint answers with an
            error status (for example a missing or invalid prediction key).
    """
    # Nothing to analyse: return early instead of failing inside open().
    if not image_file:
        return None

    headers = {
        "Content-Type": "application/octet-stream",
        "Prediction-Key": Prediction_Key
    }

    # Send the local file as the binary request body. The file is only needed
    # for the upload, so it is closed before any drawing happens.
    with open(image_file, "rb") as f:
        response = requests.post(url, headers=headers, data=f, timeout=30)
    # Surface HTTP errors directly rather than as a KeyError on "predictions".
    response.raise_for_status()
    response_json = response.json()

    # Open the original image and prepare to draw on it.
    image = Image.open(image_file)
    draw = ImageDraw.Draw(image)

    # Image size in pixels, used to scale the bounding boxes.
    image_width, image_height = image.size

    # Font for the probability labels.
    font = _load_annotation_font(size=20)

    for prediction in response_json["predictions"]:
        if prediction["probability"] <= threshold:
            continue

        bbox = prediction["boundingBox"]

        # The bounding box values are multiplied by the image size to obtain
        # pixel coordinates.
        left = int(bbox["left"] * image_width)
        top = int(bbox["top"] * image_height)
        width = int(bbox["width"] * image_width)
        height = int(bbox["height"] * image_height)

        # Rectangle as (top-left corner, bottom-right corner).
        bounding_box = ((left, top), (left + width, top + height))

        # Probability label.
        annotation = f'Probability: {prediction["probability"]:.2f}'

        # Draw the rectangle and the label.
        draw.rectangle(bounding_box, outline="red", width=5)
        draw.text((left, top), annotation, fill="black", font=font)

    return image

# Gradio interface: an image upload, a threshold slider and a result image in
# one row, plus a button that runs the detection function.
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Azure Custom Vision Object Detection")
    gr.Markdown("이미지를 업로드하면 Custom Vision을 이용해 객체를 탐지하고, 바운딩 박스를 그린 결과를 반환합니다.")
    
    with gr.Row():
        # type="filepath" makes the callback receive a file path, which is
        # what CustomVisionSDK opens.
        input_image = gr.Image(type="filepath", label="이미지 업로드")
        # The slider's initial value (0.3) is what the UI passes as threshold;
        # the function's own default of 0.8 is not used from here.
        threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, label="Threshold (스레시홀드)")
        output_image = gr.Image(label="결과 이미지")
    
    submit_button = gr.Button("객체 탐지 실행")
    
    # Clicking the button calls CustomVisionSDK(image path, threshold) and
    # shows the returned image in output_image.
    submit_button.click(CustomVisionSDK, inputs=[input_image, threshold_slider], outputs=output_image)

# Start the local Gradio server.
demo.launch()

* Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


