In [2]:
!pip install -r lab3/requirements.txt



In [3]:
import requests
from PIL import Image

file_path = "datasets/coco128/images/val2017/000000119995.jpg"

with open(file_path, 'rb') as file:
    data = file.read()


def run_bento(data):
    headers = {
        "accept": "application/json",
        "Content-Type": "image/png",
    }

    response = requests.post(
        "http://0.0.0.0:3000/invocation",
        headers=headers,
        data=data,
    )

    return response.text

In [117]:
def calculate_mAP(predictions, annotations, image_width, image_height):
    # Convert YOLO format to (x_min, y_min, x_max, y_max)
  # Convert YOLO format to (x_min, y_min, x_max, y_max)
    def yolo_to_bbox(x_center, y_center, width, height, img_width, img_height):
        x_min = (x_center - width / 2) * img_width
        y_min = (y_center - height / 2) * img_height
        x_max = (x_center + width / 2) * img_width
        y_max = (y_center + height / 2) * img_height
        return [x_min, y_min, x_max, y_max]
    
    
    # Convert predictions to (x_center, y_center, width, height)
    def pred_to_bbox(pred):
        x_min, y_min, width, height = pred
        x_max = x_min + width
        y_max = y_min + height
        return [x_min, y_min, x_max, y_max]
        
    # Calculate Intersection over Union (IoU)
    def calculate_iou(box1, box2):
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2
    
        inter_x_min = max(x1_min, x2_min)
        inter_y_min = max(y1_min, y2_min)
        inter_x_max = min(x1_max, x2_max)
        inter_y_max = min(y1_max, y2_max)
    
        inter_area = max(0, inter_x_max - inter_x_min) * max(0, inter_y_max - inter_y_min)
        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
    
        union_area = box1_area + box2_area - inter_area
        iou = inter_area / union_area
        return iou
    
    # Process ground truths
    gt_bboxes = []
    for ann in annotations:
        class_id, x_center, y_center, width, height = ann
        bbox = yolo_to_bbox(x_center, y_center, width, height, image_width, image_height)
        gt_bboxes.append((class_id, bbox))
    
    # Process predictions
    pred_bboxes = []
    for pred, class_id in predictions:
        bbox = pred_to_bbox(pred)
        pred_bboxes.append((class_id, bbox))
    
    # Calculate IoUs and determine TP, FP, FN
    iou_threshold = 0.5
    tp, fp, fn = 0, 0, 0
    
    # Create a list to store matched ground truths
    matched_gts = set()
    
    for pred_class, pred_bbox in pred_bboxes:
        if pred_class != 15:
            continue
        matched = False
        for gt_idx, (gt_class, gt_bbox) in enumerate(gt_bboxes):
            iou = calculate_iou(pred_bbox, gt_bbox)
            print(iou, pred_class, gt_class)
            if iou >= iou_threshold and pred_class == gt_class and gt_idx not in matched_gts:
                tp += 1
                matched_gts.add(gt_idx)
                matched = True
                break
        if not matched:
            fp += 1
    
    # Calculate FN
    fn = len(gt_bboxes) - tp
    
    # Calculate Precision and Recall
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    
    # Assuming we have one precision-recall point, mAP is simply precision at this recall
    mAP = precision
    print(tp, fp, fn, precision, recall, mAP)
    
    return mAP


In [118]:
import json


def convert_predictions(pred_dicts_str):
    pred_dicts = json.loads(pred_dicts_str)

    # Проходим по каждому словарю в списке и извлекаем данные
    predictions = []
    for item in pred_dicts:
        if 'bbox' not in item or 'class' not in item:
            break
        bbox = item["bbox"]
        class_id = item["class"]
        predictions.append((bbox, class_id))
    return predictions


def convert_annotations(annotations):
    # Разбиваем строку на строки, используя символ новой строки в качестве разделителя
    lines = annotations.strip().split('\n')

    # Создаём список для хранения кортежей
    data_list = []

    # Проходим по каждой строке
    for line in lines:
        # Разбиваем строку на подстроки, используя пробел в качестве разделителя
        values = line.split()
        # Преобразуем значения в числа
        values = [float(value) for value in values]
        # Преобразуем в кортеж и добавляем в список
        data_list.append(tuple(values))

    return data_list


def get_inference_time(predictions):
    result_dict = json.loads(predictions)
    if result_dict != []:
        return result_dict[0]["inference_time"]*1000

In [120]:
import os


def process_files_and_read_labels():
    images_directory = "datasets/cats_dataset/cats_dataset/images/val"
    labels_directory = "datasets/cats_dataset/cats_dataset/labels/val"

    # Получаем список файлов в директории с изображениями
    image_files = os.listdir(images_directory)
    i = 0
    mAP_sum = 0
    inference_time_sum = 0
    # Проходим по каждому файлу изображения
    for image_file in image_files:
        # Проверяем, что файл является файлом изображения
        if image_file.endswith((".jpg")):
            with open(os.path.join(images_directory, image_file), 'rb') as file:
                data = file.read()

            image = Image.open(os.path.join(images_directory, image_file))
            width, height = image.size

            result = run_bento(data)
            predictions = convert_predictions(result)
            inference_time = get_inference_time(result)
            
            label_file = os.path.join(labels_directory, os.path.splitext(image_file)[0] + ".txt")

            i = i + 1

            with open(label_file, "r") as f:
                label_content = f.read()
            mAP = calculate_mAP(predictions, convert_annotations(label_content), width, height)
            mAP_sum = mAP_sum + mAP
            inference_time_sum = inference_time_sum + inference_time

    return mAP_sum / 45, inference_time_sum/i


# Вызываем функцию для обработки файлов и чтения соответствующих меток
mAP, inference_time = process_files_and_read_labels()
print(mAP)
print(inference_time)

0.6457153322303872 15 15.0
1 0 0 1.0 1.0 1.0
0.9599627452450264 15 15.0
1 0 0 1.0 1.0 1.0
0.9380434597567834 15 15.0
1 0 0 1.0 1.0 1.0
0.7493950786803572 15 15.0
1 0 0 1.0 1.0 1.0
0.9209483022681416 15 15.0
1 0 0 1.0 1.0 1.0
0.9528520416993599 15 15.0
1 0 0 1.0 1.0 1.0
0 0 1 0 0.0 0
0.852423164934648 15 15.0
1 0 0 1.0 1.0 1.0
0.955915425380864 15 15.0
1 0 0 1.0 1.0 1.0
0.8955318409013261 15 15.0
1 0 0 1.0 1.0 1.0
0.364403579218337 15 15.0
0 1 1 0.0 0.0 0.0
0.5609177230878107 15 15.0
1 0 0 1.0 1.0 1.0
0.7881525948319493 15 15.0
1 0 1 1.0 0.5 1.0
0.6977114717016342 15 15.0
1 0 0 1.0 1.0 1.0
0.5558050646290761 15 15.0
1 0 0 1.0 1.0 1.0
0.9336268554417606 15 15.0
1 0 0 1.0 1.0 1.0
0.7051771818876917 15 15.0
1 0 0 1.0 1.0 1.0
0 0 1 0 0.0 0
0.9456073260073261 15 15.0
1 0 0 1.0 1.0 1.0
0.9192767830797457 15 15.0
1 0 0 1.0 1.0 1.0
0.015491640975389614 15 15.0
0.9347282678075123 15 15.0
0.8335908317235056 15 15.0
2 0 0 1.0 1.0 1.0
0 0 1 0 0.0 0
0.884712559455704 15 15.0
1 0 0 1.0 1.0 1.0
0.8933

In [102]:
file_path = "datasets/coco128/images/val2017/000000119995.jpg"

with open(file_path, 'rb') as file:
    data = file.read()

result = run_bento(data)
print(convert_predictions(result))

[([337, 31, 113, 202], 0), ([253, 211, 176, 57], 38), ([43, 26, 217, 450], 0), ([434, 0, 33, 24], 0)]


In [48]:
result_fict = json.loads(result)
print(result_fict["inference_time"])

TypeError: list indices must be integers or slices, not str

In [19]:
print(result)
result_fict = json.loads(result)
print(result_fict[0]["inference_time"])

[{"bbox":[337,31,113,202],"class":0,"inference_time":0.1375002861},{"bbox":[44,26,217,449],"class":0,"inference_time":0.1375002861},{"bbox":[253,211,176,56],"class":38,"inference_time":0.1375002861},{"bbox":[434,0,33,23],"class":0,"inference_time":0.1375002861}]
0.1375002861


In [64]:
import pandas as pd
results = []
df = pd.DataFrame(results, columns=['bbox', 'class'])
# print(df)

print(convert_predictions('[{"bbox":[401,0,190,156],"class":56,"inference_time":0.1030790806},{"bbox":[2,-5,449,484],"class":63,"inference_time":0.1030790806}]'))

[([401, 0, 190, 156], 56), ([2, -5, 449, 484], 63)]
