In [1]:
import os, cv2
import pandas as pd
from tqdm import tqdm

# Name of the dataset folder; it is also the stem of the label CSV and the name
# of the verification output directory below.
dataset_name = "dognose_yolov5_20220711_002"
dataset_root_dir = f"../../datasets/nosedataforyolo/{dataset_name}"
dataset_labels_dir = f"{dataset_root_dir}/labels"
dataset_images_dir = f"{dataset_root_dir}/images"
# Text label file written by the conversion cell and read back by the verification cell.
new_label_path = f"{dataset_labels_dir}/label.txt"
# Directory that receives the annotated images written by verify_labels().
verification_path = f"label_verification/{dataset_name}/"
os.makedirs(verification_path, exist_ok=True)

# Original detections. The helpers below read the columns
# name, class, confidence, x_c, y_c, width and height from this table.
org_label = pd.read_csv(f"{dataset_labels_dir}/{dataset_name}.csv")
# One entry per distinct value of the 'name' column.
image_paths = org_label['name'].unique()

def get_valid_image_path(image_path):
    """Map an image path from the label CSV onto the local images directory.

    The last directory component of ``image_path`` is treated as the phase
    folder and the file is looked up at
    ``{dataset_images_dir}/{phase}/{file name}``.

    Args:
        image_path: Path string as stored in the ``name`` column of the CSV.

    Returns:
        The local path as a string if a file exists there, otherwise ``None``.
        ``None`` is also returned when ``image_path`` has no parent directory,
        because no phase folder can be derived from it.
    """
    # dirname/basename understand every separator the platform accepts, and
    # they do not raise for a bare file name the way split(...)[-2] does.
    phase = os.path.basename(os.path.dirname(image_path))
    if not phase:
        return None

    valid_image_path = f"{dataset_images_dir}/{phase}/{os.path.basename(image_path)}"
    # isfile (not exists) so that a directory is never handed to cv2.imread.
    if os.path.isfile(valid_image_path):
        return valid_image_path
    return None
    
def get_image_shape(image_path):
    """Return the ``shape`` of the image stored at ``image_path``.

    Args:
        image_path: Path of the image file to decode with ``cv2.imread``.

    Returns:
        The ``shape`` attribute of the decoded image.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``, i.e. the file could
            not be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise ValueError(f"cv2.imread could not read image: {image_path}")
    return image.shape

def get_noses_bbox(df, image_path, image_shape):
    """Return the pixel-space nose boxes of one image, highest confidence first.

    Args:
        df: Label table with the columns ``class``, ``name``, ``confidence``,
            ``x_c``, ``y_c``, ``width`` and ``height``. The coordinate columns
            are multiplied by the image size here, so they are presumably
            normalised to [0, 1] -- confirm against the CSV.
        image_path: Value of the ``name`` column that identifies the image.
        image_shape: Image shape whose first two entries are (height, width).

    Returns:
        A list of ``[x1, y1, w, h]`` lists of ints (top-left corner plus size),
        one per row whose class is ``'Noze'``. Empty if no row matches.
    """
    img_h, img_w = image_shape[0], image_shape[1]

    mask = (df['class'] == 'Noze') & (df['name'] == image_path)
    noses = df.loc[mask].sort_values('confidence', ascending=False)

    noses_bbox = []
    # Walk the four columns in lockstep instead of slicing a one-row frame
    # per field.
    for cx, cy, rel_w, rel_h in zip(noses['x_c'], noses['y_c'],
                                    noses['width'], noses['height']):
        nose_w = int(rel_w * img_w)
        nose_h = int(rel_h * img_h)
        # Convert the box centre to its top-left corner.
        nose_x1 = int(cx * img_w) - int(nose_w / 2)
        nose_y1 = int(cy * img_h) - int(nose_h / 2)
        noses_bbox.append([nose_x1, nose_y1, nose_w, nose_h])

    return noses_bbox

def get_nostrils_coord(df, image_path, image_shape):
    """Return the pixel-space nostril centres of one image, highest confidence first.

    Args:
        df: Label table with the columns ``class``, ``name``, ``confidence``,
            ``x_c`` and ``y_c``. The centre columns are multiplied by the
            image size here, so they are presumably normalised to [0, 1] --
            confirm against the CSV.
        image_path: Value of the ``name`` column that identifies the image.
        image_shape: Image shape whose first two entries are (height, width).

    Returns:
        A list of ``[cx, cy]`` lists of ints, one per row whose class is
        ``'Hole'``. Empty if no row matches.
    """
    img_h, img_w = image_shape[0], image_shape[1]

    mask = (df['class'] == 'Hole') & (df['name'] == image_path)
    nostrils = df.loc[mask].sort_values('confidence', ascending=False)

    # Iterate both centre columns together instead of slicing a one-row frame
    # per field.
    return [
        [int(cx * img_w), int(cy * img_h)]
        for cx, cy in zip(nostrils['x_c'], nostrils['y_c'])
    ]

def verify_labels(img_path, label):
    """Draw one label on its image and save the result for visual inspection.

    The annotated copy is written to ``verification_path`` under the image's
    base name, so a later call for the same base name overwrites the file.

    Args:
        img_path: Path of the image to annotate.
        label: Sequence of at least eight ints laid out as
            ``[x1, y1, w, h, cx1, cy1, cx2, cy2]`` (box top-left corner and
            size, then two points). Values beyond the eighth are ignored.

    Raises:
        ValueError: If ``label`` has fewer than eight values or the image
            cannot be read.
    """
    if len(label) < 8:
        raise ValueError(
            f"label for {img_path} needs at least 8 values, got {len(label)}"
        )

    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise ValueError(f"cv2.imread could not read image: {img_path}")

    x1, y1, w, h, cx1, cy1, cx2, cy2 = label[:8]

    # Box outline, then the two points drawn as thick dots.
    cv2.rectangle(img, (x1, y1), (x1 + w, y1 + h), (0, 0, 255), 20)
    cv2.circle(img, (cx1, cy1), 1, (0, 255, 0), 50)
    cv2.circle(img, (cx2, cy2), 1, (0, 255, 0), 50)

    filename = os.path.basename(img_path)
    cv2.imwrite(os.path.join(verification_path, filename), img)

In [None]:
# Rebuild the label file from scratch. Opening it once in write mode keeps this
# cell idempotent; appending per image duplicated every entry on a re-run.
with open(new_label_path, 'w') as f:
    for image_path in tqdm(image_paths):
        image_path_valid = get_valid_image_path(image_path)
        if image_path_valid is None:
            continue

        # Decode the image once; both helpers need the same shape.
        image_shape = get_image_shape(image_path_valid)

        noses_bbox = get_noses_bbox(org_label, image_path, image_shape)
        if len(noses_bbox) == 0:
            continue

        # Images with fewer than two nostril detections are skipped.
        nostrils_coords = get_nostrils_coord(org_label, image_path, image_shape)
        if len(nostrils_coords) < 2:
            continue

        # One "# <path>" header per image, then one line per nose box:
        # "x1 y1 w h" followed by every nostril point of the image.
        # NOTE(review): with more than two nostril detections a line carries
        # more than 8 values, and every nose of an image gets the same
        # points -- confirm the consumer of this file expects that.
        f.write(f"# {image_path_valid}\n")
        for x1, y1, w, h in noses_bbox:
            f.write(f"{x1} {y1} {w} {h} ")
            for cx, cy in nostrils_coords:
                f.write(f"{cx} {cy} ")
            f.write("\n")

In [2]:
# verification: replay the generated label file and render every label onto
# its image so the conversion can be checked by eye.
with open(new_label_path, 'r') as f:
    lines = f.readlines()

img_path = None
for line in tqdm(lines):
    line = line.rstrip()
    if not line:
        # Tolerate blank lines instead of failing on int('').
        continue
    if line.startswith('#'):
        # Header line "# <image path>": applies to the label lines that follow.
        img_path = line[2:]
    else:
        if img_path is None:
            raise ValueError(
                f"label line found before any '# <image path>' header: {line!r}"
            )
        # split() without an argument also copes with repeated spaces.
        label = [int(x) for x in line.split()]
        # NOTE(review): verify_labels writes to a file named after the image,
        # so when an image has several label lines each call overwrites the
        # previous output -- confirm that is acceptable.
        verify_labels(img_path, label)