In [None]:
%pip install scikit-learn

In [5]:
import os
import pandas as pd
import numpy as np
import cv2
import shutil
import yaml
import warnings
warnings.filterwarnings("ignore")

from glob import glob
from tqdm import tqdm
from IPython.display import clear_output
from sklearn.model_selection import train_test_split

In [6]:
# NOTE: both glob patterns must be absolute paths; adjust them to your environment.
image_paths = sorted(glob("/home/ubuntu/workspace/datasets/dacon/yolo/images/train/*.png"))
txt_paths = sorted(glob("/home/ubuntu/workspace/datasets/dacon/testing_transfers/labelme_data/*.txt"))

# Images and labels come from different directories and are paired purely by
# their position after sorting. Verify that the file stems line up one-to-one
# before splitting; otherwise every image after the first mismatch would be
# silently paired with another image's boxes.
image_stems = [os.path.splitext(os.path.basename(path))[0] for path in image_paths]
txt_stems = [os.path.splitext(os.path.basename(path))[0] for path in txt_paths]
if image_stems != txt_stems:
    unlabeled = sorted(set(image_stems) - set(txt_stems))
    orphaned = sorted(set(txt_stems) - set(image_stems))
    raise ValueError(
        f"Image/label mismatch: {len(image_paths)} images vs {len(txt_paths)} labels; "
        f"{len(unlabeled)} images without a label (e.g. {unlabeled[:3]}), "
        f"{len(orphaned)} labels without an image (e.g. {orphaned[:3]})"
    )

SEED = 42  # fixed seed so the train/valid split is reproducible
MODEL = 'v1'  # tag used in the name of the submission CSV

# Hold out 10% of the (image, label) pairs for validation.
train_images_paths, valid_images_paths, train_txt_paths, valid_txt_paths = train_test_split(
    image_paths, txt_paths, test_size=0.1, random_state=SEED
)

In [7]:
def _labelme_line_to_yolo(line, image_width, image_height):
    """Convert one four-corner label line into a normalized YOLO label string.

    Args:
        line: Text of the form ``class_id x1 y1 x2 y2 x3 y3 x4 y4``; the x values
            are divided by ``image_width`` and the y values by ``image_height``.
        image_width: Width of the image the label belongs to.
        image_height: Height of the image the label belongs to.

    Returns:
        ``"class_id x_center y_center width height"`` with the four box values
        normalized by the image size.

    Raises:
        ValueError: If the line has fewer than nine fields or a non-numeric field.
    """
    values = [float(token) for token in line.split()]
    if len(values) < 9:
        raise ValueError(f"Expected 'class_id x1 y1 x2 y2 x3 y3 x4 y4', got: {line!r}")

    class_id = int(values[0])
    xs, ys = values[1:9:2], values[2:9:2]

    # Take the extremes over all four corners so the enclosing box is correct
    # for any corner order and for rotated quadrilaterals, not only for
    # axis-aligned rectangles listed in one particular order.
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    x = ((x_min + x_max) / 2) / image_width
    y = ((y_min + y_max) / 2) / image_height
    w = (x_max - x_min) / image_width
    h = (y_max - y_min) / image_height
    return f"{class_id} {x} {y} {w} {h}"


def labelMe2Yolo(image_paths, txt_paths, type="train",
                 output_root="/home/ubuntu/workspace/datasets/dacon/testing_transfers/yolo"):
    """Write images and their YOLO-format labels into ``<output_root>/<type>/``.

    Every image is read with OpenCV and re-written under its original file name.
    Unless ``type`` is ``"test"``, the matching label file is converted line by
    line from four-corner boxes to normalized ``class x y w h`` rows and written
    next to the image under the label file's original name.

    Args:
        image_paths: Image file paths to process.
        txt_paths: Label file paths, positionally aligned with ``image_paths``.
            Ignored (may be ``None``) when ``type`` is ``"test"``.
        type: Name of the output sub-folder; ``"test"`` skips label conversion.
            (The name shadows the builtin but is kept for existing callers.)
        output_root: Directory that holds the per-split sub-folders. Must be an
            absolute path; the default is the original hard-coded location.

    Raises:
        OSError: If an image cannot be read or written.
        ValueError: If a label line is malformed.
    """
    is_test = type == "test"
    output_dir = os.path.join(output_root, type)
    # cv2.imwrite only returns False when the folder is missing, so create it up front.
    os.makedirs(output_dir, exist_ok=True)

    # For the test split there are no labels; zip the images with themselves so
    # the loop shape stays the same.
    label_sources = image_paths if is_test else txt_paths

    for image_path, txt_path in tqdm(zip(image_paths, label_sources), total=len(image_paths)):
        source_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if source_image is None:
            # imread signals an unreadable or missing file by returning None.
            raise OSError(f"Could not read image: {image_path}")
        image_height, image_width = source_image.shape[:2]

        target_image_path = os.path.join(output_dir, os.path.basename(image_path))
        if not cv2.imwrite(target_image_path, source_image):
            raise OSError(f"Could not write image: {target_image_path}")

        if is_test:
            continue

        with open(txt_path, "r") as reader:
            # Blank lines (e.g. a trailing newline) carry no label; skip them.
            yolo_labels = [
                _labelme_line_to_yolo(line, image_width, image_height)
                for line in reader
                if line.strip()
            ]

        target_label_txt = os.path.join(output_dir, os.path.basename(txt_path))
        with open(target_label_txt, "w") as writer:
            writer.writelines(f"{yolo_label}\n" for yolo_label in yolo_labels)

In [None]:
# Build the YOLO dataset YAML file (optional).

# Each non-empty line of classes.txt is comma-separated with the class name in
# its second field. Blank lines are skipped and surrounding whitespace is
# stripped so "0, car" yields "car" rather than " car".
with open("/data/classes.txt", "r", encoding="utf-8") as reader:
    classes = [line.split(",")[1].strip() for line in reader if line.strip()]

yaml_data = {
              "names": classes,
              "nc": len(classes),
              "path": "/data/yolo/",
              "train": "train",
              "val": "valid",
              "test": "test"
            }

# allow_unicode keeps non-ASCII class names readable instead of \u-escaped;
# the file is therefore written explicitly as UTF-8.
with open("/data/yolocustom.yaml", "w", encoding="utf-8") as writer:
    yaml.dump(yaml_data, writer, allow_unicode=True)

In [10]:
# Convert each split in turn; the split name selects the output sub-folder.
for split_name, split_images, split_labels in (
    ("train", train_images_paths, train_txt_paths),
    ("valid", valid_images_paths, valid_txt_paths),
):
    labelMe2Yolo(split_images, split_labels, split_name)

# The test split needs the absolute path of the test image folder filled in:
# labelMe2Yolo(sorted(glob("/test/*.png")), None, "test")

100%|██████████| 5832/5832 [13:38<00:00,  7.13it/s]
100%|██████████| 649/649 [01:31<00:00,  7.12it/s]


In [16]:
# Alternative source: the label files predicted by the model run.
# source_txts = sorted(glob(f"{MODEL}/predict/labels/*.txt"))
# glob returns files in arbitrary order; sort so the CSV rows are reproducible.
source_txts = sorted(glob("/tf/hayoung/datafortransformation/yolo/valid/*.txt"))


# Use this when the submission has to be delivered as a CSV file.
# TODO: add a confidence column later if it is required.
def submitYoloAsCSV(source_txts, output_path=None):
    """Collect YOLO label files into one submission CSV.

    Each non-empty line of every label file becomes one row with the columns
    ``file_name, class_id, point_x, point_y, width, height``; ``file_name`` is
    the label file's base name with its extension replaced by ``.png``.

    Args:
        source_txts: Paths of YOLO label files (``class x y w h`` per line).
        output_path: Destination CSV path. Defaults to
            ``/tf/hayoung/results/yolo_{MODEL}.csv`` using the notebook-level
            ``MODEL`` tag. Missing parent directories are created.

    Returns:
        None. The CSV file is the only output.
    """
    if output_path is None:
        output_path = f"/tf/hayoung/results/yolo_{MODEL}.csv"  # TODO: change this path later

    results = []
    for source_txt in tqdm(source_txts):
        # basename/splitext swaps only the real extension and works with any
        # path separator. Change ".png" if the images use another extension.
        stem, _ = os.path.splitext(os.path.basename(source_txt))
        file_name = f"{stem}.png"
        with open(source_txt, "r") as reader:
            for line in reader:
                fields = line.split()
                if not fields:
                    # Blank lines carry no detection.
                    continue
                # Any fields after the fifth (e.g. a confidence) are ignored for now.
                class_id = int(float(fields[0]))
                point_x, point_y, width, height = map(float, fields[1:5])
                results.append((file_name, class_id, point_x, point_y, width, height))

    df_submission = pd.DataFrame(
        data=results,
        columns=["file_name", "class_id", "point_x", "point_y", "width", "height"],
    )
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df_submission.to_csv(output_path, index=False)
    
# Write the submission CSV from the label files listed in source_txts.
submitYoloAsCSV(source_txts)

100%|██████████| 649/649 [00:00<00:00, 52076.74it/s]
