In [None]:
from src.data_preprocessing import (
    merge_coco_jsons, get_master_categories, convert_coco_to_yolo,
    clean_invalid_yolo_files_from_dir, get_image_files_from_coco_json,
    copy_images, split_train_val, create_yolo_yaml
)
from src.augmentation import copy_few_shot_images, fine_tune_delete_by_class_popularity_relaxed
from src.utils_io import check_images_labels, count_classes_in_yolo_txt, clear_yolo_dir
from src.utils_viz import (
    merge_all_jsons_recursive, visualize_overlapping_bboxes_with_all_labels,
    process_folder, plot_class_distribution
)

# Working directories of the YOLO dataset build. Everything listed in
# paths_to_clear is handed to clear_yolo_dir together with the directory
# skeleton in paths_to_make (merged annotations + train/val image and label
# folders).
_project_dir = "/content/ai03-level1-project"

paths_to_clear = [
    f"{_project_dir}/{name}" for name in ("images", "labels", "merge_ann")
]
# Order: merge_ann first, then images/{train,val}, then labels/{train,val}.
paths_to_make = [f"{_project_dir}/merge_ann"] + [
    f"{_project_dir}/{kind}/{split}"
    for kind in ("images", "labels")
    for split in ("train_images", "val_images")
]

clear_yolo_dir(paths_to_clear, paths_to_make)

# Dataset preparation pipeline, executed top to bottom; every step relies on
# the files/variables produced by the steps before it.
# NOTE(review): the path variables used below (source_label_dir,
# source_img_dir, merged_json_path, merged_filtered_json_path, yolo_image_dir,
# yolo_label_dir, yolo_train_img_dir, yolo_val_img_dir, yolo_train_label_dir,
# yolo_val_label_dir, root_dir, yaml_path) are not assigned in this cell;
# presumably a configuration cell defines them -- confirm it runs first.

# 0) Visual check for duplicate/overlapping boxes (optional)
images_map, annotations_map, categories_map, chart_map = merge_all_jsons_recursive(source_label_dir)
visualize_overlapping_bboxes_with_all_labels(
    images_map, annotations_map, categories_map, chart_map, source_img_dir, iou_threshold=0.1
)

# 1) Merge
merge_coco_jsons(source_label_dir, merged_json_path)

# 2) Filter out incomplete labels
# NOTE(review): this helper receives the source image/label directories
# directly; presumably it removes files in place -- confirm before running it
# against the only copy of the data.
clean_invalid_yolo_files_from_dir(merged_json_path, source_img_dir, source_label_dir)

# 3) Merge again after filtering (same source directory, separate output path)
merge_coco_jsons(source_label_dir, merged_filtered_json_path)

# 4) Copy images
image_files = get_image_files_from_coco_json(merged_filtered_json_path)
copy_images(image_files, source_img_dir, yolo_image_dir)

# 5) Convert labels to YOLO format
# Only the first element returned by get_master_categories is used here.
master_cat_id_to_idx = get_master_categories([merged_filtered_json_path])[0]
convert_coco_to_yolo(
    annotation_file=merged_filtered_json_path,
    source_img_dir=yolo_image_dir,
    target_yolo_dir=yolo_label_dir,
    master_cat_id_to_idx=master_cat_id_to_idx
)

# 6) Train/val split (15% validation, fixed seed)
split_train_val(
    image_dir=yolo_image_dir,
    label_dir=yolo_label_dir,
    train_img_dir=yolo_train_img_dir,
    val_img_dir=yolo_val_img_dir,
    train_label_dir=yolo_train_label_dir,
    val_label_dir=yolo_val_label_dir,
    val_ratio=0.15,
    seed=42
)

print("=== Train Dataset Check ===")
check_images_labels(yolo_train_img_dir, yolo_train_label_dir)
print("\n=== Validation Dataset Check ===")
check_images_labels(yolo_val_img_dir, yolo_val_label_dir)

# Class counts of the training labels before steps 7-8; compared against
# after_counts in the plot further down.
before_counts = count_classes_in_yolo_txt(yolo_train_label_dir)

# 7) Augmentation
# Same call as in step 5; this time only the second returned element is kept.
_, cat_id_to_name = get_master_categories([merged_filtered_json_path])

copy_few_shot_images(
    yolo_img_dir=yolo_train_img_dir,
    yolo_label_dir=yolo_train_label_dir,
    cat_id_to_name=cat_id_to_name,
    max_classes_threshold=76,
    top_n=0
)

# 8) Relaxed deletion for over-represented classes
# NOTE(review): the meaning of the positional values 76 and 30 is not visible
# here -- TODO confirm against the helper's signature.
fine_tune_delete_by_class_popularity_relaxed(
    yolo_train_img_dir, yolo_train_label_dir, 76, 30
)

# Class counts after steps 7-8, plotted next to the earlier snapshot.
after_counts = count_classes_in_yolo_txt(yolo_train_label_dir)
plot_class_distribution(before_counts, after_counts)

# 9) Validate the augmented data
process_folder(yolo_train_img_dir, yolo_train_label_dir)

# 10) data.yaml
create_yolo_yaml(root_dir, merged_filtered_json_path, yaml_path)


In [None]:
from src.data_preprocessing import (
    merge_coco_jsons, get_master_categories, convert_coco_to_yolo,
    clean_invalid_yolo_files_from_dir, get_image_files_from_coco_json,
    copy_images, split_train_val, create_yolo_yaml
)
from src.augmentation import copy_few_shot_images, fine_tune_delete_by_class_popularity_relaxed
from src.utils_io import check_images_labels, count_classes_in_yolo_txt, clear_yolo_dir
from src.utils_viz import (
    merge_all_jsons_recursive, visualize_overlapping_bboxes_with_all_labels,
    process_folder, plot_class_distribution
)

# Root of the project checkout; all working directories below live under it.
base_dir = "/content/ai03-level1-project/sprint_ai03_1"

# Directories handed to clear_yolo_dir for clearing.
# The directory skeleton in paths_to_make lives under <base_dir>/data, so the
# matching data/ trees have to be cleared too; previously only the top-level
# images/labels/merge_ann folders were listed, which left the recreated data/
# directories untouched between runs. The top-level entries are kept so that
# nothing that used to be cleared is dropped.
# NOTE(review): assumes clear_yolo_dir tolerates entries that do not exist yet
# (first run) and that no source data is stored under data/images or
# data/labels -- confirm.
paths_to_clear = [
    f"{base_dir}/images",
    f"{base_dir}/labels",
    f"{base_dir}/merge_ann",
    f"{base_dir}/data/images",
    f"{base_dir}/data/labels",
    f"{base_dir}/data/merge_ann",
]

# Directory skeleton for this run: merged annotations plus train/val folders
# for images and labels.
paths_to_make = [
    f"{base_dir}/data/merge_ann",
    f"{base_dir}/data/images/train_images",
    f"{base_dir}/data/images/val_images",
    f"{base_dir}/data/labels/train_images",
    f"{base_dir}/data/labels/val_images",
]

clear_yolo_dir(paths_to_clear, paths_to_make)

# Dataset preparation pipeline, executed top to bottom; every step relies on
# the files/variables produced by the steps before it.
# NOTE(review): the path variables used below (source_label_dir,
# source_img_dir, merged_json_path, merged_filtered_json_path, yolo_image_dir,
# yolo_label_dir, yolo_train_img_dir, yolo_val_img_dir, yolo_train_label_dir,
# yolo_val_label_dir, root_dir, yaml_path) are not assigned in this cell;
# presumably a configuration cell defines them -- confirm it runs first.

# 0) Visual check for duplicate/overlapping boxes (optional)
images_map, annotations_map, categories_map, chart_map = merge_all_jsons_recursive(source_label_dir)
visualize_overlapping_bboxes_with_all_labels(
    images_map, annotations_map, categories_map, chart_map, source_img_dir, iou_threshold=0.1
)

# 1) Merge
merge_coco_jsons(source_label_dir, merged_json_path)

# 2) Filter out incomplete labels
# NOTE(review): this helper receives the source image/label directories
# directly; presumably it removes files in place -- confirm before running it
# against the only copy of the data.
clean_invalid_yolo_files_from_dir(merged_json_path, source_img_dir, source_label_dir)

# 3) Merge again after filtering (same source directory, separate output path)
merge_coco_jsons(source_label_dir, merged_filtered_json_path)

# 4) Copy images
image_files = get_image_files_from_coco_json(merged_filtered_json_path)
copy_images(image_files, source_img_dir, yolo_image_dir)

# 5) Convert labels to YOLO format
# Only the first element returned by get_master_categories is used here.
master_cat_id_to_idx = get_master_categories([merged_filtered_json_path])[0]
convert_coco_to_yolo(
    annotation_file=merged_filtered_json_path,
    source_img_dir=yolo_image_dir,
    target_yolo_dir=yolo_label_dir,
    master_cat_id_to_idx=master_cat_id_to_idx
)

# 6) Train/val split (15% validation, fixed seed)
split_train_val(
    image_dir=yolo_image_dir,
    label_dir=yolo_label_dir,
    train_img_dir=yolo_train_img_dir,
    val_img_dir=yolo_val_img_dir,
    train_label_dir=yolo_train_label_dir,
    val_label_dir=yolo_val_label_dir,
    val_ratio=0.15,
    seed=42
)

print("=== Train Dataset Check ===")
check_images_labels(yolo_train_img_dir, yolo_train_label_dir)
print("\n=== Validation Dataset Check ===")
check_images_labels(yolo_val_img_dir, yolo_val_label_dir)

# Class counts of the training labels before steps 7-8; compared against
# after_counts in the plot further down.
before_counts = count_classes_in_yolo_txt(yolo_train_label_dir)

# 7) Augmentation
# Same call as in step 5; this time only the second returned element is kept.
_, cat_id_to_name = get_master_categories([merged_filtered_json_path])

copy_few_shot_images(
    yolo_img_dir=yolo_train_img_dir,
    yolo_label_dir=yolo_train_label_dir,
    cat_id_to_name=cat_id_to_name,
    max_classes_threshold=76,
    top_n=0
)

# 8) Relaxed deletion for over-represented classes
# NOTE(review): the meaning of the positional values 76 and 30 is not visible
# here -- TODO confirm against the helper's signature.
fine_tune_delete_by_class_popularity_relaxed(
    yolo_train_img_dir, yolo_train_label_dir, 76, 30
)

# Class counts after steps 7-8, plotted next to the earlier snapshot.
after_counts = count_classes_in_yolo_txt(yolo_train_label_dir)
plot_class_distribution(before_counts, after_counts)

# 9) Validate the augmented data
process_folder(yolo_train_img_dir, yolo_train_label_dir)

# 10) data.yaml
create_yolo_yaml(root_dir, merged_filtered_json_path, yaml_path)
