In [6]:
import os
import shutil

# The three top-level dataset folders to merge
root_folders = ["GPR", "GPR-2", "hyperbola"]

# Output destination folders
output_dir = "merged_dataset"
images_out = os.path.join(output_dir, "images")
labels_out = os.path.join(output_dir, "labels")

# Splits to merge (train and test)
subfolders = ["train", "test"]


def merge_datasets(roots, splits, images_dst, labels_dst):
    """Flatten every ``<root>/<split>/{images,labels}`` folder into two folders.

    A file keeps its name unless another (root, split) already contributed a
    file with the same stem (name without extension); in that case it is
    copied as ``<root basename>_<split>_<original name>``. The decision is
    made per stem and per (root, split), so an image and its label file always
    receive the same treatment and stay paired by stem.

    Args:
        roots: dataset root directories, processed in order.
        splits: split sub-folder names looked up under each root.
        images_dst: folder receiving everything found in ``images`` folders.
        labels_dst: folder receiving everything found in ``labels`` folders.

    Returns:
        The original names of the files that had to be renamed, in copy order.
    """
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)

    stem_owner = {}  # stem -> first (root, split) that contributed it
    renamed = []
    for root in roots:
        for sub in splits:
            origin = (root, sub)
            for sub_type, dst in (("images", images_dst), ("labels", labels_dst)):
                src = os.path.join(root, sub, sub_type)
                if not os.path.isdir(src):
                    continue
                # sorted() makes the copy order independent of the filesystem
                for f in sorted(os.listdir(src)):
                    src_file = os.path.join(src, f)
                    if not os.path.isfile(src_file):
                        # copy2 cannot copy directories; leave them alone
                        continue
                    stem = os.path.splitext(f)[0]
                    if stem_owner.setdefault(stem, origin) == origin:
                        new_name = f
                    else:
                        # Prefix instead of overwriting the earlier file
                        renamed.append(f)
                        new_name = f"{os.path.basename(root)}_{sub}_{f}"
                    shutil.copy2(src_file, os.path.join(dst, new_name))
    return renamed


conflicts = merge_datasets(root_folders, subfolders, images_out, labels_out)

print("✅ Merge finished!")
print(f"Images saved in: {images_out}")
print(f"Labels saved in: {labels_out}")

if conflicts:
    print("\n⚠️ Found conflicts! These files had duplicate names and were renamed:")
    for c in conflicts:
        print(" -", c)
else:
    print("\n🎉 No duplicate file names found!")

✅ Merge finished!
Images saved in: merged_dataset/images
Labels saved in: merged_dataset/labels

🎉 No duplicate file names found!


In [7]:
import shutil
import os

# Folders produced by the merge step
merged_images, merged_labels = "merged_dataset/images", "merged_dataset/labels"

# Parent folders that receive the copies
target_images_parent, target_labels_parent = "yolo_50_final/image50", "yolo_50_final/label50"

# Make sure both parent folders exist before anything is copied
for parent_dir in (target_images_parent, target_labels_parent):
    os.makedirs(parent_dir, exist_ok=True)

# Copy each merged folder, as a whole, underneath its parent
copy_jobs = (
    (merged_images, target_images_parent, "images"),
    (merged_labels, target_labels_parent, "labels"),
)
for source_dir, parent_dir, leaf in copy_jobs:
    shutil.copytree(source_dir, os.path.join(parent_dir, leaf), dirs_exist_ok=True)

print("✅ Done! merged images folder copied into:", target_images_parent)
print("✅ Done! merged labels folder copied into:", target_labels_parent)

✅ Done! merged images folder copied into: yolo_50_final/image50
✅ Done! merged labels folder copied into: yolo_50_final/label50


In [8]:
import os
import shutil

# The three original dataset roots
roots = ["GPR", "GPR-2", "hyperbola"]

# Destination parents
target_images_parent = "yolo_50_final/image50"
target_labels_parent = "yolo_50_final/label50"

# Make sure the parents exist
os.makedirs(target_images_parent, exist_ok=True)
os.makedirs(target_labels_parent, exist_ok=True)

# Create the val_images and val_labels folders
val_images = os.path.join(target_images_parent, "val_images")
val_labels = os.path.join(target_labels_parent, "val_labels")
os.makedirs(val_images, exist_ok=True)
os.makedirs(val_labels, exist_ok=True)


def merge_valid_split(source_roots, images_dst, labels_dst):
    """Copy ``<root>/valid/{images,labels}`` of every root into two folders.

    A plain copy would let a later root silently overwrite an earlier file of
    the same name. Here a file whose stem (name without extension) was already
    contributed by a different root is copied as
    ``<root basename>_valid_<original name>`` instead. Images and labels from
    the same root share the decision, so they stay paired by stem.

    Args:
        source_roots: dataset root directories, processed in order.
        images_dst: folder receiving the validation images.
        labels_dst: folder receiving the validation labels.

    Returns:
        The original names of the files that had to be renamed, in copy order.
    """
    os.makedirs(images_dst, exist_ok=True)
    os.makedirs(labels_dst, exist_ok=True)

    stem_owner = {}  # stem -> first root that contributed it
    renamed = []
    for root in source_roots:
        for sub_type, dst in (("images", images_dst), ("labels", labels_dst)):
            src = os.path.join(root, "valid", sub_type)
            if not os.path.isdir(src):
                continue
            for f in sorted(os.listdir(src)):
                src_file = os.path.join(src, f)
                if not os.path.isfile(src_file):
                    # copy2 cannot copy directories; leave them alone
                    continue
                stem = os.path.splitext(f)[0]
                if stem_owner.setdefault(stem, root) == root:
                    new_name = f
                else:
                    renamed.append(f)
                    new_name = f"{os.path.basename(root)}_valid_{f}"
                shutil.copy2(src_file, os.path.join(dst, new_name))
    return renamed


# Walk the source roots and merge their valid/images and valid/labels folders
valid_conflicts = merge_valid_split(roots, val_images, val_labels)

print("✅ All valid images merged into:", val_images)
print("✅ All valid labels merged into:", val_labels)

if valid_conflicts:
    print("\n⚠️ Duplicate names in valid splits were renamed to avoid overwriting:")
    for c in valid_conflicts:
        print(" -", c)

✅ All valid images merged into: yolo_50_final/image50/val_images
✅ All valid labels merged into: yolo_50_final/label50/val_labels


In [9]:
import os
import shutil

# Roots of the image and label trees
image_root = "yolo_50_final/image50"
label_root = "yolo_50_final/label50"

# Final output folders: one train and one val folder per tree
train_img_dir, val_img_dir = (os.path.join(image_root, split) for split in ("train", "val"))
train_lbl_dir, val_lbl_dir = (os.path.join(label_root, split) for split in ("train", "val"))

for final_dir in (train_img_dir, val_img_dir, train_lbl_dir, val_lbl_dir):
    os.makedirs(final_dir, exist_ok=True)

# Which sub-folder of each tree is merged into which final folder.
# Images come first, then labels; within a tree the order is
# first_half_training, merged folder, first_half_val, merged val folder.
sources_by_root = (
    (image_root, (
        ("first_half_training", train_img_dir),
        ("images", train_img_dir),
        ("first_half_val", val_img_dir),
        ("val_images", val_img_dir),
    )),
    (label_root, (
        ("first_half_training", train_lbl_dir),
        ("labels", train_lbl_dir),
        ("first_half_val", val_lbl_dir),
        ("val_labels", val_lbl_dir),
    )),
)
merge_plan = [
    (os.path.join(tree_root, folder), target)
    for tree_root, entries in sources_by_root
    for folder, target in entries
]

# Run the merge; source folders that do not exist are skipped
for src, dst in merge_plan:
    if not os.path.exists(src):
        continue
    for entry in os.listdir(src):
        shutil.copy2(os.path.join(src, entry), dst)

print("✅ All datasets merged successfully!")
print("Train images:", train_img_dir)
print("Val images:", val_img_dir)
print("Train labels:", train_lbl_dir)
print("Val labels:", val_lbl_dir)

✅ All datasets merged successfully!
Train images: yolo_50_final/image50/train
Val images: yolo_50_final/image50/val
Train labels: yolo_50_final/label50/train
Val labels: yolo_50_final/label50/val


In [13]:
import os

# Roots of the image and label trees to validate.
# NOTE(review): the earlier merge cells write to "yolo_50_final"; this cell reads
# "yolo_final" — confirm the folder was renamed before this check was run.
image_root, label_root = "yolo_final/image50", "yolo_final/label50"

# Check image/label pairing
def check_pairs(img_dir, lbl_dir, img_exts=('.jpg', '.jpeg', '.png')):
    """Compare image stems against label stems in two folders.

    Args:
        img_dir: folder scanned for image files.
        lbl_dir: folder scanned for ``.txt`` label files.
        img_exts: extensions (matched case-insensitively) that count as images.

    Returns:
        ``(errors, img_files, lbl_files)`` where ``errors`` holds at most two
        messages (images without a label, labels without an image), each
        listing up to 10 stems, and the two file lists are sorted names.
    """
    img_exts = tuple(ext.lower() for ext in img_exts)
    img_files = sorted(f for f in os.listdir(img_dir) if f.lower().endswith(img_exts))
    lbl_files = sorted(f for f in os.listdir(lbl_dir) if f.lower().endswith('.txt'))

    img_stems = {os.path.splitext(f)[0] for f in img_files}
    lbl_stems = {os.path.splitext(f)[0] for f in lbl_files}

    # Sort the differences: set iteration order is not stable between runs,
    # so an unsorted "first 10" sample would change from one run to the next.
    missing_labels = sorted(img_stems - lbl_stems)
    missing_images = sorted(lbl_stems - img_stems)

    errors = []
    if missing_labels:
        errors.append(f"❌ Missing labels for images: {missing_labels[:10]}")
    if missing_images:
        errors.append(f"❌ Labels without images: {missing_images[:10]}")

    return errors, img_files, lbl_files

# Check label file format
def check_label_format(lbl_dir):
    """Validate every ``.txt`` file in ``lbl_dir`` line by line.

    Each non-blank line must have five whitespace-separated fields: an integer
    class id that is not negative, followed by four numbers in ``[0, 1]``.

    Args:
        lbl_dir: folder containing the label files.

    Returns:
        A list of ``"<file>: line <n> ..."`` messages, ordered by file name
        and then by line number; empty when nothing is wrong.
    """
    errors = []
    # sorted() keeps the report order stable across filesystems
    for f in sorted(os.listdir(lbl_dir)):
        # Case-insensitive, matching how check_pairs selects label files
        if not f.lower().endswith(".txt"):
            continue
        with open(os.path.join(lbl_dir, f), "r") as fh:
            for i, line in enumerate(fh, 1):
                parts = line.split()
                if not parts:
                    # Blank line (e.g. trailing newline): carries no annotation.
                    # NOTE(review): assumes the training loader ignores blank
                    # lines too — confirm for the YOLO implementation in use.
                    continue
                if len(parts) != 5:
                    errors.append(f"{f}: line {i} has {len(parts)} parts (expected 5)")
                    continue
                try:
                    cls = int(parts[0])
                    nums = [float(p) for p in parts[1:]]
                except ValueError:
                    # Only conversion failures are format errors; anything else
                    # (e.g. KeyboardInterrupt) must propagate.
                    errors.append(f"{f}: line {i} parse error")
                    continue
                if cls < 0:
                    errors.append(f"{f}: line {i} has negative class id {cls}")
                if not all(0 <= n <= 1 for n in nums):
                    errors.append(f"{f}: line {i} values out of range [0,1]")
    return errors

# ========== Run checks ==========
# One (name, image folder, label folder) triple per split
sets = [
    (split, os.path.join(image_root, split), os.path.join(label_root, split))
    for split in ("train", "val")
]

for name, img_dir, lbl_dir in sets:
    print(f"\nChecking {name} set...")
    # Both folders must exist before either check can run
    if not (os.path.exists(img_dir) and os.path.exists(lbl_dir)):
        print(f"❌ {name} directories missing!")
        continue

    # Pairing check: every image needs a label and vice versa
    errors, imgs, lbls = check_pairs(img_dir, lbl_dir)
    if not errors:
        print(f"✅ All {len(imgs)} images have matching labels")
    else:
        print(*errors, sep="\n")

    # Format check: report at most the first 10 problems
    label_errors = check_label_format(lbl_dir)
    if not label_errors:
        print("✅ All labels are valid YOLO format")
    else:
        print("⚠️ Label format issues:")
        for issue in label_errors[:10]:
            print("   ", issue)


Checking train set...
✅ All 3212 images have matching labels
✅ All labels are valid YOLO format

Checking val set...
✅ All 351 images have matching labels
✅ All labels are valid YOLO format


In [12]:
import os
import shutil

# === Change this to your dataset root ===
ROOT = "yolo_final"

label_train_dir = os.path.join(ROOT, "label50", "train")
label_val_dir   = os.path.join(ROOT, "label50", "val")
classes_path    = os.path.join(label_train_dir, "classes.txt")


def clean_label_dirs(root, label_dirs):
    """Move ``classes.txt`` out of the label folders and delete non-label files.

    For each folder in ``label_dirs``, in order:

    1. A ``classes.txt`` file is moved to ``<root>/classes.txt``. For the first
       folder this replaces an existing file there; for later folders an
       existing ``<root>/classes.txt`` is kept and the file is moved to
       ``<root>/classes_<folder name>.txt`` instead, so nothing is lost.
    2. Every remaining regular file whose name does not end in ``.txt``
       (case-insensitive) is deleted. Sub-directories are left untouched.

    Folders that do not exist are reported and skipped.

    Args:
        root: dataset root that receives the ``classes*.txt`` files.
        label_dirs: label folders to clean.
    """
    for position, subdir in enumerate(label_dirs):
        if not os.path.isdir(subdir):
            print(f"⚠️ Skipped missing label folder: {subdir}")
            continue

        # 1. classes.txt is not an annotation file; move it next to the dataset
        class_file = os.path.join(subdir, "classes.txt")
        if os.path.isfile(class_file):
            new_path = os.path.join(root, "classes.txt")
            if position > 0 and os.path.exists(new_path):
                new_path = os.path.join(root, f"classes_{os.path.basename(subdir)}.txt")
            shutil.move(class_file, new_path)
            print(f"✅ Moved classes.txt to {new_path}")

        # 2. Delete every regular file that is not a .txt label
        for f in sorted(os.listdir(subdir)):
            path = os.path.join(subdir, f)
            # Case-insensitive so "X.TXT" labels (accepted by the pairing
            # check) are not deleted; os.remove cannot delete directories.
            if f.lower().endswith(".txt") or not os.path.isfile(path):
                continue
            os.remove(path)
            print(f"🗑️ Removed non-label file: {path}")


clean_label_dirs(ROOT, [label_train_dir, label_val_dir])

print("🎉 Clean up done! Now dataset should be YOLO-compatible.")

✅ Moved classes.txt to yolo_final/classes.txt
🎉 Clean up done! Now dataset should be YOLO-compatible.
