In [7]:
import os
import shutil
import random
import pandas as pd

# Paths: source image folder, one output folder per split, and the annotation CSV.
input_folder = './images'  # folder holding every image file to be split
output_train_folder = 'image_train'  # destination folder for the training split
output_test_folder = 'image_test'  # destination folder for the test split
csv_file = './anno.csv'  # CSV file with the image annotations

# Create the output folders (no error if they already exist).
# NOTE: files left in these folders by an earlier run are not removed here.
os.makedirs(output_train_folder, exist_ok=True)
os.makedirs(output_test_folder, exist_ok=True)

# Collect the regular files of the input folder; sorting gives the shuffle a
# stable starting order regardless of how the OS lists the directory.
image_files = [f for f in sorted(os.listdir(
    input_folder)) if os.path.isfile(os.path.join(input_folder, f))]

# Split proportions.
train_ratio = 0.8
test_ratio = 0.2  # informational only: the test split is whatever train leaves over

# Shuffle with a fixed seed so that re-running this cell reproduces the same
# partition. Without it, every re-run picks a different split, which (a) copies
# a different set of files on top of the previous run's output, letting the
# same image land in both folders, and (b) makes already-written annotation
# CSVs disagree with the folders. A dedicated Random instance is used so the
# global `random` state seen by other cells is left untouched.
split_seed = 42
random.Random(split_seed).shuffle(image_files)

# Sizes of the two splits: train is rounded down, test takes the remainder.
num_total_images = len(image_files)
num_train_images = int(num_total_images * train_ratio)
num_test_images = num_total_images - num_train_images

# File names of each split, in shuffled order.
train_files = image_files[:num_train_images]
test_files = image_files[num_train_images:]

# Copy every file into the folder of the split it belongs to.
for destination_folder, split_files in (
        (output_train_folder, train_files),
        (output_test_folder, test_files)):
    for file_name in split_files:
        shutil.copyfile(os.path.join(input_folder, file_name),
                        os.path.join(destination_folder, file_name))


In [6]:
# Load the annotation table from the CSV file.
df = pd.read_csv(csv_file)

# Partition the annotation rows: a row goes to a split when the value in its
# first column appears among that split's image file names.
# (assumes the first CSV column holds the image file name — TODO confirm)
first_column = df.iloc[:, 0]
train_names = image_files[:num_train_images]
test_names = image_files[num_train_images:]
train_csv = df[first_column.isin(train_names)]
test_csv = df[first_column.isin(test_names)]

# Write each partition to its own CSV file, without the index column.
train_csv.to_csv('train_anno.csv', index=False)
test_csv.to_csv('test_anno.csv', index=False)

# Report the number of images in each split.
summary = f"划分完成，训练集包含 {num_train_images} 张图像和相应的标注，测试集包含 {num_test_images} 张图像和相应的标注。"
print(summary)

划分完成，训练集包含 40000 张图像和相应的标注，测试集包含 10000 张图像和相应的标注。
