In [17]:
import os
import cv2
import numpy as np
import random

# Data augmentation function
def augment_data(image, bbox, angle_range=(-10, 10), dropout_prob=0.1, affine_scale=(0.9, 1.1)):
    """Randomly rotate and scale an image and move its bounding box with it.

    The image is rotated about its centre by a random angle and then scaled
    about the top-left origin by a random factor. Both steps are composed
    into one affine matrix so the image is resampled only once and the box
    is mapped with exactly the same transform.

    Args:
        image: Image array whose first two dimensions are (height, width).
        bbox: Box as ``[y1, x1, y2, x2]`` in pixel coordinates of ``image``.
        angle_range: ``(min, max)`` rotation angle in degrees.
        dropout_prob: Probability of jittering the box edges by up to 20% of
            the box size before the transform. Despite the name, nothing is
            dropped; the name is kept for backward compatibility.
        affine_scale: ``(min, max)`` range of the uniform scale factor.

    Returns:
        ``(augmented_image, augmented_bbox)``. The image has the same size as
        the input. The box is the axis-aligned box enclosing the transformed
        corners, clamped to the image, as ``[y1, x1, y2, x2]`` floats. It can
        have zero area if the box is moved entirely out of the frame.
    """
    # shape[:2] also works for single-channel images.
    height, width = image.shape[:2]

    # Random rotation about the image centre, promoted to 3x3 so it can be
    # composed with the scaling below.
    angle = random.uniform(angle_range[0], angle_range[1])
    rotation = np.vstack([
        cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1),
        [0.0, 0.0, 1.0],
    ])

    # Random jitter of the box edges (controlled by dropout_prob).
    y1, x1, y2, x2 = (float(value) for value in bbox)
    if random.random() < dropout_prob:
        h, w = y2 - y1, x2 - x1
        x1 += random.uniform(-w * 0.2, w * 0.2)
        x2 += random.uniform(-w * 0.2, w * 0.2)
        y1 += random.uniform(-h * 0.2, h * 0.2)
        y2 += random.uniform(-h * 0.2, h * 0.2)
        x1, y1, x2, y2 = max(0, x1), max(0, y1), min(width, x2), min(height, y2)

    # Random uniform scaling. Affine transforms are chained by matrix
    # multiplication; adding the matrices (as the previous code did) would
    # scale by 1 + scale_factor instead of scale_factor.
    scale_factor = random.uniform(affine_scale[0], affine_scale[1])
    scaling = np.diag([scale_factor, scale_factor, 1.0])
    affine_matrix = (scaling @ rotation)[:2]

    augmented_image = cv2.warpAffine(image, affine_matrix, (width, height))

    # Map the four box corners with the same forward transform and take
    # their axis-aligned envelope, so the label keeps matching the image.
    corners = np.array([
        [x1, y1, 1.0],
        [x2, y1, 1.0],
        [x2, y2, 1.0],
        [x1, y2, 1.0],
    ])
    warped = corners @ affine_matrix.T
    xs = np.clip(warped[:, 0], 0, width)
    ys = np.clip(warped[:, 1], 0, height)
    augmented_bbox = [float(ys.min()), float(xs.min()), float(ys.max()), float(xs.max())]

    return augmented_image, augmented_bbox

# Input and output folder paths
input_folder = '../data_jsai/train'
output_folder = '../data_jsai/train1'

# Create the output folder if it does not exist
os.makedirs(output_folder, exist_ok=True)

# Walk the input folder. Each .jpg is paired with a .txt label file of the
# same stem whose lines are "class cx cy w h", normalised to the image size.
for filename in os.listdir(input_folder):
    print(filename)
    if not filename.endswith('.jpg'):
        continue

    image_path = os.path.join(input_folder, filename)
    label_path = os.path.join(input_folder, os.path.splitext(filename)[0] + '.txt')

    # cv2.imread returns None (it does not raise) when the file is unreadable.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping {filename}: image could not be read")
        continue
    if not os.path.isfile(label_path):
        print(f"Skipping {filename}: label file not found")
        continue

    img_h, img_w = image.shape[:2]

    # Parse the labels into (class_id, [y1, x1, y2, x2]) with pixel corners,
    # keeping each box's own class instead of reusing the last one parsed.
    labels = []
    with open(label_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines / empty label files
            class_id, cx, cy, box_w, box_h = map(float, fields)
            cx, cy = cx * img_w, cy * img_h
            box_w, box_h = box_w * img_w, box_h * img_h
            labels.append((
                int(class_id),
                [cy - box_h / 2, cx - box_w / 2, cy + box_h / 2, cx + box_w / 2],
            ))

    # Produce one augmented image per labelled box and save it immediately.
    # NOTE: each output label file holds only the box it was generated from;
    # other objects of the source image are not carried into that file.
    for i, (class_id, bbox) in enumerate(labels):
        augmented_image, augmented_bbox = augment_data(image, bbox)
        out_h, out_w = augmented_image.shape[:2]

        output_image_path = os.path.join(output_folder, f"{filename}_{i}.jpg")
        cv2.imwrite(output_image_path, augmented_image)

        # Convert the pixel corners back to normalised YOLO "cx cy w h".
        y1, x1, y2, x2 = augmented_bbox
        x1, x2 = min(max(x1, 0), out_w), min(max(x2, 0), out_w)
        y1, y2 = min(max(y1, 0), out_h), min(max(y2, 0), out_h)
        yolo_box = [
            (x1 + x2) / (2 * out_w),
            (y1 + y2) / (2 * out_h),
            (x2 - x1) / out_w,
            (y2 - y1) / out_h,
        ]

        output_label_path = os.path.join(output_folder, f"{filename}_{i}.txt")
        with open(output_label_path, 'w') as file:
            file.write(f'{class_id} {" ".join(f"{value:.6f}" for value in yolo_box)}\n')


23149-out_ori.txt
23429-out_ori.jpg
20570-out_ori.jpg
22628-out_ori.jpg
24074-out_ori.txt
24548-out_ori.jpg
23065-out_ori.txt
24622-out_ori.txt
24120-out_ori.txt
23399-out_ori.txt
24633-out_ori.txt
24414-out_ori.txt
23270-out_ori.txt
20127-out_ori.txt
20322-out_ori.txt
24433-out_ori.jpg
22799-out_ori.txt
24344-out_ori.jpg
23979-out_ori.jpg
24081-out_ori.txt
23213-out_ori.jpg
22683-out_ori.jpg
23461-out_ori.jpg
23795-out_ori.jpg
24110-out_ori.txt
23096-out_ori.jpg
24611-out_ori.txt
23241-out_ori.jpg
24619-out_ori.jpg
24516-out_ori.jpg
23173-out_ori.txt
24261-out_ori.jpg
24189-out_ori.jpg
24620-out_ori.jpg
23328-out_ori.jpg
23196-out_ori.jpg
23653-out_ori.txt
23402-out_ori.txt
24419-out_ori.txt
22960-out_ori.txt
19828-out_ori.txt
22802-out_ori.jpg
24511-out_ori.jpg
23253-out_ori.jpg
24054-out_ori.txt
23834-out_ori.jpg
24019-out_ori.jpg
24118-out_ori.txt
23333-out_ori.txt
22616-out_ori.jpg
18-out_ori.txt
23055-out_ori.jpg
23521-out_ori.txt
19741-out_ori.jpg
23198-out_ori.txt
23761-out_ori

24531-out_ori.jpg
23895-out_ori.txt
23458-out_ori.txt
24341-out_ori.jpg
24150-out_ori.txt
23234-out_ori.txt
24101-out_ori.txt
140-out_ori.jpg
23088-out_ori.jpg
23926-out_ori.txt
22750-out_ori.jpg
20392-out_ori.txt
23363-out_ori.txt
24141-out_ori.jpg
24161-out_ori.jpg
112-out_ori.jpg
22688-out_ori.jpg
24014-out_ori.txt
23755-out_ori.txt
23335-out_ori.jpg
23282-out_ori.jpg
23223-out_ori.jpg
22686-out_ori.jpg
23310-out_ori.jpg
23753-out_ori.jpg
23160-out_ori.jpg
23953-out_ori.txt
191-out_ori.txt
23838-out_ori.txt
24355-out_ori.jpg
19976-out_ori.jpg
23284-out_ori.jpg
23238-out_ori.jpg
24367-out_ori.txt
23102-out_ori.txt
24435-out_ori.jpg
23180-out_ori.txt
23702-out_ori.jpg
24291-out_ori.txt
19-out_ori.txt
23164-out_ori.txt
23078-out_ori.txt
23467-out_ori.txt
23953-out_ori.jpg
22889-out_ori.txt
22653-out_ori.txt
24133-out_ori.jpg
22909-out_ori.jpg
24604-out_ori.txt
24160-out_ori.jpg
24381-out_ori.txt
22793-out_ori.jpg
22956-out_ori.jpg
24390-out_ori.jpg
24480-out_ori.jpg
22858-out_ori.jpg
2

23435-out_ori.txt
22795-out_ori.txt
22984-out_ori.txt
20235-out_ori.jpg
23005-out_ori.txt
19952-out_ori.jpg
23667-out_ori.txt
23235-out_ori.jpg
23725-out_ori.jpg
24084-out_ori.jpg
20292-out_ori.jpg
153-out_ori.txt
24517-out_ori.jpg
23547-out_ori.txt
23800-out_ori.txt
23681-out_ori.txt
22869-out_ori.txt
24135-out_ori.txt
23041-out_ori.txt
24304-out_ori.jpg
24587-out_ori.txt
24045-out_ori.jpg
24026-out_ori.txt
23715-out_ori.txt
23122-out_ori.txt
22614-out_ori.txt
24132-out_ori.txt
23954-out_ori.txt
23970-out_ori.txt
20009-out_ori.jpg
22916-out_ori.txt
23119-out_ori.jpg
22648-out_ori.txt
23436-out_ori.txt
24463-out_ori.jpg
23905-out_ori.txt
23939-out_ori.txt
23394-out_ori.txt
23644-out_ori.jpg
23970-out_ori.jpg
23365-out_ori.jpg
24022-out_ori.txt
23400-out_ori.jpg
24030-out_ori.txt
22702-out_ori.txt
23278-out_ori.txt
22809-out_ori.jpg
20464-out_ori.jpg
22710-out_ori.txt
23742-out_ori.txt
23244-out_ori.txt
24012-out_ori.txt
24173-out_ori.txt
24234-out_ori.jpg
23089-out_ori.jpg
23682-out_or

23880-out_ori.txt
23069-out_ori.jpg
23654-out_ori.txt
41-out_ori.txt
24073-out_ori.txt
20447-out_ori.jpg
23539-out_ori.txt
24317-out_ori.jpg
20040-out_ori.txt
24518-out_ori.jpg
23067-out_ori.jpg
23748-out_ori.txt
22622-out_ori.jpg
24219-out_ori.txt
22632-out_ori.jpg
22756-out_ori.jpg
23110-out_ori.txt
148-out_ori.txt
23273-out_ori.jpg
23528-out_ori.txt
24528-out_ori.txt
23668-out_ori.jpg
24577-out_ori.jpg
23740-out_ori.jpg
23394-out_ori.jpg
24233-out_ori.txt
23814-out_ori.txt
24455-out_ori.txt
24076-out_ori.jpg
23387-out_ori.jpg
23049-out_ori.txt
23065-out_ori.jpg
23286-out_ori.txt
23234-out_ori.jpg
24184-out_ori.txt
23028-out_ori.txt
177-out_ori.txt
24035-out_ori.jpg
22663-out_ori.txt
22635-out_ori.txt
19845-out_ori.jpg
23766-out_ori.txt
22795-out_ori.jpg
22880-out_ori.txt
24131-out_ori.txt
23796-out_ori.jpg
23874-out_ori.txt
23835-out_ori.jpg
24159-out_ori.txt
22843-out_ori.jpg
23392-out_ori.jpg
23522-out_ori.txt
23048-out_ori.txt
24613-out_ori.txt
22964-out_ori.jpg
24314-out_ori.txt

23600-out_ori.txt
19741-out_ori.txt
24078-out_ori.txt
19839-out_ori.txt
24281-out_ori.txt
23089-out_ori.txt
23547-out_ori.jpg
23074-out_ori.txt
22722-out_ori.txt
23083-out_ori.jpg
22810-out_ori.jpg
22886-out_ori.jpg
24491-out_ori.txt
23658-out_ori.txt
170-out_ori.jpg
24542-out_ori.jpg
24089-out_ori.jpg
20491-out_ori.txt
24157-out_ori.txt
20285-out_ori.txt
79-out_ori.txt
24123-out_ori.jpg
24338-out_ori.jpg
24502-out_ori.jpg
23764-out_ori.txt
24406-out_ori.jpg
23106-out_ori.txt
24143-out_ori.jpg
23645-out_ori.jpg
22963-out_ori.txt
23121-out_ori.txt
23010-out_ori.jpg
23801-out_ori.jpg
23760-out_ori.jpg
23248-out_ori.txt
24588-out_ori.jpg
22924-out_ori.jpg
24215-out_ori.txt
22762-out_ori.jpg
24448-out_ori.txt
24612-out_ori.txt
23103-out_ori.txt
23309-out_ori.jpg
23747-out_ori.jpg
24565-out_ori.txt
23795-out_ori.txt
22804-out_ori.txt
181-out_ori.txt
23049-out_ori.jpg
20152-out_ori.txt
24063-out_ori.txt
20323-out_ori.txt
22990-out_ori.jpg
24538-out_ori.txt
23530-out_ori.txt
23709-out_ori.jpg

20331-out_ori.txt
22860-out_ori.txt
24169-out_ori.txt
23426-out_ori.txt
22826-out_ori.jpg
22596-out_ori.txt
23793-out_ori.txt
23669-out_ori.txt
22620-out_ori.txt
23737-out_ori.jpg
23298-out_ori.txt
15-out_ori.jpg
22876-out_ori.txt
23749-out_ori.jpg
23079-out_ori.txt
24632-out_ori.jpg
20447-out_ori.txt
23468-out_ori.jpg
24471-out_ori.jpg
20299-out_ori.txt
23649-out_ori.jpg
23039-out_ori.jpg
23512-out_ori.jpg
23533-out_ori.jpg
20302-out_ori.txt
22690-out_ori.jpg
23945-out_ori.jpg
19928-out_ori.jpg
20414-out_ori.jpg
175-out_ori.txt
22648-out_ori.jpg
24255-out_ori.txt
23082-out_ori.txt
23881-out_ori.txt
22874-out_ori.txt
22887-out_ori.jpg
24396-out_ori.txt
22716-out_ori.jpg
24149-out_ori.txt
23758-out_ori.txt
22882-out_ori.jpg
23600-out_ori.jpg
24224-out_ori.jpg
24574-out_ori.txt
24376-out_ori.jpg
22746-out_ori.txt
24315-out_ori.txt
22857-out_ori.jpg
19976-out_ori.txt
23934-out_ori.txt
20235-out_ori.txt
23838-out_ori.jpg
22962-out_ori.txt
23155-out_ori.jpg
24201-out_ori.jpg
20391-out_ori.j

23660-out_ori.jpg
24391-out_ori.jpg
20156-out_ori.jpg
23927-out_ori.txt
19751-out_ori.txt
24056-out_ori.jpg
24013-out_ori.jpg
22947-out_ori.jpg
24196-out_ori.txt
194-out_ori.jpg
23739-out_ori.jpg
24048-out_ori.jpg
20115-out_ori.jpg
23297-out_ori.jpg
23494-out_ori.jpg
23112-out_ori.txt
23717-out_ori.txt
24450-out_ori.jpg
23178-out_ori.txt
22691-out_ori.txt
23460-out_ori.jpg
23701-out_ori.txt
19973-out_ori.txt
22678-out_ori.txt
23161-out_ori.txt
19714-out_ori.jpg
23727-out_ori.txt
23172-out_ori.txt
24254-out_ori.txt
20093-out_ori.txt
23883-out_ori.txt
22907-out_ori.jpg
24627-out_ori.jpg
23305-out_ori.jpg
23799-out_ori.jpg
24464-out_ori.txt
23115-out_ori.txt
23252-out_ori.txt
24053-out_ori.txt
22849-out_ori.txt
23155-out_ori.txt
23090-out_ori.jpg
23549-out_ori.txt
143-out_ori.jpg
23357-out_ori.jpg
22889-out_ori.jpg
23475-out_ori.txt
23329-out_ori.txt
23146-out_ori.txt
19898-out_ori.jpg
22963-out_ori.jpg
20440-out_ori.txt
22781-out_ori.txt
23504-out_ori.jpg
23899-out_ori.jpg
20487-out_ori.

23093-out_ori.txt
20552-out_ori.jpg
23463-out_ori.txt
24058-out_ori.jpg
24623-out_ori.txt
23939-out_ori.jpg
19713-out_ori.jpg
24133-out_ori.txt
23818-out_ori.jpg
23465-out_ori.txt
23264-out_ori.jpg
23615-out_ori.jpg
20348-out_ori.txt
22803-out_ori.jpg
24238-out_ori.txt
24131-out_ori.jpg
20464-out_ori.txt
24245-out_ori.jpg
22791-out_ori.txt
22765-out_ori.jpg
23411-out_ori.jpg
22593-out_ori.txt
20369-out_ori.txt
23642-out_ori.jpg
23812-out_ori.jpg
24254-out_ori.jpg
24621-out_ori.txt
23823-out_ori.txt
24185-out_ori.txt
23393-out_ori.txt
23409-out_ori.jpg
22599-out_ori.jpg
20484-out_ori.txt
23099-out_ori.jpg
22898-out_ori.txt
23259-out_ori.txt
24076-out_ori.txt
24235-out_ori.txt
23556-out_ori.jpg
22997-out_ori.txt
24508-out_ori.txt
23373-out_ori.txt
22738-out_ori.jpg
24037-out_ori.txt
23153-out_ori.txt
23888-out_ori.txt
20429-out_ori.txt
24305-out_ori.jpg
23488-out_ori.jpg
19951-out_ori.jpg
23937-out_ori.txt
19750-out_ori.jpg
24531-out_ori.txt
20107-out_ori.jpg
23209-out_ori.jpg
23303-out_

In [25]:
import os

def count_txt_and_jpg_files(folder_path):
    """Count the entries of ``folder_path`` whose names end in .txt and .jpg.

    Only the directory's direct entries are inspected (no recursion), and the
    match is a plain case-sensitive suffix test on the entry name.

    Returns:
        A ``(txt_count, jpg_count)`` tuple.
    """
    entry_names = os.listdir(folder_path)
    txt_total = sum(1 for entry in entry_names if entry.endswith('.txt'))
    jpg_total = sum(1 for entry in entry_names if entry.endswith('.jpg'))
    return txt_total, jpg_total

# Folder to inspect
folder_path = '../data_jsai/train1'

# Count the .txt and .jpg files in that folder
txt_count, jpg_count = count_txt_and_jpg_files(folder_path)

# Report both totals, one per line
print(
    f"Number of .txt files: {txt_count}",
    f"Number of .jpg files: {jpg_count}",
    sep="\n",
)

Number of .txt files: 3834
Number of .jpg files: 3834


In [24]:
import os
import cv2
import numpy as np
import random

# Data augmentation function
def augment_data(image, bbox, angle_range=(-10, 10), dropout_prob=0.1, affine_scale=(0.9, 1.1)):
    """Randomly rotate and scale an image and move its bounding box with it.

    The image is rotated about its centre by a random angle and then scaled
    about the top-left origin by a random factor. Both steps are composed
    into one affine matrix so the image is resampled only once and the box
    is mapped with exactly the same transform.

    Args:
        image: Image array whose first two dimensions are (height, width).
        bbox: Box as ``[y1, x1, y2, x2]`` in pixel coordinates of ``image``.
        angle_range: ``(min, max)`` rotation angle in degrees.
        dropout_prob: Probability of jittering the box edges by up to 20% of
            the box size before the transform. Despite the name, nothing is
            dropped; the name is kept for backward compatibility.
        affine_scale: ``(min, max)`` range of the uniform scale factor.

    Returns:
        ``(augmented_image, augmented_bbox)``. The image has the same size as
        the input. The box is the axis-aligned box enclosing the transformed
        corners, clamped to the image, as ``[y1, x1, y2, x2]`` floats. It can
        have zero area if the box is moved entirely out of the frame.
    """
    # shape[:2] also works for single-channel images.
    height, width = image.shape[:2]

    # Random rotation about the image centre, promoted to 3x3 so it can be
    # composed with the scaling below.
    angle = random.uniform(angle_range[0], angle_range[1])
    rotation = np.vstack([
        cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1),
        [0.0, 0.0, 1.0],
    ])

    # Random jitter of the box edges (controlled by dropout_prob).
    y1, x1, y2, x2 = (float(value) for value in bbox)
    if random.random() < dropout_prob:
        h, w = y2 - y1, x2 - x1
        x1 += random.uniform(-w * 0.2, w * 0.2)
        x2 += random.uniform(-w * 0.2, w * 0.2)
        y1 += random.uniform(-h * 0.2, h * 0.2)
        y2 += random.uniform(-h * 0.2, h * 0.2)
        x1, y1, x2, y2 = max(0, x1), max(0, y1), min(width, x2), min(height, y2)

    # Random uniform scaling. Affine transforms are chained by matrix
    # multiplication; adding the matrices (as the previous code did) would
    # scale by 1 + scale_factor instead of scale_factor.
    scale_factor = random.uniform(affine_scale[0], affine_scale[1])
    scaling = np.diag([scale_factor, scale_factor, 1.0])
    affine_matrix = (scaling @ rotation)[:2]

    augmented_image = cv2.warpAffine(image, affine_matrix, (width, height))

    # Map the four box corners with the same forward transform and take
    # their axis-aligned envelope, so the label keeps matching the image.
    corners = np.array([
        [x1, y1, 1.0],
        [x2, y1, 1.0],
        [x2, y2, 1.0],
        [x1, y2, 1.0],
    ])
    warped = corners @ affine_matrix.T
    xs = np.clip(warped[:, 0], 0, width)
    ys = np.clip(warped[:, 1], 0, height)
    augmented_bbox = [float(ys.min()), float(xs.min()), float(ys.max()), float(xs.max())]

    return augmented_image, augmented_bbox

# Input and output folder paths
input_folder = '../data_jsai/train'
output_folder = '../data_jsai/train1'

# Create the output folder if it does not exist
os.makedirs(output_folder, exist_ok=True)

# Walk the input folder. Each .jpg is paired with a .txt label file of the
# same stem whose lines are "class cx cy w h", normalised to the image size.
for filename in os.listdir(input_folder):
    if not filename.endswith('.jpg'):
        continue

    image_path = os.path.join(input_folder, filename)
    label_path = os.path.join(input_folder, os.path.splitext(filename)[0] + '.txt')

    # cv2.imread returns None (it does not raise) when the file is unreadable.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping {filename}: image could not be read")
        continue
    if not os.path.isfile(label_path):
        print(f"Skipping {filename}: label file not found")
        continue

    img_h, img_w = image.shape[:2]

    # Parse the labels into (class_id, [y1, x1, y2, x2]) with pixel corners.
    # Normalised values are multiplied by the image size to get pixels
    # (dividing, as the previous code did, collapses every box to ~0), and
    # each box keeps its own class instead of reusing the last one parsed.
    labels = []
    with open(label_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines / empty label files
            class_id, cx, cy, box_w, box_h = map(float, fields)
            cx, cy = cx * img_w, cy * img_h
            box_w, box_h = box_w * img_w, box_h * img_h
            labels.append((
                int(class_id),
                [cy - box_h / 2, cx - box_w / 2, cy + box_h / 2, cx + box_w / 2],
            ))

    # Produce one augmented image per labelled box and save it immediately.
    # NOTE: each output label file holds only the box it was generated from;
    # other objects of the source image are not carried into that file.
    for i, (class_id, bbox) in enumerate(labels):
        augmented_image, augmented_bbox = augment_data(image, bbox)
        out_h, out_w = augmented_image.shape[:2]

        output_image_path = os.path.join(output_folder, f"{filename}_{i}.jpg")
        cv2.imwrite(output_image_path, augmented_image)

        # Convert the pixel corners back to normalised YOLO "cx cy w h".
        y1, x1, y2, x2 = augmented_bbox
        x1, x2 = min(max(x1, 0), out_w), min(max(x2, 0), out_w)
        y1, y2 = min(max(y1, 0), out_h), min(max(y2, 0), out_h)
        yolo_box = [
            (x1 + x2) / (2 * out_w),
            (y1 + y2) / (2 * out_h),
            (x2 - x1) / out_w,
            (y2 - y1) / out_h,
        ]

        output_label_path = os.path.join(output_folder, f"{filename}_{i}.txt")
        with open(output_label_path, 'w') as file:
            file.write(f'{class_id} {" ".join(f"{value:.6f}" for value in yolo_box)}\n')
