In [2]:
import os
import random
from PIL import Image
import torchvision.transforms as transforms
import json
from tqdm import tqdm

In [6]:
# Augmentation transform: random crop followed by color jitter
def get_transform(base_width, base_height):
    """Build a crop-and-jitter pipeline sized relative to the given image.

    A single crop ratio is drawn uniformly from [0.7, 1.0] when this function
    is called and applied to both dimensions, so the crop window keeps the
    aspect ratio of ``base_width`` x ``base_height``.

    Args:
        base_width: Width of the image the pipeline will be applied to.
        base_height: Height of the image the pipeline will be applied to.

    Returns:
        A ``transforms.Compose`` that converts its input to a tensor, takes a
        random crop of the scaled size, applies color jitter, and converts the
        result back to a PIL image.
    """
    ratio = random.uniform(0.7, 1.0)
    # RandomCrop expects the size as (height, width).
    crop_size = (int(base_height * ratio), int(base_width * ratio))

    steps = [
        transforms.ToTensor(),
        transforms.RandomCrop(size=crop_size),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToPILImage(),
    ]
    return transforms.Compose(steps)

# Resizing transform: random rescale of height and width
def resize_transform(base_width, base_height, scale_range):
    """Build a pipeline that resizes an image by randomly drawn scale factors.

    Two independent factors are drawn from ``random.uniform(*scale_range)``
    when this function is called: the first scales ``base_height`` and the
    second scales ``base_width``. Because they are independent, the output
    aspect ratio may differ from the input's.

    Args:
        base_width: Reference width that the width factor is applied to.
        base_height: Reference height that the height factor is applied to.
        scale_range: Arguments unpacked into ``random.uniform`` (low, high).

    Returns:
        A ``transforms.Compose`` that converts its input to a tensor, resizes
        it to the computed (height, width), and converts back to a PIL image.
    """
    # Keep the draw order: height factor first, then width factor.
    target_height = int(base_height * random.uniform(*scale_range))
    target_width = int(base_width * random.uniform(*scale_range))

    steps = [
        transforms.ToTensor(),
        transforms.Resize((target_height, target_width)),
        transforms.ToPILImage(),
    ]
    return transforms.Compose(steps)

# Load a JSON file
def load_json(json_path):
    """Read and parse the JSON document stored at ``json_path``.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's locale encoding, so non-ASCII text (for example Chinese
    instructions) is read the same way on every system.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Save data as a JSON file
def save_json(data, json_path):
    """Serialize ``data`` as JSON (4-space indent) and write it to ``json_path``.

    Args:
        data: JSON-serializable object to store.
        json_path: Destination file path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize. In
            that case the destination file is not touched.
        OSError: If the file cannot be opened for writing.
    """
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error after that point would leave an empty or partial file.
    serialized = json.dumps(data, indent=4)
    with open(json_path, 'w', encoding='utf-8') as f:
        f.write(serialized)

# Build a record for an augmented sample
def create_new_record(original_record, new_images, index):
    """Derive a dataset record that points at augmented images.

    Args:
        original_record: Source record; must contain ``"id"`` and
            ``"instruction"``. ``"input"`` and ``"output"`` are optional and
            default to an empty string.
        new_images: Value stored under ``"image"`` in the new record (stored
            as-is, not copied).
        index: Suffix appended to the original id as ``"<id>_<index>"``.

    Returns:
        A new dict with the keys ``id``, ``instruction``, ``input``,
        ``output`` and ``image``, in that order.

    Raises:
        KeyError: If ``original_record`` lacks ``"id"`` or ``"instruction"``.
    """
    record = {"id": f"{original_record['id']}_{index}"}
    record["instruction"] = original_record["instruction"]
    for optional_key in ("input", "output"):
        record[optional_key] = original_record.get(optional_key, "")
    record["image"] = new_images
    return record

# Image processing and dataset update
def augment_images(data, output_json_path, transform_count=10, scale_range=(0.7, 1.3)):
    """Write augmented copies of each record's images and save the enlarged dataset.

    For every record in ``data`` whose ``'image'`` value is present and
    non-empty, ``transform_count`` new records are created. For variant ``i``
    (1-based), each image of the record is opened, converted to RGB, passed
    through ``get_transform`` and then ``resize_transform``, and saved next to
    the original as ``<path without extension>_<i><extension>``. The new
    record is built with ``create_new_record`` and appended after the original
    records. Records without images are kept unchanged and their id is printed.

    Args:
        data: List of record dicts. ``data`` itself is not modified.
            NOTE(review): ``item['image']`` is iterated as a collection of
            path strings; confirm it is always a list.
        output_json_path: Path the combined (original + augmented) records are
            written to via ``save_json``.
        transform_count: Number of augmented variants to create per record.
        scale_range: (low, high) range forwarded to ``resize_transform``.

    Returns:
        None. Results are written to disk (images and the JSON file).
    """
    new_data = data[:]
    for item in tqdm(data):
        if 'image' in item and item['image']:
            base_image_paths = item['image']

            for i in range(1, transform_count + 1):
                new_image_paths = []
                for image_path in base_image_paths:
                    base_path, ext = os.path.splitext(image_path)

                    # Open inside a context manager so the file handle is
                    # released right away; convert() returns an independent
                    # in-memory copy that stays valid after the file is closed.
                    with Image.open(image_path) as source_image:
                        image = source_image.convert('RGB')
                    base_width, base_height = image.size

                    # Apply crop + color jitter, then a random resize that is
                    # computed from the original (pre-crop) dimensions.
                    transformed_image = get_transform(base_width, base_height)(image)
                    resized_image = resize_transform(base_width, base_height, scale_range)(transformed_image)

                    # Build the new file name and save the augmented image.
                    new_image_path = f"{base_path}_{i}{ext}"
                    resized_image.save(new_image_path)
                    new_image_paths.append(new_image_path)

                # Append the record that references this variant's images.
                new_data.append(create_new_record(item, new_image_paths, i))
        else:
            # Record has no images to augment: report its id and keep it as-is.
            print(item["id"])

    save_json(new_data, output_json_path)
    print("图像变换和保存完成，JSON文件已更新。")

# Entry point: load the dataset, augment it, and save the result.
if __name__ == "__main__":
    # NOTE: the same path is passed as input and output, so train.json is
    # overwritten in place with the original records plus the augmented ones.
    # NOTE(review): re-running this cell would therefore augment the already
    # augmented records again; confirm this is intended or back up the file first.
    json_file_path = 'mire/train-augment/train.json'
    data = load_json(json_file_path)
    augment_images(data, json_file_path)

  9%|▉         | 88/1000 [02:29<30:39,  2.02s/it]  

212b14da17281748234545696d0add


100%|██████████| 1000/1000 [29:56<00:00,  1.80s/it] 

图像变换和保存完成，JSON文件已更新。





In [8]:
import re

json_path = 'mire/train-augment/train.json'
# Decode as UTF-8 explicitly so the check does not depend on the platform locale.
with open(json_path, 'r', encoding='utf-8') as f:
    json_data = json.load(f)

# Sanity check: the number of "<image>" placeholders in each instruction
# should equal the number of entries in the record's "image" field.
# The file is already closed here; only the parsed data is needed.
for item in json_data:
    # Count the "<image>" placeholders in the instruction text.
    image_count_in_instruction = len(re.findall(r'<image>', item['instruction']))

    # Records may have no "image" key (or an empty value); treat both as zero
    # images instead of raising KeyError.
    image_paths = item.get('image') or []

    # Print the id of every record whose counts do not match.
    if len(image_paths) != image_count_in_instruction:
        print(item['id'])