In [2]:
import os
import shutil
from PIL import Image
from sklearn.model_selection import train_test_split

# Input folders: colour fundus photographs and OCT scans.
fundus_dir = "./data/oct17/fundus"
oct_dir = "./data/oct17/oct"

# Output folders, one per (split, domain) combination.
# Domain "A" receives the fundus images and domain "B" the OCT images.
output_dirs = {
    f"{split}_{domain}": f"./data/oct17/{split}_{domain}"
    for split in ("train", "test")
    for domain in ("A", "B")
}

# Make sure every output folder exists before anything is written into it.
for out_dir in output_dirs.values():
    os.makedirs(out_dir, exist_ok=True)

# Filename parsing helper
def parse_filename(filename):
    """Split an image file name into its underscore-separated fields.

    The first four "_"-separated fields are read as patient ID, eyeball,
    category and sample number. Everything from the first "." onwards in the
    fourth field (such as the file extension) is dropped.

    Args:
        filename: Bare file name shaped like
            "<patient>_<eyeball>_<category>_<sample>.jpg".

    Returns:
        Tuple ``(patient_id, eyeball, category, sample_number)`` of strings.

    Raises:
        IndexError: If the name has fewer than four "_"-separated fields.
    """
    fields = filename.split("_")
    sample_number, _, _ = fields[3].partition(".")
    return fields[0], fields[1], fields[2], sample_number

# Collect file information
def _first_image_per_eye(root_dir, wanted_category):
    """Map each (patient_id, eyeball) to the first matching image under root_dir.

    Only files ending in ".jpg" whose parsed category equals
    ``wanted_category`` are considered. Directories and file names are visited
    in sorted order, so "first" means the lexicographically smallest file name
    in the earliest visited directory. This makes the selection independent of
    the order in which the file system happens to list entries.

    Args:
        root_dir: Folder that is walked recursively.
        wanted_category: Category field a file name must carry to be kept.

    Returns:
        Dict mapping ``(patient_id, eyeball)`` to the path of the kept image.
    """
    first_images = {}
    for subdir, dirnames, files in os.walk(root_dir):
        # Sorting in place fixes the order in which os.walk descends into
        # sub-folders (top-down traversal honours in-place changes).
        dirnames.sort()
        for file in sorted(files):
            if not file.endswith(".jpg"):
                continue
            patient_id, eyeball, category, _sample_number = parse_filename(file)
            if category == wanted_category:
                # Keep only the first image seen for each patient eye.
                first_images.setdefault(
                    (patient_id, eyeball), os.path.join(subdir, file)
                )
    return first_images


# Fundus files carry category "f", OCT files category "o".
fundus_images = _first_image_per_eye(fundus_dir, "f")
oct_images = _first_image_per_eye(oct_dir, "o")

# Pair fundus and OCT images that share the same (patient_id, eyeball) key.
# Keys are sorted so that the list order, and therefore the seeded split
# below, is reproducible from one machine to the next.
pairs = [
    (fundus_images[key], oct_images[key], key)
    for key in sorted(fundus_images)
    if key in oct_images
]

# Split into training and test sets (80% train, 20% test).
train_pairs, test_pairs = train_test_split(pairs, test_size=0.2, random_state=42)

# Resize images (512x512 by default), save them to the target folders and rename them
def _resize_and_save(src_path, dst_path, size):
    """Resize the image at src_path to size and write the result to dst_path."""
    # The context manager releases the source file even if resize/save raises.
    with Image.open(src_path) as img:
        img.resize(size).save(dst_path)


def resize_and_save_images(pairs, folder_A, folder_B, size=(512, 512)):
    """Resize every fundus/OCT pair and store both images under a shared name.

    Args:
        pairs: Iterable of ``(fundus_path, oct_path, (patient_id, eyeball))``.
        folder_A: Destination folder for the resized fundus images.
        folder_B: Destination folder for the resized OCT images.
        size: Target size handed to ``Image.resize``; defaults to (512, 512).

    Returns:
        None. The resized images are written to disk. An empty ``pairs``
        writes nothing.
    """
    for fundus_image, oct_image, (patient_id, eyeball) in pairs:
        # Both images of a pair get the same name: "<patient_id>_<eyeball>.jpg".
        new_filename = f"{patient_id}_{eyeball}.jpg"
        _resize_and_save(fundus_image, os.path.join(folder_A, new_filename), size)
        _resize_and_save(oct_image, os.path.join(folder_B, new_filename), size)

# Resize and save the training split first, then the test split.
for split_name, split_pairs in (("train", train_pairs), ("test", test_pairs)):
    resize_and_save_images(
        split_pairs,
        output_dirs[f"{split_name}_A"],
        output_dirs[f"{split_name}_B"],
    )

print("图片已成功组织到训练和测试文件夹中，并缩放到 512x512，同时调整了文件名！")


图片已成功组织到训练和测试文件夹中，并缩放到 512x512，同时调整了文件名！


In [2]:
import os
import json
from pathlib import Path

# Root folder of the prepared dataset.
output_folder = Path('./data/oct17/')


# Folders holding the fundus images of the training and test splits.
train_cf_folder = output_folder / 'train_A'
test_cf_folder = output_folder / 'test_A'

LEFT_EYE_PROMPT = "This is the color fundus image of left eye, please generate corresponding optical coherence tomography(OCT) image"
RIGHT_EYE_PROMPT = "This is the color fundus image of right eye, please generate corresponding optical coherence tomography(OCT) image"


def _build_prompts(image_folder):
    """Return a {file name: prompt} dict for every "*.jpg" in image_folder.

    Files are visited in sorted order so the resulting JSON is stable between
    runs. The eye is read from the last "_"-separated field of the file stem
    (the images are saved as "<patient_id>_<eyeball>.jpg"), so a patient ID
    that happens to contain "OI" no longer selects the left-eye prompt. Any
    eye field without "OI" gets the right-eye prompt.
    """
    prompts = {}
    for image_path in sorted(image_folder.glob("*.jpg")):
        # With no "_" in the stem this falls back to the whole stem.
        eye_field = image_path.stem.rsplit("_", 1)[-1]
        if "OI" in eye_field:
            prompts[image_path.name] = LEFT_EYE_PROMPT
        else:
            prompts[image_path.name] = RIGHT_EYE_PROMPT
    return prompts


def _write_prompts(prompts, json_path):
    """Write prompts to json_path as indented JSON."""
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(prompts, f, indent=2)


# Build and write train_prompts.json.
train_prompts = _build_prompts(train_cf_folder)
train_prompts_path = output_folder / 'train_prompts.json'
_write_prompts(train_prompts, train_prompts_path)

# Build and write test_prompts.json.
test_prompts = _build_prompts(test_cf_folder)
test_prompts_path = output_folder / 'test_prompts.json'
_write_prompts(test_prompts, test_prompts_path)


print("所有 JSON 文件已生成！")

所有 JSON 文件已生成！
