In [3]:
import json
import shutil
import xml.etree.ElementTree as ET
from pathlib import Path
import os

In [9]:
# Load the whole COCO train2014 instance-annotation file (images, annotations
# and categories), after printing whether the file is present on disk.
coco_categories_path = 'coco/annotations/instances_train2014.json'
print(Path(coco_categories_path).exists())
with open(coco_categories_path) as annotation_fh:
    coco_data = json.loads(annotation_fh.read())

True


In [48]:
coco_categories = coco_data['categories']
print(coco_categories[0])
# Category ids are NOT list positions (the entry at index 0 has id 1), so look
# a name up by matching the 'id' field instead of indexing the list with an id,
# e.g. next(c['name'] for c in coco_categories if c['id'] == wanted_id).

{'supercategory': 'person', 'id': 1, 'name': 'person'}


In [171]:
# Class names kept when exporting annotations: VOC-style names with
# 'elephant' included and no 'cat' entry.
common_categories = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'elephant',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]
len(common_categories)

20

In [77]:
# Collect every COCO annotation whose category is 'elephant'.
elephant_images_coco = set()
elephant_id = next(
    (category['id'] for category in coco_data['categories'] if category['name'] == 'elephant'),
    None,
)  # would be 22
# An unknown category (elephant_id is None) yields an empty list.
annotations = (
    [annotation for annotation in coco_data['annotations'] if annotation['category_id'] == elephant_id]
    if elephant_id is not None
    else []
)

In [93]:
# Join each collected annotation with its image record and its category record.
images = coco_data['images']
categories = coco_data['categories']

# Index both tables by id once so every lookup is O(1) instead of a linear
# scan per annotation. setdefault keeps the FIRST record seen for an id, which
# matches a first-match scan.
images_by_id = {}
for item in images:
    images_by_id.setdefault(item['id'], item)
categories_by_id = {}
for item in categories:
    categories_by_id.setdefault(item['id'], item)

merged_data_corrected = []
for annotation in annotations:
    # Sanity check: surface any annotation that is not an elephant.
    if annotation['category_id'] != elephant_id:
        print(annotation)
    # Use the annotation's image_id / category_id to find the matching records.
    image = images_by_id.get(annotation['image_id'])
    category = categories_by_id.get(annotation['category_id'])
    # Merge only when the image record exists.
    if image:
        # A missing category contributes nothing instead of raising TypeError.
        # NOTE(review): image and category records both carry an 'id' key; the
        # right-most mapping wins, so 'id' ends up being the category id.
        merged_dict = {**image, **annotation, **(category or {})}
        # Drop the now-redundant 'image_id' field.
        merged_dict.pop('image_id', None)
        merged_data_corrected.append(merged_dict)

In [172]:
from pycocotools.coco import COCO
from pascal_voc_writer import Writer
import argparse
import os


# COCO category names that differ from their Pascal VOC counterparts by more
# than whitespace; without this map those objects never match common_categories.
_COCO_TO_VOC_NAME = {
    'airplane': 'aeroplane',
    'motorcycle': 'motorbike',
    'couch': 'sofa',
    'tv': 'tvmonitor',
}


def coco2voc(ann_file, output_dir):
    """Write Pascal VOC XML annotations for COCO images that contain an elephant.

    For every image in ``ann_file`` with at least one 'elephant' annotation,
    one XML file named after the image (extension replaced by ``.xml``) is
    written to ``output_dir``. Only objects whose normalized category name is
    listed in the module-level ``common_categories`` are exported.

    Args:
        ann_file: Path to a COCO instance-annotation JSON file.
        output_dir: Directory receiving the XML files; created if missing.

    Returns:
        None. The function only writes files.
    """
    os.makedirs(output_dir, exist_ok=True)

    coco = COCO(ann_file)
    # Loop-invariant: the category list does not change between images.
    cat_ids = coco.getCatIds()
    cat_idx = {c['id']: c['name'] for c in coco.loadCats(cat_ids)}

    for img_id, img_info in coco.imgs.items():
        ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=cat_ids)
        if len(ann_ids) == 0:
            continue

        anns = coco.loadAnns(ann_ids)
        # Only images with at least one elephant are exported.
        if not any(cat_idx[a['category_id']] == 'elephant' for a in anns):
            continue

        img_fname = img_info['file_name']
        label_fname = os.path.splitext(img_fname)[0] + '.xml'
        writer = Writer(img_fname, img_info['width'], img_info['height'])

        n_objects = 0
        for a in anns:
            coco_name = cat_idx[a['category_id']]
            # Normalize to the VOC spelling: explicit aliases first, then the
            # whitespace removal ('dining table' -> 'diningtable').
            catname = _COCO_TO_VOC_NAME.get(coco_name, coco_name).replace(' ', '')
            if catname not in common_categories:
                continue
            # COCO boxes are [x, y, width, height]; VOC wants corner coordinates.
            x, y, w, h = a['bbox']
            writer.addObject(catname, str(x), str(y), str(x + w), str(y + h))
            n_objects += 1

        # Save once per image instead of after every added object.
        if n_objects:
            writer.save(os.path.join(output_dir, label_fname))

In [174]:
# NOTE: despite its name, val_path points at the train2014 annotation file.
val_path = 'coco/annotations/instances_train2014.json'
coco2voc(ann_file=val_path, output_dir='val_coco_voc')

loading annotations into memory...
Done (t=8.82s)
creating index...
index created!


In [175]:
# remove <path> tag in coco_voc xml
def remove_path_element(xml_file):
    """Strip every top-level <path> element from an XML file, in place.

    Only direct children of the root element are removed; nested <path>
    elements are left alone. The file is always rewritten.

    Args:
        xml_file: Path of the XML file to modify.
    """
    document = ET.parse(xml_file)
    top = document.getroot()
    # Snapshot the matches first so removing nodes cannot disturb iteration.
    for stale in list(top.iterfind('path')):
        top.remove(stale)
    document.write(xml_file)

# Strip <path> from every generated annotation. Only .xml entries are
# processed: os.listdir returns everything in the directory, and any other
# entry (such as a checkpoint folder or a stray non-XML file) would make the
# XML parser raise.
remove_dir = 'val_coco_voc'
for file in sorted(os.listdir(remove_dir)):
    if file.endswith('.xml'):
        remove_path_element(os.path.join(remove_dir, file))

In [13]:
## Remove every 'cat' object from the VOC annotation files.
voc_annotations_dir = Path('VOCdevkit/VOC2007/Annotations')
print(os.path.exists(voc_annotations_dir))

for xml_file in voc_annotations_dir.glob('*.xml'):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # findtext returns None for an <object> without <name>, so a malformed
    # entry is skipped instead of raising AttributeError.
    cat_objects = [
        object_tag
        for object_tag in root.findall('object')
        if object_tag.findtext('name') == 'cat'
    ]
    if not cat_objects:
        # Nothing to remove: leave the file untouched rather than rewriting it.
        continue

    # NOTE: a file whose objects are all cats ends up with no <object> at all.
    for object_tag in cat_objects:
        root.remove(object_tag)

    # Save the modified XML back to the same file.
    tree.write(xml_file)

True


In [177]:
import shutil

# Copy the COCO image belonging to each generated annotation into the VOC
# JPEGImages directory; images that cannot be found are recorded.
data_root = 'coco/train2014'
find_dir = 'val_coco_voc'
img_root = 'coco/train2014'
target_dir = 'VOCdevkit/VOC2007/JPEGImages'

# Without an existing directory shutil.copy would create a plain FILE named
# like target_dir instead of copying into it.
os.makedirs(target_dir, exist_ok=True)

failed_files = []
for file in os.listdir(find_dir):
    if not file.endswith('.xml'):
        continue
    # splitext keeps stems that themselves contain dots intact.
    file_name = os.path.splitext(file)[0] + '.jpg'
    img_pth = os.path.join(img_root, file_name)
    if os.path.exists(img_pth):
        # copy file to target dir
        shutil.copy(img_pth, target_dir)
    else:
        # file_name already ends in '.jpg'; do not append the extension again.
        failed_files.append(file_name)

In [182]:
# Prepare the manifest (image-set split) files
import os
import random

# All JPEG images are assumed to live in this directory.
image_dir = 'VOCdevkit/VOC2007/JPEGImages'
# Sort first: os.listdir order is arbitrary, so a sorted list plus a seeded
# shuffle makes the split reproducible across runs and machines.
image_files = sorted(
    os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith('.jpg')
)

# Split ratios.
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1
# Whatever remains after train+val is used for testing.

# Shuffle the image ids with a private, seeded generator so the global
# random state is left untouched.
split_seed = 42
random.Random(split_seed).shuffle(image_files)

# Split into train+val and test.
num_images = len(image_files)
# 0.7 + 0.2 evaluates to 0.8999999999999999 in binary floating point, so a bare
# int() can floor one too low (100 images -> 89). The tiny epsilon restores the
# intended floor without affecting genuinely fractional products.
num_train_val = int(num_images * (train_ratio + val_ratio) + 1e-9)

train_val_files = image_files[:num_train_val]
test_files = image_files[num_train_val:]

In [183]:
# Create the ImageSets/Main directory and put the split manifests in it.
image_sets_path = 'VOCdevkit/VOC2007/ImageSets/Main'
os.makedirs(image_sets_path, exist_ok=True)

# Write trainval_voc_coco.txt and test_voc_coco.txt, one image id per line.
splits = {'trainval': train_val_files, 'test': test_files}
for split_name, split_files in splits.items():
    manifest_path = os.path.join(image_sets_path, f'{split_name}_voc_coco.txt')
    with open(manifest_path, 'w') as file:
        file.writelines(f"{filename}\n" for filename in split_files)

In [184]:
# Sanity check: number of image ids that ended up in the test split.
len(test_files)

1220