In [10]:
from pycocotools.coco import COCO
import requests
import os
from tqdm import tqdm

# Dataset location: the annotation file is read from
# <dataFrom>/annotations/instances_<dataType>.json
dataFrom = 'coco2017'
dataType = 'train2017'
annFile = f'{dataFrom}/annotations/instances_{dataType}.json'
coco = COCO(annFile)

# Category id(s) registered under the name 'person'
catIds = coco.getCatIds(catNms=['person'])

# Ids of the images that getImgIds reports for those categories
imgIds = coco.getImgIds(catIds=catIds)

# Directory that receives the downloaded images.
# exist_ok=True replaces the separate os.path.exists() check: it removes the
# check-then-create race and keeps the cell safe to re-run.
image = f'{dataFrom}/{dataType}_images'
os.makedirs(image, exist_ok=True)


loading annotations into memory...
Done (t=19.26s)
creating index...
index created!


In [12]:
def download_image(img_info, timeout=30):
    """Download one image into the module-level ``image`` directory.

    Args:
        img_info: Image record; its ``'coco_url'`` and ``'file_name'`` keys
            are read to build the request URL and the destination path.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a single stalled connection blocks the whole
            download loop indefinitely.

    Returns:
        True if the image was written to disk, False if the request failed
        or the server answered with a status other than 200. Failures are
        reported through ``tqdm.write`` instead of being skipped silently.
    """
    img_url = img_info['coco_url']
    img_path = os.path.join(image, img_info['file_name'])
    # Write to a sibling temp file first so an interrupted transfer never
    # leaves a truncated image under the final name.
    part_path = img_path + '.part'

    try:
        # The context manager releases the connection even on early return.
        with requests.get(img_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                tqdm.write(f"Skipped {img_url}: HTTP {response.status_code}")
                return False
            with open(part_path, 'wb') as f:
                # Actually stream the body instead of buffering it all via
                # response.content.
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
    except requests.RequestException as exc:
        tqdm.write(f"Failed {img_url}: {exc}")
        if os.path.exists(part_path):
            os.remove(part_path)
        return False

    os.replace(part_path, img_path)
    return True


# Fetch the first 10,000 image ids, with a progress bar
for image_id in tqdm(imgIds[:10000]):
    # loadImgs returns a list; the record for this id is its first element
    download_image(coco.loadImgs(image_id)[0])

100%|██████████| 10000/10000 [31:56<00:00,  5.22it/s] 


In [13]:
# Directory that receives one annotation JSON file per image.
# exist_ok=True replaces the separate os.path.exists() check: it removes the
# check-then-create race and keeps the cell safe to re-run.
annotations = f'{dataFrom}/{dataType}_annotations'
os.makedirs(annotations, exist_ok=True)

In [14]:
import json

def download_annotations(image_id, save_dir):
    """Write the annotations of one image to ``<image_id>_annotations.json``.

    The annotations come from the already-loaded ``coco`` index, restricted
    to the module-level ``catIds``; nothing is fetched over the network.

    Args:
        image_id: Id of the image whose annotations are exported.
        save_dir: Directory in which the JSON file is created.
    """
    target = os.path.join(save_dir, f"{image_id}_annotations.json")
    records = coco.loadAnns(
        coco.getAnnIds(imgIds=image_id, catIds=catIds, iscrowd=None)
    )
    with open(target, 'w') as out:
        json.dump(records, out)

# Export annotations for the same first 10,000 image ids
for image_id in imgIds[:10000]:
    download_annotations(image_id=image_id, save_dir=annotations)
