In [2]:
import glob
import os.path as osp
import numpy as np
import json

In [3]:
def read_json(fpath):
    """Load a JSON document from disk.

    Args:
        fpath (str): path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the content is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
    """
    # Decode explicitly as UTF-8 rather than with the platform default
    # (locale-dependent, e.g. cp1252 on Windows), so a file containing
    # non-ASCII text is read the same way on every machine.
    with open(fpath, 'r', encoding='utf-8') as f:
        return json.load(f)

In [4]:
class PRID(object):
    """
    PRID-2011 multi-shot loader.

    Reads one train/test split from ``split_path`` and builds three tracklet
    lists from the identity directories it names:

    * ``train``   - train identities, camera A and camera B
    * ``query``   - test identities, camera A only
    * ``gallery`` - test identities, camera B only

    Every tracklet is a tuple ``(img_paths, pid, camid)``: ``img_paths`` is a
    tuple of ``*.png`` paths sorted by file name, ``pid`` is the position of
    the identity directory inside its subset's directory list, and ``camid``
    is 0 for ``cam_a`` and 1 for ``cam_b``. Query and gallery are built from
    the same directory list, so their ``pid`` values agree.

    Dataset statistics:
    # identities: 200
    # tracklets: 400
    # cameras: 2

    Args:
        split_id (int): index of the split to load from the split file.
        min_seq_len (int): accepted for signature compatibility; this loader
            does not currently use it (no tracklet is filtered by length).

    Raises:
        RuntimeError: if ``root`` does not exist.
        ValueError: if ``split_id`` is outside ``0 .. len(splits) - 1``.
        AssertionError: if an identity directory holds no ``*.png`` file.
    """
    # All locations are derived from `root`; override these class attributes
    # (e.g. in a subclass) to point the loader at another copy of the data.
    root = './prid2011'
    dataset_url = 'https://files.icg.tugraz.at/f/6ab7e8ce8f/?raw=1'
    split_path = osp.join(root, 'splits_prid2011.json')
    cam_a_path = osp.join(root, 'prid_2011', 'multi_shot', 'cam_a')
    cam_b_path = osp.join(root, 'prid_2011', 'multi_shot', 'cam_b')

    def __init__(self, split_id=0, min_seq_len=0):
        self._check_before_run()
        splits = read_json(self.split_path)
        # Negative ids are rejected as well: Python would otherwise wrap them
        # around and silently load a different split than the one requested.
        if not 0 <= split_id < len(splits):
            raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, len(splits)-1))
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']
        print("# train identites: {}, # test identites {}".format(len(train_dirs), len(test_dirs)))

        train, num_train_tracklets, num_train_pids, num_imgs_train = \
          self._process_data(train_dirs, cam1=True, cam2=True)
        query, num_query_tracklets, num_query_pids, num_imgs_query = \
          self._process_data(test_dirs, cam1=True, cam2=False)
        gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
          self._process_data(test_dirs, cam1=False, cam2=True)

        # Per-tracklet frame counts over all three subsets (list concatenation).
        num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
        min_num = np.min(num_imgs_per_tracklet)
        max_num = np.max(num_imgs_per_tracklet)
        avg_num = np.mean(num_imgs_per_tracklet)

        # Gallery is built from the same directories as query, so its
        # identities are not added a second time.
        num_total_pids = num_train_pids + num_query_pids
        num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

        print("=> PRID-2011 loaded")
        print("Dataset statistics:")
        print("  ------------------------------")
        print("  subset   | # ids | # tracklets")
        print("  ------------------------------")
        print("  train    | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets))
        print("  query    | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets))
        print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets))
        print("  ------------------------------")
        print("  total    | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets))
        print("  number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num))
        print("  ------------------------------")

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_pids = num_train_pids
        self.num_query_pids = num_query_pids
        self.num_gallery_pids = num_gallery_pids

    def _check_before_run(self):
        """Verify that the dataset root exists.

        Only ``root`` is checked; the split file and camera directories are
        not verified here.

        Raises:
            RuntimeError: if ``root`` does not exist.
        """
        if not osp.exists(self.root):
            raise RuntimeError("'{}' is not available".format(self.root))

    @staticmethod
    def _list_frames(person_dir):
        """Return the ``*.png`` paths directly inside ``person_dir`` as a sorted tuple."""
        # glob makes no ordering promise; sort so that the frames of a tracklet
        # come back in file-name order on every platform and filesystem.
        img_names = sorted(glob.glob(osp.join(person_dir, '*.png')))
        assert len(img_names) > 0, "no '*.png' frames found in '{}'".format(person_dir)
        return tuple(img_names)

    def _process_data(self, dirnames, cam1=True, cam2=True):
        """Build the tracklets for a list of identity directories.

        Args:
            dirnames (list): identity directory names; an identity's pid is
                its index in this list.
            cam1 (bool): include the ``cam_a`` tracklet of each identity.
            cam2 (bool): include the ``cam_b`` tracklet of each identity.

        Returns:
            tuple: ``(tracklets, num_tracklets, num_pids, num_imgs_per_tracklet)``
            where ``tracklets`` is a list of ``(img_paths, pid, camid)``,
            ``num_pids`` is ``len(dirnames)`` and ``num_imgs_per_tracklet``
            lists the frame count of each tracklet in the same order.

        Raises:
            AssertionError: if a requested identity directory has no frames.
        """
        # (enabled, directory, camid) for each camera, in emission order.
        cameras = ((cam1, self.cam_a_path, 0), (cam2, self.cam_b_path, 1))
        dirname2pid = {dirname: i for i, dirname in enumerate(dirnames)}

        tracklets = []
        num_imgs_per_tracklet = []
        for dirname in dirnames:
            pid = dirname2pid[dirname]
            for enabled, cam_path, camid in cameras:
                if not enabled:
                    continue
                img_names = self._list_frames(osp.join(cam_path, dirname))
                tracklets.append((img_names, pid, camid))
                num_imgs_per_tracklet.append(len(img_names))

        num_tracklets = len(tracklets)
        num_pids = len(dirnames)

        return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet

In [5]:
# Build the loader with its defaults (split_id=0); the constructor prints
# the per-subset summary table as a side effect.
dataset = PRID()

# train identites: 89, # test identites 89
=> PRID-2011 loaded
Dataset statistics:
  ------------------------------
  subset   | # ids | # tracklets
  ------------------------------
  train    |    89 |      178
  query    |    89 |       89
  gallery  |    89 |       89
  ------------------------------
  total    |   178 |      356
  number of images per tracklet: 28 ~ 675, average 108.1
  ------------------------------


In [6]:
# Sanity check: show the container type of the train subset.
print(type(dataset.train))

<class 'list'>


In [7]:
# Prints every train tracklet, i.e. each (img_paths, pid, camid) tuple with all frame paths.
# NOTE(review): this dumps the whole list; consider showing a slice such as
# dataset.train[0][0][:3] to keep the cell output readable.
print(dataset.train)

[(('./prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0001.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0002.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0003.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0004.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0005.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0006.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0007.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0008.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0009.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0010.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0011.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0012.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0013.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\0014.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0004\\001

In [9]:
# Prints every gallery tracklet (camera B frames of the test identities).
# NOTE(review): this dumps the whole list; consider showing a slice such as
# dataset.gallery[0][0][:3] to keep the cell output readable.
print(dataset.gallery)

[(('./prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0001.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0002.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0003.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0004.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0005.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0006.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0007.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0008.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0009.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0010.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0011.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0012.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0013.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\0014.png', './prid2011\\prid_2011\\multi_shot\\cam_b\\person_0001\\001

In [10]:
# Prints every query tracklet (camera A frames of the test identities).
# NOTE(review): this dumps the whole list; consider showing a slice such as
# dataset.query[0][0][:3] to keep the cell output readable.
print(dataset.query)

[(('./prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0001.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0002.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0003.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0004.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0005.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0006.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0007.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0008.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0009.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0010.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0011.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0012.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0013.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\0014.png', './prid2011\\prid_2011\\multi_shot\\cam_a\\person_0001\\001