In [7]:
import torch
import numpy as np
from PIL import ImageDraw
import random
from pycocotools.coco import COCO
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from torch.utils.data import Dataset
import pandas as pd
from tqdm import tqdm
import cv2
import numpy as np
import sys
from tqdm import trange

In [5]:
class WheatDataset(Dataset):
    """
    Box annotations of a wheat-head detection training set read from ``train.csv``.

    The dataset root (``hyp['data_dir']``) must contain:
      * ``train.csv`` - one row per box, with an ``image_id`` column and a
        ``bbox`` column holding ``"[xmin, ymin, width, height]"`` strings;
      * ``train/``    - the images, stored as ``<image_id>.jpg``.

    All rows are parsed once at construction time into ``self.db``.
    """

    def __init__(self, hyp):
        """
        :param hyp: mapping with a ``'data_dir'`` entry pointing at the dataset root
        :raises AssertionError: if ``<data_dir>/train.csv`` does not exist
        """
        # Zero-argument super() follows the MRO correctly; the previous
        # super(Dataset, self) skipped Dataset itself.
        super().__init__()
        self.data_dir = Path(hyp['data_dir'])
        self.img_dir = self.data_dir / "train"
        self.box_path = self.data_dir / "train.csv"
        assert self.box_path.exists(), f"csv file not found in: {self.box_path}"

        self.csv = pd.read_csv(str(self.box_path))
        # Image ids in order of first appearance in the csv.
        self.img_ids = self.csv['image_id'].unique()
        self.db = self._get_db()
        self.num_class = 1

    def __len__(self):
        """Number of distinct annotated images."""
        return len(self.img_ids)

    @staticmethod
    def _parse_bbox(box):
        """Turn a ``"[xmin, ymin, w, h]"`` string into ``[xmin, ymin, xmax, ymax]`` floats."""
        values = [float(v) for v in box.replace('[', '').replace(']', '').split(',')]
        xmin, ymin, box_w, box_h = values[:4]
        return [xmin, ymin, xmin + box_w, ymin + box_h]

    def _get_db(self):
        """
        Build the per-image annotation table.

        :return: dict mapping image id -> ``{'height', 'width', 'boxes'}`` where
                 ``boxes`` is an ``(n, 4)`` float array of ``[xmin, ymin, xmax, ymax]``
        """
        # Group the frame once; filtering the full csv for every image id is
        # quadratic in the number of rows.
        per_image = self.csv.groupby('image_id', sort=False)
        # Read the image size by column name when possible. The positional
        # lookup (columns 1 and 2) is kept only as a fallback.
        # NOTE(review): the public Global Wheat train.csv appears to be ordered
        # image_id, width, height, ... - if so, the positional lookup swaps
        # height and width. Confirm against the actual file.
        has_named_size = {'height', 'width'} <= set(self.csv.columns)

        box_all = {}
        for img_id in tqdm(self.img_ids, total=len(self.img_ids), file=sys.stdout, desc="loading dataset ... ... ", ncols=100):
            df = per_image.get_group(img_id)
            if has_named_size:
                h = df['height'].iloc[0]
                w = df['width'].iloc[0]
            else:
                h = df.iloc[0, 1]
                w = df.iloc[0, 2]
            xyxy = np.array([self._parse_bbox(box) for box in df['bbox']], dtype=np.float64)
            box_all[img_id] = {'height': h, 'width': w, 'boxes': xyxy}
        return box_all

    def _get_one_example(self, item):
        """
        Load one image together with its boxes.

        :param item: integer position into ``self.img_ids``
        :return: ``(img_id, img, boxes)``; ``img`` is the decoded image with its
                 channel axis reversed (OpenCV loads BGR, so this yields RGB) and
                 ``boxes`` holds ``[xmin, ymin, xmax, ymax]`` rows
        :raises FileNotFoundError: if the image is missing or cannot be decoded
        """
        img_id = self.img_ids[item]
        img_path = self.img_dir / f"{img_id}.jpg"
        img_arr = cv2.imread(str(img_path))
        # cv2.imread signals failure by returning None instead of raising.
        if img_arr is None:
            raise FileNotFoundError(f"image missing or unreadable: {img_path}")
        img_arr = img_arr[:, :, ::-1].copy()
        box = self.db[img_id]['boxes']
        return img_id, img_arr, box

In [6]:
# Settings consumed by WheatDataset; only the dataset root is needed here.
hyp = dict(data_dir="/Volumes/Samsung/Dataset/GlobalWheat/")

# Parses train.csv into memory (a progress bar is printed while loading).
wheat = WheatDataset(hyp)

loading dataset ... ... : 100%|████████████████████████████████| 3373/3373 [00:24<00:00, 138.92it/s]


In [8]:
import shutil

# Copy every annotated training image into the flat image/ export folder.
# Only the image id is needed, so walk the id list directly instead of
# decoding each JPEG through wheat._get_one_example().
image_out_dir = wheat.data_dir / "image"
# shutil.copy does not create missing parent folders.
image_out_dir.mkdir(parents=True, exist_ok=True)
for ix in tqdm(wheat.img_ids, total=len(wheat)):
    src = wheat.img_dir / f"{ix}.jpg"
    dst = image_out_dir / f"{ix}.jpg"
    shutil.copy(str(src), str(dst))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3373/3373 [01:00<00:00, 55.66it/s]


In [9]:
# Write one label file per image: a line "0 xmin ymin xmax ymax" for each box
# (class 0 is the only class). The boxes are already in wheat.db, so the
# images themselves do not need to be decoded here.
label_out_dir = wheat.data_dir / "label"
label_out_dir.mkdir(parents=True, exist_ok=True)  # open() will not create the folder
for ix in tqdm(wheat.img_ids, total=len(wheat)):
    with open(label_out_dir / f"{ix}.txt", 'w') as f:
        for b in wheat.db[ix]['boxes']:
            f.write(f"0 {b[0]} {b[1]} {b[2]} {b[3]}\n")

In [10]:
import cv2
import json
import numpy as np
from abc import ABC
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt


class COCOGenerator(ABC):
    """
    Thin wrapper around a COCO ``instances_<setname>.json`` annotation file.

    Terminology used by the lookup tables:
      * "id"    - category id as stored in the annotation file
      * "class" - contiguous 0-based index, assigned in ascending category-id order
      * "label" - category name
    """

    def __init__(self, data_dir, setname, use_crowd):
        """
        :param data_dir: dataset root containing ``annotations/`` and ``<setname>/``
        :param setname: split name used in the annotation file name and image folder
        :param use_crowd: False -> only non-crowd annotations are loaded;
                          True -> crowd annotations are included as well
        """
        super().__init__()
        self.setname = setname
        self.data_dir = Path(data_dir)
        self.use_crowd = use_crowd
        self.annpath = self.data_dir / 'annotations' / f'instances_{setname}.json'
        self.coco = COCO(self.annpath)
        self.img_ids = self.coco.getImgIds()
        # classes: category ids in class order
        # class2id / id2class: class index <-> category id
        # class2label / label2class: class index <-> category name
        self.classes, self.class2id, self.id2class, self.class2label, self.label2class = [], {}, {}, {}, {}
        self.get_coco_info()
        self.num_class = len(self.classes)

    def get_coco_info(self):
        """
        Fill the category lookup tables from the annotation file.

        Categories are sorted by their integer id and numbered 0..n-1 in that order.
        """
        category_info = self.coco.loadCats(ids=self.coco.getCatIds())
        category_info.sort(key=lambda x: int(x['id']))
        for category in category_info:
            class_index = len(self.classes)
            self.class2id[class_index] = category['id']
            self.id2class[category['id']] = class_index
            self.class2label[class_index] = category['name']
            self.label2class[category['name']] = class_index
            self.classes.append(category['id'])

    def __len__(self):
        """Number of images in the split."""
        return len(self.img_ids)

    def size(self):
        """
        Size of COCO dataset.
        :return: number of images, same as ``len(self)``
        """
        return len(self)

    def has_label(self, label):
        """True if ``label`` is a known class index."""
        return label in self.class2label

    def has_name(self, name):
        """True if ``name`` is a known category name."""
        return name in self.label2class

    def label_to_name(self, label):
        """Class index -> category name."""
        return self.class2label[label]

    def name_to_label(self, name):
        """Category name -> class index."""
        return self.label2class[name]

    def coco_id_to_class(self, coco_id):
        """Category id -> class index."""
        return self.id2class[coco_id]

    def coco_id_to_label(self, coco_id):
        """Category id -> category name."""
        return self.class2label[self.coco_id_to_class(coco_id)]

    def get_img_path(self, idx):
        """Path (as ``str``) of image number ``idx``: ``<data_dir>/<setname>/<id zero-padded to 12>.jpg``."""
        return str(self.data_dir / self.setname / f'{self.img_ids[idx]:>012}.jpg')

    def load_annotations(self, idx):
        """
        Collect the class indices and boxes of image number ``idx``.

        :param idx: integer position into ``self.img_ids``
        :return: dict with ``'classes'`` (shape ``(n,)``) and ``'bboxes'`` (shape
                 ``(n, 4)``, rows are ``[xmin, ymin, xmax, ymax]``); both are empty
                 when the image has no usable annotation
        """
        # pycocotools treats ``iscrowd`` as an equality filter, so passing True
        # would return ONLY crowd annotations. None disables the filter, which is
        # what "use crowd annotations too" means.
        if self.use_crowd is None or self.use_crowd:
            crowd_filter = None
        else:
            crowd_filter = False
        ann_ids = self.coco.getAnnIds(imgIds=self.img_ids[idx], iscrowd=crowd_filter)
        annotations = {'classes': np.empty((0,), dtype=np.uint16), 'bboxes': np.empty((0, 4), dtype=np.float32)}
        # Some images have no annotations.
        if len(ann_ids) == 0:
            return annotations

        kept_classes, kept_boxes = [], []
        for ann in self.coco.loadAnns(ids=ann_ids):
            xmin, ymin, box_w, box_h = ann['bbox'][:4]
            # Some annotations have a bbox width/height below 1; drop those.
            if box_w < 1 or box_h < 1:
                continue
            kept_classes.append(self.coco_id_to_class(ann['category_id']))
            kept_boxes.append([xmin, ymin, xmin + box_w, ymin + box_h])

        if kept_boxes:
            # One concatenation instead of one per annotation. Concatenating onto
            # the empty seed arrays keeps the dtype promotion the per-annotation
            # version produced.
            annotations['classes'] = np.concatenate([annotations['classes'], kept_classes], axis=0)
            annotations['bboxes'] = np.concatenate([annotations['bboxes'], kept_boxes], axis=0)
        return annotations

In [18]:
# COCO val2017 split, loaded with use_crowd switched off.
coco = COCOGenerator(
    data_dir="/Volumes/Samsung/Dataset/COCO/",
    setname="val2017",
    use_crowd=False,
)

loading annotations into memory...
Done (t=0.37s)
creating index...
index created!


In [20]:
# Export the COCO split as a flat folder of images plus one label file per
# image; each label line is "class xmin ymin xmax ymax".
coco_export_dir = Path("/Volumes/Samsung/Dataset/COCO/val_dataset")
coco_image_dir = coco_export_dir / "image"
coco_label_dir = coco_export_dir / "label"
# Neither shutil.copy nor open() creates missing parent folders.
coco_image_dir.mkdir(parents=True, exist_ok=True)
coco_label_dir.mkdir(parents=True, exist_ok=True)

for i in tqdm(range(len(coco)), total=len(coco)):
    ann = coco.load_annotations(i)
    img_id = coco.img_ids[i]
    box = ann['bboxes']
    cls_ = ann['classes']
    img_src = Path(coco.get_img_path(i))
    # Explicit check rather than a bare assert, so the failing path is reported.
    if not img_src.exists():
        raise FileNotFoundError(f"image not found: {img_src}")
    img_dst = coco_image_dir / f"{img_id}.jpg"
    shutil.copy(str(img_src), str(img_dst))
    with open(coco_label_dir / f"{img_id}.txt", 'w') as f:
        for c, b in zip(cls_, box):
            f.write(f"{c} {b[0]} {b[1]} {b[2]} {b[3]}\n")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [00:08<00:00, 571.71it/s]


In [21]:
# Dump the class-index -> category-name table, one "index name" pair per line.
names_path = "/Volumes/Samsung/Dataset/COCO/val_dataset/names.txt"
with open(names_path, 'w') as f:
    f.writelines(f"{k} {v}\n" for k, v in coco.class2label.items())

In [14]:
# Scratch array for checking slicing/shape behaviour. np.zeros is used because
# np.empty leaves the buffer uninitialised, so displaying its contents would
# not be reproducible.
a = np.zeros(shape=[1, 5])

In [16]:
# Scratch check: every column except the first.
a[:, 1:]

array([[0., 0., 0., 0.]])

In [17]:
# Scratch check: shape of the array.
a.shape

(1, 5)