In [1]:
import torch
import torchvision
import numpy as np
import os
from PIL import Image
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from my_utils import get_transform, my_dataloader
from engine import train_one_epoch, evaluate

In [2]:
class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [3]:
is_cuda = torch.cuda.is_available()
# device = torch.device('cuda' if is_cuda else 'cpu')
device = torch.device('cpu')

path = "/media/zj-linux/工作/cv2_data/PennFudanPed"

# create dataloader
dataset = PennFudanDataset(path, get_transform(True))
dataset_test = PennFudanDataset(path, get_transform(False))

In [4]:
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=1)
data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=True, num_workers=1)

In [5]:
# build model
num_classes = 2
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# 制作下anchor的尺寸
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# 制作roi_pooler
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

model = FasterRCNN(backbone,
                   num_classes=num_classes,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [6]:
num_epochs = 1

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/zj-linux/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/zj-linux/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/home/zj-linux/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 80, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/home/zj-linux/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 80, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/home/zj-linux/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/collate.py", line 56, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 353 and 348 in dimension 2 at /opt/conda/conda-bld/pytorch_1565272271120/work/aten/src/TH/generic/THTensor.cpp:689


In [1]:
import torch

In [6]:
boxes = torch.tensor([[1,2,3,4],[2,3,4,5]])
boxes_1 = torch.tensor([[6,7,8,9],[2,5,9,7]])

In [7]:
torch.max(boxes[:, None, :2], boxes_1[:,:2])

tensor([[[6, 7],
         [2, 5]],

        [[6, 7],
         [2, 5]]])

In [8]:
torch.min(boxes[:, None, 2:], boxes_1[:,2:])

tensor([[[3, 4],
         [3, 4]],

        [[4, 5],
         [4, 5]]])

In [13]:
((torch.min(boxes[:, None, 2:], boxes_1[:,2:]) - torch.max(boxes[:, None, :2], boxes_1[:,:2])).clamp(min=0)[:, :, 0])*((torch.min(boxes[:, None, 2:], boxes_1[:,2:]) - torch.max(boxes[:, None, :2], boxes_1[:,:2])).clamp(min=0)[:, :, 1])

tensor([[0, 0],
        [0, 0]])

In [1]:
import torch

In [2]:
a = torch.tensor([[1,2,3], [4,5,6],[7,8,9]])

In [3]:
a

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [4]:
a.narrow(0,0,1)

tensor([[1, 2, 3]])

In [6]:
a.narrow(0,1,1)

tensor([[4, 5, 6]])

In [12]:
a.narrow(0,0,2)

tensor([[1, 2, 3],
        [4, 5, 6]])

In [15]:
a[:,1]

tensor([2, 5, 8])

In [16]:
torch.nonzero(a > 2)

tensor([[0, 2],
        [1, 0],
        [1, 1],
        [1, 2],
        [2, 0],
        [2, 1],
        [2, 2]])