In [1]:
import torch
import torchvision
from torchvision import transforms
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import os
from bs4 import BeautifulSoup
from PIL import Image
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# load data

### 因為xml檔案讀取後是 object ，所以需要用 beautifulSoup 套件轉成 text
### 需要轉換的項目有 boxes , label
### 最後將其輸出成已經是 tensor 的 dic

In [2]:
def get_box(obj):
    xmin = int(obj.find('xmin').text)
    ymin = int(obj.find('ymin').text)
    xmax = int(obj.find('xmax').text)
    ymax = int(obj.find('ymax').text)
    return [xmin,ymin,xmax,ymax]

def get_label(obj):
    if obj.find('name').text =='with_mask' :
        return 1 
    elif obj.find('name').text =='without_mask':
        return 2 
    return 0

In [3]:
def obj_to_target(image_id , file):
    with open (file) as f:
        data = f.read()
        soup = BeautifulSoup(data,'xml')
        objects = soup.find_all('object')
        obj_len = len(objects)
        boxes = []
        labels = []
        for i in objects:
            boxes.append(get_box(i))
            labels.append(get_label(i))

        boxes = torch.as_tensor(boxes,dtype = torch.float32)
        labels = torch.as_tensor(labels,dtype = torch.float32)
        image_id = torch.as_tensor([image_id],dtype = torch.float32)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        
        return target

In [4]:
imgs = list(sorted(os.listdir("images/")))
labels = list(sorted(os.listdir("annotations/")))

### Create DataSet

In [5]:
class MaskDataset(object):
    def __init__(self,trasforms):
        self.trasforms = trasforms
        self.imgs = list(sorted(os.listdir("images/")))
    
    def __getitem__(self,idx):
        file_image = "maksssksksss" + str(idx) + ".png"
        file_labels = "maksssksksss" + str(idx) + ".xml"
        image_path = os.path.join("images/",file_image)
        label_path = os.path.join("annotations/",file_labels)
        img = Image.open(image_path).convert('RGB')
        target = obj_to_target(idx, label_path)

        if self.trasforms != None:
            img = self.trasforms(img)
        return img , target

    def __len__(self):
        return len(self.imgs)

In [6]:
data_transform = transforms.Compose([
        transforms.ToTensor(), 
    ])
def collate_size(batch):
    return tuple(zip(*batch))

In [7]:
dataset = MaskDataset(data_transform)
data_loader = torch.utils.data.DataLoader(dataset,batch_size=4,collate_fn=collate_size)

# model

In [8]:
def get_model(num):
    # get pretrain model 
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained= True)
    # set new feature layers
    in_feature = model.roi_heads.box_predictor.cls_score.in_features
    # replace the new layer with parameter
    model.roi_heads.box_predictor  = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_feature, num_classes=num)
    return model

    
# none - withmask - withoutmask
model = get_model(3)

In [9]:
EPOCHS = 25
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.SGD(params,lr= 5e-3 , momentum=9e-1 , weight_decay= 5e-4)
len_loader = len(data_loader)

for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0
    i = 0 
    for imgs , annotations in data_loader:
        i+=1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k,v in t.items()} for t in annotations]

        loss_dict = model([imgs[0]],[annotations[0]])
        losses = sum(loss for loss  in  loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses
    print(epoch_loss)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


AssertionError: target labels must of int64 type

In [10]:
annotations

[{'boxes': tensor([[ 79., 105., 109., 142.],
          [185., 100., 226., 144.],
          [325.,  90., 360., 141.]], device='cuda:0'),
  'labels': tensor([2., 1., 2.], device='cuda:0'),
  'image_id': tensor([0.], device='cuda:0')},
 {'boxes': tensor([[321.,  34., 354.,  69.],
          [224.,  38., 261.,  73.],
          [299.,  58., 315.,  81.],
          [143.,  74., 174., 115.],
          [ 74.,  69.,  95.,  99.],
          [191.,  67., 221.,  93.],
          [ 21.,  73.,  44.,  93.],
          [369.,  70., 398.,  99.],
          [ 83.,  56., 111.,  89.]], device='cuda:0'),
  'labels': tensor([1., 1., 1., 1., 1., 1., 1., 1., 2.], device='cuda:0'),
  'image_id': tensor([1.], device='cuda:0')},
 {'boxes': tensor([[ 68.,  42., 105.,  69.],
          [154.,  47., 178.,  74.],
          [238.,  34., 262.,  69.],
          [333.,  31., 366.,  65.]], device='cuda:0'),
  'labels': tensor([1., 1., 1., 0.], device='cuda:0'),
  'image_id': tensor([2.], device='cuda:0')},
 {'boxes': tensor([[ 