In [27]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from bs4 import BeautifulSoup
import torchvision
from torchvision import transforms, datasets, models
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import matplotlib.patches as patches
import os
from importlib import reload
import cvn_utils
import kaggle_utils
import postprocess
import math
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# Show the GPU model, driver/CUDA versions and current GPU memory usage.
!nvidia-smi

Sun Sep 20 00:28:52 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.87.01    Driver Version: 418.87.01    CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [3]:
# Check whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

## Identify label files (Kaggle XML annotations and CVN JSON files)

In [4]:
def is_kaggle_annotation_file(file_name):
    """Return True when the path contains both 'annotations' and 'xml'."""
    required_parts = ('annotations', 'xml')
    return all(part in file_name for part in required_parts)
 
def is_cvn_json_file(file_name):
    """Return True when the path contains both 'faces' and 'json'."""
    if 'faces' not in file_name:
        return False
    return 'json' in file_name

def cvn_json_has_box(json_fn):
    """Load a CVN JSON label file and report whether it has any resized box."""
    json_processor = cvn_utils.JsonFileProcessor(json_fn)
    json_processor.load()
    box_count = len(json_processor.resized_boxes)
    return box_count > 0
        

### Walk over the annotation files of both Kaggle and CVN

In [5]:
reload(cvn_utils)  # re-import cvn_utils so edits made to the module are picked up

# Root directory scanned for label files (machine-specific; adjust as needed).
WORKSPACE_ROOT = '/home/yangxu/face_mask_detection_workspace/'

kaggle_label_fns = []
cvn_label_fns = []
for dirname, subdirs, filenames in os.walk(WORKSPACE_ROOT):
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting the sub-directories in place (which steers the traversal) and
    # the file names gives both label lists a stable order, so the seeded
    # train/validation split built from their indices is reproducible.
    subdirs.sort()
    for filename in sorted(filenames):
        annotation_full_name = os.path.join(dirname, filename)
        if is_kaggle_annotation_file(annotation_full_name):
            kaggle_label_fns.append(annotation_full_name)
        elif is_cvn_json_file(annotation_full_name) and cvn_json_has_box(annotation_full_name):
            # CVN label files without any box are left out.
            cvn_label_fns.append(annotation_full_name)

print ('kaggle file num = {}, cvn file num = {}, all label file num = {}'.format(
        len(kaggle_label_fns),
        len(cvn_label_fns),
        len(kaggle_label_fns) + len(cvn_label_fns)))


kaggle file num = 853, cvn file num = 517, all label file num = 1370


In [6]:
# Print the first CVN label file that has at least three shapes, then stop.
for cvn_label_fn in cvn_label_fns:
    processor = cvn_utils.JsonFileProcessor(cvn_label_fn)
    processor.load()
    if processor.get_num_shapes() < 3:
        continue
    print(cvn_label_fn, processor.get_num_shapes())
    break

/home/yangxu/face_mask_detection_workspace/data/cvn/office/faces/office_2020-07-17 15_10_25.204000_img.json 3


In [7]:
class MaskDataset(object):
    """Map-style dataset over the Kaggle XML and CVN JSON label files.

    Each item is an ``(img, target)`` pair built from one label file.

    Args:
        transforms: callable applied to the PIL image of a Kaggle sample, or
            None to leave it untouched. It is not applied to CVN samples,
            whose image comes from ``generate_image_tensor()``.
        label_fns: optional sequence of label file paths. When omitted, the
            notebook-level ``kaggle_label_fns + cvn_label_fns`` is used; pass
            e.g. ``cvn_label_fns`` to restrict the dataset to one source.
    """

    def __init__(self, transforms, label_fns=None):
        self.transforms = transforms
        if label_fns is None:
            label_fns = kaggle_label_fns + cvn_label_fns
        # Copy so later changes to the caller's list do not alter the dataset.
        self.all_label_fns = list(label_fns)

    def gen_target_kaggle(self, idx, label_fn):
        """Build the target for a Kaggle annotation file via kaggle_utils."""
        return kaggle_utils.generate_target(idx, label_fn)

    def __getitem__(self, idx):
        """Return the ``(img, target)`` pair for the label file at ``idx``."""
        label_fn = self.all_label_fns[idx]

        if is_kaggle_annotation_file(label_fn):
            img_fn = kaggle_utils.find_image_file_kaggle(label_fn)
            # Close the underlying file once the RGB copy has been made, so
            # handles do not pile up while iterating over the dataset.
            with Image.open(img_fn) as raw_img:
                img = raw_img.convert("RGB")
            target = self.gen_target_kaggle(idx, label_fn)
            if self.transforms is not None:
                img = self.transforms(img)
        else:
            # Anything that is not a Kaggle annotation is handled as a CVN JSON file.
            processor = cvn_utils.JsonFileProcessor(label_fn)
            processor.load()
            img = processor.generate_image_tensor()
            target = processor.generate_target(idx)

        return img, target

    def __len__(self):
        """Number of label files in the dataset."""
        return len(self.all_label_fns)

In [8]:
# The only preprocessing step is conversion to a tensor.
data_transform = transforms.Compose([transforms.ToTensor()])

In [9]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Dataset instance built with the tensor-conversion transform defined above.
dataset = MaskDataset(data_transform)



In [10]:
# Hold out a fixed fraction of the samples for validation.
validation_split = .2
random_seed= 42
shuffle_dataset = True

dataset_size = len(dataset)
print('dataset size =', dataset_size)
split = int(np.floor(validation_split * dataset_size))

indices = list(range(dataset_size))
if shuffle_dataset:
    # Seed NumPy's global RNG so the shuffled index order is repeatable.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Each sampler restricts its loader to one subset of the shared dataset.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

loader_options = dict(batch_size=4, collate_fn=collate_fn, num_workers=1)
train_data_loader = torch.utils.data.DataLoader(
    dataset, sampler=train_sampler, **loader_options)
valid_data_loader = torch.utils.data.DataLoader(
    dataset, sampler=valid_sampler, **loader_options)

# Note: len() of a DataLoader is its number of batches, not its number of samples.
print('train data len = {}, validation data len = {}'.format(len(train_data_loader), len(valid_data_loader)))

dataset size = 1370
train data len = 274, validation data len = 69


In [11]:
def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box head.

    Despite the function name, the model is a box detector: it is created
    with ``pretrained=True`` and its box predictor is replaced by a fresh
    ``FastRCNNPredictor`` that outputs ``num_classes`` classes.

    Args:
        num_classes: number of classes the new box predictor outputs.

    Returns:
        The model with the replaced box predictor.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # The new head must accept the same feature size the old classifier consumed.
    feature_size = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_size, num_classes)
    return detector

In [12]:
# Build the detector with a 3-class box head (presumably background plus two face classes — TODO confirm).
model = get_model_instance_segmentation(3)

In [13]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [14]:
# Load previously saved weights. map_location remaps tensors that were saved
# on a GPU, so the checkpoint also loads when `device` fell back to the CPU.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load('model.pt', map_location=device))

<All keys matched successfully>

In [15]:
# num_epochs = 5
# model.to(device)
    
# # parameters
# params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(params, lr=0.005,
#                                 momentum=0.9, weight_decay=0.0005)

# len_dataloader = len(train_data_loader)

# for epoch in range(num_epochs):
#     model.train()
#     i = 0    
#     train_epoch_loss = 0
#     valid_epoch_loss = 0
    
#     # training
#     for imgs, annotations in train_data_loader:
#         i += 1
        
#         imgs = list(img.to(device) for img in imgs)
#         annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
               
#         loss_dict = model(imgs, annotations)
#         losses = sum(loss for loss in loss_dict.values())      
            
#         optimizer.zero_grad()
#         losses.backward()
#         optimizer.step() 
#         train_epoch_loss += losses
# #         if i % 10 == 0:
# #             print('== image size: {}, iteration loss: {}, {}'.format(len(imgs), i, losses))
#     print('==== Training epoch = {}, train_epoch_loss = {}'.format(epoch, train_epoch_loss))
   
# #     # validation
# #     with torch.no_grad():        
           
# #         for imgs, annotations in valid_data_loader:        
# #             imgs = list(img.to(device) for img in imgs)
# #             annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

# #             loss_dict = model(imgs, annotations)
# #             losses = sum(loss for loss in loss_dict.values())      

# #             valid_epoch_loss += losses
# #         print('==== Validation epoch = {}, valid_epoch_loss = {}'.format(epoch, valid_epoch_loss))
        

In [16]:
# torch.save(model.state_dict(),'model.pt')

In [17]:
def plot_image(img_tensor, target_tensor):
    """Show an image with its bounding boxes drawn on top.

    Args:
        img_tensor: image tensor; its dimensions are permuted with
            ``(1, 2, 0)`` before it is handed to ``imshow``.
        target_tensor: dict with 'labels' and 'boxes' tensors of equal
            length; each box is unpacked as (xmin, ymin, xmax, ymax).

    Boxes are outlined in green for label 1, yellow for label 2 and red for
    any other label.
    """
    edge_colors = {1: 'g', 2: 'y'}

    fig, ax = plt.subplots(1)

    # Display the image
    ax.imshow(img_tensor.cpu().data.permute(1, 2, 0))

    labels = target_tensor['labels'].detach().cpu().numpy()
    boxes = target_tensor['boxes'].detach().cpu().numpy()
    assert len(labels) == len(boxes)

    for idx, label in enumerate(labels):
        xmin, ymin, xmax, ymax = boxes[idx]
        rect = patches.Rectangle(
            (xmin, ymin), (xmax - xmin), (ymax - ymin),
            linewidth=1, edgecolor=edge_colors.get(label, 'r'), facecolor='none')
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()

In [18]:
# Take the first validation batch and move its images and targets to `device`.
for imgs, annotations in valid_data_loader:
    imgs = [img.to(device) for img in imgs]
    annotations = [
        {key: value.to(device) for key, value in target.items()}
        for target in annotations
    ]
    break


In [19]:
# Switch the model to evaluation mode before running inference.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequent

In [20]:
# Put the model on the same device the images were moved to; a bare
# model.cuda() would fail on a machine without a GPU even though `device`
# falls back to the CPU.
model.to(device)
# The predictions are only inspected and plotted, so skip autograd tracking.
with torch.no_grad():
    preds = model(imgs)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


In [21]:
# Inspect the raw prediction dict (boxes, labels, scores) for the third image of the batch.
preds[2]

{'boxes': tensor([[ 29.6072,  65.1732,  47.5958,  81.1872],
         [ 44.3625,  39.7267,  65.9259,  62.7433],
         [317.5525, 249.1433, 332.8996, 265.2899],
         [ 11.3504, 217.8923,  64.1109, 263.6417],
         [303.1713, 209.4491, 325.1876, 231.2921],
         [ 85.1499, 238.1956, 135.4464, 282.2182],
         [339.7503, 248.1924, 366.9675, 273.3849]], device='cuda:0',
        grad_fn=<StackBackward>),
 'labels': tensor([1, 1, 1, 1, 1, 1, 1], device='cuda:0'),
 'scores': tensor([0.9989, 0.9988, 0.9984, 0.9984, 0.9962, 0.9924, 0.9920],
        device='cuda:0', grad_fn=<IndexBackward>)}

In [22]:
# Pull the second image's predicted labels and boxes out as NumPy arrays.
second_pred = preds[1]
labels = second_pred['labels'].detach().cpu().numpy()
boxes = second_pred['boxes'].detach().cpu().numpy()
label = labels[0]

In [23]:
# Size of the second image along dimension 1 (presumably the height of a C x H x W tensor — TODO confirm).
imgs[1].shape[1]

225

In [28]:
from postprocess import PredictionPostProcessor

In [29]:
import json
# Hand the third image's predictions to the post-processor together with the
# target file name 'testing.json'. The conversion logic lives in postprocess.py;
# presumably it writes the predictions as JSON — TODO confirm.
postprocessor = PredictionPostProcessor()
postprocessor.convert_prediction_tensors_to_json_file(preds[2], 'testing.json')

<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>


In [None]:
# Show the contents of testing.json.
! less testing.json

In [None]:
# Draw the predicted boxes on the second image of the batch.
plot_image(imgs[1], preds[1])

In [None]:
# Draw the predicted boxes on the third image of the batch.
plot_image(imgs[2], preds[2])

In [None]:
# Draw the predicted boxes on the fourth image of the batch.
plot_image(imgs[3], preds[3])