# Load model

In [2]:
import torch
from maskrcnn import MaskrcnnResnet50FPN

num_classes = 3
weight_path = './weight/2011110823/best_model_31_dice_mAP=0.9705.pt'

In [3]:
model = MaskrcnnResnet50FPN(num_classes=num_classes)
model.load_state_dict(torch.load(weight_path, map_location='cpu'))

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
model.to(device)
model.eval()

MaskrcnnResnet50FPN(
  (model): MaskRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): FrozenBatchNorm2d()
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): FrozenBatchNorm2d()
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): FrozenBatchNorm2d()
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn3): FrozenBatchNorm2d()
  

# Preprocessing

In [6]:
import cv2
image_size = (768, 768)

def preprocess(image):
    sample = cv2.resize(image, dsize=image_size)
    sample = torch.from_numpy(sample).to(torch.float).to(device)
    samples = sample.unsqueeze(dim=0).permute(0, 3, 1, 2)
    samples = (samples - samples.mean()) / samples.std() #Tùy thuộc vào lúc train norm cách nào
    return image, samples

# Processing

In [7]:
def process(image, samples):
    with torch.no_grad():
        preds = model(samples)
    return image, preds

# Postprocessing

In [8]:
import cv2
import itertools
import numpy as np
from shapely.geometry import box, Point, Polygon


class EnclosingQuadrilateral:
    def __init__(self):
        self.binary_threshold = 0.6
        self.area_threshold = 0.0
        self.vertical_threshold = 20
        self.iou_threshold = 0.8

    def _order_points(self, points):
        assert len(points) == 4, 'Length of points must be 4'
        left = sorted(points, key=lambda p: p[0])[:2]
        right = sorted(points, key=lambda p: p[0])[2:]
        tl, bl = sorted(left, key=lambda p: p[1])
        tr, br = sorted(right, key=lambda p: p[1])
        return [tl, tr, br, bl]

    def _compute_iou(self, polyA, polyB):
        iou = 0.
        polyA = Polygon(polyA)
        polyB = Polygon(polyB)
        if polyA.intersects(polyB):
            iou = polyA.intersection(polyB).area / polyA.union(polyB).area
        return iou

    def _intersection_point(self, line1, line2):
        a1 = line1[1][1] - line1[0][1]
        b1 = line1[0][0] - line1[1][0]
        a2 = line2[1][1] - line2[0][1]
        b2 = line2[0][0] - line2[1][0]
        determinant = a1 * b2 - a2 * b1
        if determinant == 0:
            return None
        c1 = (a1 / determinant) * line1[0][0] + (b1 / determinant) * line1[0][1]
        c2 = (a2 / determinant) * line2[0][0] + (b2 / determinant) * line2[0][1]
        x = b2 * c1 - b1 * c2
        y = a1 * c2 - a2 * c1
        return [int(x), int(y)]

    def _convex_hulls(self, pred, binary_threshold=0.6, area_threshold=0.0, vertical_threshold=20):
        convex_hulls = []
        binary_image = (pred > binary_threshold).astype(np.uint8)
        binary_image = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, np.ones(shape=(5, 5), dtype=np.uint8))
        num_label, label = cv2.connectedComponents(binary_image)
        for i in range(1, num_label):
            contours, _ = cv2.findContours((label == i).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            contour = contours[0]
            if cv2.contourArea(contour) > area_threshold * pred.size:
                epsilon = 0.009 * cv2.arcLength(contour, closed=True)
                approx_contour = cv2.approxPolyDP(contour, epsilon, closed=True)
                convex_hull = cv2.convexHull(approx_contour)  # approximate contour to reduce num of points
                for inc in range(5):
                    if convex_hull.shape[0] <= vertical_threshold:
                        break
                    epsilon = 0.002 * (1 + inc) * cv2.arcLength(contour, closed=True)
                    convex_hull = cv2.approxPolyDP(convex_hull, epsilon, closed=True)

                if 4 <= convex_hull.shape[0] <= vertical_threshold:
                    convex_hulls.append(np.squeeze(np.array(convex_hull), axis=1))

        return convex_hulls

    def _enclosing_quadrilateral(self, pred, convex_hulls, iou_threshold):
        enclosing_quads = []
        x1, x2 = [-pred.shape[0], 2 * pred.shape[0]]
        y1, y2 = [-pred.shape[1], 2 * pred.shape[1]]
        boundary = box(x1, y1, x2, y2)
        for polygon in convex_hulls:
            num_verticals = len(polygon)
            max_iou = 0.
            enclosing_quad = None
            for (x, y, z, t) in itertools.combinations(range(num_verticals), 4):
                lines = [
                    [polygon[x], polygon[(x + 1) % num_verticals]],
                    [polygon[y], polygon[(y + 1) % num_verticals]],
                    [polygon[z], polygon[(z + 1) % num_verticals]],
                    [polygon[t], polygon[(t + 1) % num_verticals]]
                ]
                points = []
                for i in range(4):
                    point = self._intersection_point(lines[i], lines[(i + 1) % 4])
                    if (not point) or (point in points) or (not boundary.contains(Point(point))):
                        break
                    points.append(point)

                if len(points) == 4 and Polygon(self._order_points(points)).is_valid:
                    candidate_quad = self._order_points(points)
                    iou = self._compute_iou(candidate_quad, polygon)
                    if iou > max_iou and iou > iou_threshold:
                        enclosing_quad = candidate_quad
                        max_iou = iou

            if enclosing_quad:
                enclosing_quads.append(enclosing_quad)

        return enclosing_quads
    
    def perspective_transform(self, original_image, pred_mask, quad):
        width_ratio = original_image.shape[1] / pred_mask.shape[1]
        height_ratio = original_image.shape[0] / pred_mask.shape[0] 
        quad = np.array(quad, dtype=np.float32)
        quad[:, 0] = quad[:, 0] * width_ratio
        quad[:, 1] = quad[:, 1] * height_ratio
        tl, tr, br, bl = quad
        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
        maxWidth = max(int(widthA), int(widthB))
        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
        maxHeight = max(int(heightA), int(heightB))
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype = "float32")
        M = cv2.getPerspectiveTransform(quad, dst)
        warped_image = cv2.warpPerspective(original_image, M, (maxWidth, maxHeight))
        return warped_image, [tl, tr, br, bl]

    def __call__(self, original_image, pred_mask):
        warped_images = []
        convex_hulls = self._convex_hulls(pred_mask,
                                          self.binary_threshold,
                                          self.area_threshold,
                                          self.vertical_threshold)
        enclosing_quads = self._enclosing_quadrilateral(pred_mask,
                                                        convex_hulls,
                                                        self.iou_threshold)
        for quad in enclosing_quads:
            warped_images.append(self.perspective_transform(original_image, pred_mask, quad))

        return warped_images


In [9]:
def postprocess(image, preds):
    pred = preds[0]
    boxes = pred['boxes']
    masks = pred['masks']
    scores = pred['scores']
    labels = pred['labels']
    
    indices = scores > 0.5
    masks = masks[indices]
    labels = labels[indices]
    
    indices = torchvision.ops.nms(boxes, scores, 0.5)
    masks = masks[indices]
    scores = scores[indices]
    labels = labels[indices]  
    
    labels = labels.detach().cpu().numpy()
    masks = masks.squeeze(1).detach().cpu().numpy()
    
    warped_images = []
    min_enclosing_quad = EnclosingQuadrilateral()
    for mask in masks:
        warped_image = min_enclosing_quad(image, mask)
        warped_images += warped_image
    
    return warped_images

# Find Closing Quad

In [10]:
pred_mask = cv2.imread('./test/mask.jpg', cv2.IMREAD_GRAYSCALE) / 255.

enclosing_quad = EnclosingQuadrilateral()
convex_hulls = enclosing_quad._convex_hulls(pred_mask, 0.6, 0, 20)
enclosing_quads = enclosing_quad._enclosing_quadrilateral(pred_mask,
                                                convex_hulls,
                                                0.8)

for quad in enclosing_quads:
    quad = np.int32(quad)
    test_quad = np.stack([pred_mask] * 3, axis=2)
    cv2.polylines(test_quad, [quad], True, (255, 0, 0), 3)
    cv2.imshow('find quad', test_quad)
    cv2.waitKey()
    cv2.destroyAllWindows()

# TEST

In [11]:
import torchvision
test_image = cv2.imread('test/GiayCMND.png')
image, samples = preprocess(test_image)
image, preds = process(image, samples)
warped_images = postprocess(image, preds)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


In [12]:
for warped_image in warped_images:
    cv2.imshow('result', warped_image)
    cv2.waitKey()
    cv2.destroyAllWindows()

TypeError: Expected Ptr<cv::UMat> for argument 'mat'

# Combination

In [13]:
import torch
from maskrcnn import MaskrcnnResnet50FPN


class CardExtraction:
    def __init__(self, num_classes, weight_path, image_size):
        self.image_size = image_size
        self.model = MaskrcnnResnet50FPN(num_classes=num_classes)
        self.model.load_state_dict(torch.load(weight_path, map_location='cpu'))
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.model.eval()
    
    def preprocess(self, image):
        sample = cv2.resize(image, dsize=self.image_size)
        sample = torch.from_numpy(sample).to(torch.float).to(self.device)
        samples = sample.unsqueeze(dim=0).permute(0, 3, 1, 2)
        samples = (samples - samples.mean()) / samples.std() 
        return image, samples
    
    def process(self, image, samples):
        with torch.no_grad():
            preds = self.model(samples)
        return image, preds
            
    def postprocess(self, image, preds):
        pred = preds[0]
        boxes = pred['boxes']
        masks = pred['masks']
        scores = pred['scores']
        labels = pred['labels']

        indices = torchvision.ops.nms(boxes, scores, 0.5)
        masks = masks[indices]
        scores = scores[indices]
        labels = labels[indices]  
        
        indices = scores > 0.5
        masks = masks[indices]
        labels = labels[indices]


        labels = labels.detach().cpu().numpy()
        masks = masks.squeeze(1).detach().cpu().numpy()

        warped_images = []
        min_enclosing_quad = EnclosingQuadrilateral()
        for mask in masks:
            warped_image = min_enclosing_quad(image, mask)
            warped_images += warped_image

        return warped_images
    
    def __call__(self, *args):
        output = self.preprocess(*args)
        output = self.process(*output)
        output = self.postprocess(*output)
        return output

In [14]:
test_image = cv2.imread('test/GiayCMND.png')
card_extractor = CardExtraction(3, 
                                './weight/2011110823/best_model_31_dice_mAP=0.9705.pt', 
                                image_size=(768, 768))
warped_images = card_extractor(test_image)

In [15]:
for warped_image in warped_images:
    cv2.imshow('result', warped_image[0])
    cv2.waitKey()
    cv2.destroyAllWindows()

## TESTING

In [16]:
from pathlib import Path

In [17]:
def test():
    image_patterns = ['*.jpg', '*.png', '*.jpeg', '*.JPG', '*.PNG', '*.JPEG']
    input_dir = Path('./test/test/')
    image_paths = []
    for image_pattern in image_patterns:
        image_paths += list(input_dir.glob(f'**/{image_pattern}'))
        
    card_extractor = CardExtraction(3, 
                                './weight/2011110823/best_model_31_dice_mAP=0.9705.pt', 
                                image_size=(768, 768))
    
    for image_path in image_paths:
        image = cv2.imread(str(image_path))
        warped_images = card_extractor(image)
        for i in range(len(warped_images)):
            cv2.imwrite(f'./test/test/output/{image_path.stem}_{i}.jpg', warped_images[i][0])
            pts = np.int32(warped_images[i][1])
            cv2.polylines(image, [pts], True, (0, 255, 0), 2) 
            cv2.imwrite(f'./test/test/output/{image_path.stem}_box_{i}.jpg', image)
            

In [None]:
import cv2
import torchvision
test()