In [114]:
# Written by Tutku Tashkan 17/03/2023

import os
from os.path import exists, join, basename, splitext

import random
import PIL
import torchvision
import cv2
import numpy as np
import torch
import pandas as pd
import wget
torch.set_grad_enabled(False)

import time
import matplotlib
import matplotlib.pylab as plt
plt.rcParams["axes.grid"] = False

import math


In [115]:
# Build the Mask R-CNN (ResNet-50 + FPN backbone) detector with its
# pretrained weights, then put it in inference mode on the CPU.
# nn.Module.eval() and nn.Module.cpu() both modify the module in place
# (and return it), so calling them as separate statements is equivalent
# to chaining them.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()
model.cpu()

# Human-readable class names, indexed by the integer label id.
# Index 0 is the 'unlabeled' placeholder; the table has 91 entries (ids 0-90).
coco_names = [
    'unlabeled', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'street sign',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack',
    'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'plate', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
    'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'mirror',
    'dining table', 'window', 'desk', 'toilet', 'door', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'blender', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# One random [c0, c1, c2] colour (each channel 0-255) per class name.
# The RNG is not seeded here, so colours differ from run to run.
colors = [
    [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
    for _name in coco_names
]



In [116]:
# --- Live Mask R-CNN demo on the default webcam -----------------------------
# Reads frames, runs the detector, and for every confident detection draws
# the bounding box, a label, the mask centroid, the two PCA axes of the mask
# and a translucent mask overlay. Press 'q' in the window to quit.

SCORE_THRESHOLD = 0.5   # detections at or below this confidence are skipped
MASK_THRESHOLD = 0.5    # soft-mask cut-off used to binarise a mask for PCA
CAMERA_INDEX = 0        # first camera connected to the computer
WINDOW_NAME = 'Mask R-CNN output'


def _mask_principal_axes(mask_bin):
    """Return centroid and PCA axis endpoints of a binary mask.

    Parameters
    ----------
    mask_bin : 2-D boolean/0-1 array (rows = y, columns = x).

    Returns
    -------
    (center, endpoint1, endpoint2) as tuples of Python ints in (x, y) pixel
    coordinates, or ``None`` when the mask has fewer than two foreground
    pixels (no covariance can be computed).

    ``endpoint1`` lies along the major axis and ``endpoint2`` along the
    perpendicular minor axis, each at two standard deviations from the
    centroid. The sign of the major axis is arbitrary (an eigenvector and its
    negation are equally valid), so the line may point either way along it.
    """
    ys, xs = np.nonzero(mask_bin)
    if xs.size < 2:
        return None

    center = (int(xs.mean()), int(ys.mean()))

    # The covariance matrix is symmetric, so eigh is the right solver: it
    # always returns real eigenvalues, sorted in ascending order.
    cov = np.cov(np.vstack([xs, ys]))
    evals, evecs = np.linalg.eigh(cov)
    major = evecs[:, 1]  # eigenvector of the largest eigenvalue

    # Angle between the major axis and the image x-axis.
    angle = math.atan2(major[1], major[0])

    # Clamp at 0: round-off can make a near-zero eigenvalue slightly negative.
    length1 = int(2 * math.sqrt(max(float(evals[1]), 0.0)))
    length2 = int(2 * math.sqrt(max(float(evals[0]), 0.0)))

    endpoint1 = (int(center[0] + length1 * math.cos(angle)),
                 int(center[1] + length1 * math.sin(angle)))
    endpoint2 = (int(center[0] - length2 * math.sin(angle)),
                 int(center[1] + length2 * math.cos(angle)))
    return center, endpoint1, endpoint2


cap = cv2.VideoCapture(CAMERA_INDEX)  # Open the first camera connected to the computer
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open camera %d' % CAMERA_INDEX)

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

    while True:
        ret, frame = cap.read()  # Read the camera frame
        if not ret:
            # Without this the loop would spin forever with no way to quit,
            # because the key check only runs after a frame is shown.
            print('Camera returned no frame; stopping.')
            break

        # OpenCV delivers BGR; the model input is built from an RGB image.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = PIL.Image.fromarray(frame_rgb)

        t_start = time.time()
        image_tensor = torchvision.transforms.functional.to_tensor(image).cpu()
        output = model([image_tensor])[0]
        print('executed in %.3fs' % (time.time() - t_start))

        # All drawing below happens on this RGB copy.
        result_image = np.array(image.copy())

        # Line/font thickness depend only on the frame size, so compute once.
        tl = round(0.002 * max(result_image.shape[0:2])) + 1  # line thickness
        tf = max(tl - 1, 1)  # font thickness

        for box, label, score, mask in zip(output['boxes'], output['labels'],
                                           output['scores'], output['masks']):
            score = float(score)
            if score <= SCORE_THRESHOLD:
                continue

            label = int(label)
            color = colors[label]

            # Draw box
            c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(result_image, c1, c2, color, thickness=tl)

            # Draw text on a filled background just above the box corner
            display_txt = "%s: %.1f%%" % (coco_names[label], 100 * score)
            t_size = cv2.getTextSize(display_txt, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(result_image, c1, c2, color, -1)  # filled
            cv2.putText(result_image, display_txt, (c1[0], c1[1] - 2), 0, tl / 3,
                        [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

            # Soft mask with values in [0, 1]. It is used directly in image
            # coordinates below, i.e. it is assumed to be frame-sized.
            mask_np = mask.squeeze().cpu().numpy()

            # Centroid + PCA axes of the thresholded mask. Using `> 0` here
            # would count every faintly-activated pixel as foreground.
            axes = _mask_principal_axes(mask_np >= MASK_THRESHOLD)
            if axes is not None:
                center, endpoint1, endpoint2 = axes
                # Colours are RGB tuples because result_image is RGB.
                cv2.circle(result_image, center, 5, (0, 255, 0), 2)
                cv2.line(result_image, center, endpoint1, (0, 0, 255), 2)
                cv2.line(result_image, center, endpoint2, (255, 0, 0), 2)

            # Overlay the mask: add half of the class colour, weighted by the
            # soft mask value, then clamp back into uint8 range.
            overlay = mask_np.astype(np.float32) * 0.5
            result_image = (result_image.astype(np.float32)
                            + overlay[:, :, np.newaxis] * color).clip(0, 255).astype(np.uint8)

        # imshow interprets arrays as BGR, so convert back before displaying;
        # otherwise red and blue appear swapped on screen.
        cv2.imshow(WINDOW_NAME, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if inference or
    # drawing raised part-way through.
    cap.release()
    cv2.destroyAllWindows()

executed in 4.057s
executed in 4.904s
executed in 3.932s
executed in 3.600s
executed in 3.296s
executed in 3.291s
executed in 3.245s
