In [1]:
import sys
import cv2
from detect_cell_phone import EfficientDet
sys.path.append('/backup1/xymmodel/HandPose')
# sys.path.append('/backup1/xymmodel/yolo')
from detect_finger import detect_finger
# from detect_phone import detect_phone
import numpy as np

In [2]:
def inside(point, box):
    """Tell whether a point lies strictly within a box.

    Args:
        point: indexable whose items 0 and 1 are the x and y coordinates.
        box: indexable holding (x_min, y_min, x_max, y_max) at items 0-3.

    Returns:
        Truthy only when x_min < x < x_max and y_min < y < y_max; a point
        sitting exactly on the border counts as outside.
    """
    within_x = box[0] < point[0] < box[2]
    # The y test is only evaluated when the x test already passed.
    return within_x and box[1] < point[1] < box[3]
    
def get_larger_box(xy, scale, w, h):
    """Turn a box into a square around its centre, clipped to the image.

    The box's width and height are both multiplied by ``scale``; the shorter
    of the two scaled sides (truncated to an integer) becomes the side length
    of a square that shares the original box's centre.

    Args:
        xy: one box as four values (x_min, y_min, x_max, y_max); anything
            ``np.reshape`` can turn into shape (1, 4).
        scale: factor applied to the box's width and height.
        w: upper bound for the right x edge (callers pass the image width).
        h: upper bound for the bottom y edge (callers pass the image height).

    Returns:
        int16 array of shape (1, 4) in corner form
        (x_min, y_min, x_max, y_max). The left/top edges are clamped to be
        >= 0 and the right/bottom edges to be <= w / <= h.
    """
    xy = np.reshape(xy, (1, 4))
    centre = (xy[:, 2:] + xy[:, :2]) / 2          # (c_x, c_y)
    scaled_size = (xy[:, 2:] - xy[:, :2]) * scale  # (width, height) after scaling

    # Only the shorter scaled side is used, so the result is a square rather
    # than a uniformly enlarged copy of the box. The int16 cast truncates
    # toward zero.
    side = np.minimum(scaled_size[:, 0], scaled_size[:, 1]).astype(np.int16)
    half = side * 0.5

    # Left/top edges are only clamped from below, right/bottom edges only from
    # above; the int16 cast then drops the fractional part.
    left_x = np.maximum(centre[:, 0] - half, 0).astype(np.int16)
    left_y = np.maximum(centre[:, 1] - half, 0).astype(np.int16)
    right_x = np.minimum(centre[:, 0] + half, w).astype(np.int16)
    right_y = np.minimum(centre[:, 1] + half, h).astype(np.int16)
    return np.stack((left_x, left_y, right_x, right_y), axis=1)

def find_start(video_path, start_frame=480):
    """Scan a video for the frame where a fingertip first appears beside the phone.

    Frames are read in order and numbered from 1. Frames before
    ``start_frame`` are skipped without running any detector. Each later
    frame goes through three steps:

    1. Detect the phone; if none is found, move on to the next frame.
    2. Look for fingertips inside the phone box enlarged with scale 4. If
       none is found, scanning stops and this frame's data is returned.
    3. Scanning stops (start frame found) when a detected fingertip lies
       outside the phone box enlarged with scale 1.9; otherwise the next
       frame is examined.

    Args:
        video_path: path handed to ``cv2.VideoCapture``.
        start_frame: first 1-based frame number to analyse. Defaults to 480,
            the value that used to be hard-coded.

    Returns:
        dict with keys 'frame', 'count', 'phone', 'finger', 'phone_4_times'
        and 'mid_box'. 'count' is the number of the last frame read (or read
        attempt). When the video cannot be opened or ends before scanning
        stops, 'frame' is None and the detection entries hold whatever the
        last analysed frame produced (None if no frame was analysed).
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Couldn't open the video...")

    # Pre-bind everything the result reports so that an unreadable or too
    # short video yields None entries instead of an UnboundLocalError.
    frame = phone = finger = big_box = mid_box = None
    count = 0
    threshold = 0.1
    # Build the detector once rather than once per frame.
    # NOTE(review): assumes the detector keeps no per-frame state - confirm.
    efficientdet = EfficientDet(5)
    try:
        while True:
            count += 1
            ret, frame = video.read()
            # Check for end-of-stream before the skip test so a short video
            # stops immediately instead of spinning through failed reads.
            if not ret:
                break
            if count < start_frame:
                continue

            # Step 1: detect the phone; skip this frame if there is none.
            phone = efficientdet.efficientdet(frame)
            # `== None` (not `is None`) is kept as written: under numpy it
            # compares element-wise. TODO confirm what the detector returns
            # when nothing is found.
            if np.all(phone == None):
                print(f'第{count}帧，检测不到手机...')
                continue

            # Step 2: look for fingertips inside the enlarged phone region.
            frame_h, frame_w = frame.shape[0], frame.shape[1]
            big_box = get_larger_box(phone, 4, frame_w, frame_h)[0]
            mid_box = get_larger_box(phone, 1.9, frame_w, frame_h)[0]
            finger = detect_finger(frame, big_box, threshold)
            if finger[0] == [None, None]:
                print(f'第{count}帧，检测到手机但检测不到手指...')
                # NOTE(review): the original comment said such frames are
                # skipped, but the code returned here (the `continue` after
                # the return was unreachable). The returning behaviour is
                # kept; confirm whether it is a debugging leftover.
                break

            # Step 3: the start frame is the first one where a detected
            # fingertip lies outside the 1.9x box. The second fingertip is
            # only consulted when the first is inside and the second entry's
            # first item is truthy.
            if not inside(finger[0][1], mid_box) or (
                    finger[1][0] and not inside(finger[1][1], mid_box)):
                print(f"开始帧为第{count}帧...")
                break
            print(f'第{count}帧，检测到手指，但是不在指定区域...')
    finally:
        # Always free the capture handle, even if a detector raises.
        video.release()

    return {
        'frame': frame,
        'count': count,
        'phone': phone,
        'finger': finger,
        'phone_4_times': big_box,
        'mid_box': mid_box,
    }


In [2]:
# 直接取左手的左边
def get_larger_box(xy, scale, w, h, direction):
    """Build a search box adjacent to one side of a box, clipped to the image.

    ``side`` is the shorter of the box's scaled width and height, truncated
    to an integer. The returned region touches the chosen side of the input
    box, extends away from it by (box extent along that axis + ``side``), and
    is ``side`` wide across the other axis, centred on the box centre.

    Args:
        xy: one box as four values (x_min, y_min, x_max, y_max); anything
            ``np.reshape`` can turn into shape (1, 4).
        scale: factor applied to the box's width and height before taking
            the shorter one as ``side``.
        w: upper bound for the right x edge (callers pass the image width).
        h: upper bound for the bottom y edge (callers pass the image height).
        direction: which side of the box the region lies on:
            1 = towards larger x, 2 = towards larger y,
            3 = towards smaller x, 4 = towards smaller y.

    Returns:
        int16 array of shape (1, 4) in corner form
        (x_min, y_min, x_max, y_max). The left/top edges are clamped to be
        >= 0 and the right/bottom edges to be <= w / <= h.

    Raises:
        ValueError: if ``direction`` is not 1, 2, 3 or 4. (Previously 0 and
            negative values silently selected another direction through
            negative list indexing.)
    """
    if direction not in (1, 2, 3, 4):
        raise ValueError(f"direction must be 1, 2, 3 or 4, got {direction!r}")

    xy = np.reshape(xy, (1, 4))
    x_min, y_min, x_max, y_max = (xy[:, i] for i in range(4))
    centre_x = (x_min + x_max) / 2
    centre_y = (y_min + y_max) / 2
    box_w = x_max - x_min
    box_h = y_max - y_min

    # Shorter scaled side; the int16 cast truncates toward zero.
    side = np.minimum(box_w * scale, box_h * scale).astype(np.int16)
    half = side * 0.5

    if direction == 1:
        # Fixed: the x coordinates used to be swapped here, producing a box
        # whose left edge was to the right of its right edge.
        corners = (x_max, centre_y - half, x_max + box_w + side, centre_y + half)
    elif direction == 2:
        corners = (centre_x - half, y_max, centre_x + half, y_max + box_h + side)
    elif direction == 3:
        corners = (x_min - box_w - side, centre_y - half, x_min, centre_y + half)
    else:
        corners = (centre_x - half, y_min - box_h - side, centre_x + half, y_min)
    left_x, left_y, right_x, right_y = corners

    # Left/top edges are only clamped from below, right/bottom edges only from
    # above; the int16 cast then drops the fractional part.
    left_x = np.maximum(left_x, 0).astype(np.int16)
    left_y = np.maximum(left_y, 0).astype(np.int16)
    right_x = np.minimum(right_x, w).astype(np.int16)
    right_y = np.minimum(right_y, h).astype(np.int16)
    return np.stack((left_x, left_y, right_x, right_y), axis=1)

def find_start(video_path, direction_path='', start_frame=901):
    """Scan a video for the first frame with a fingertip beside the phone.

    Frames are read in order and numbered from 1. Frames before
    ``start_frame`` are skipped without running any detector. Each later
    frame goes through these steps:

    1. Detect the phone; if none is found, move on to the next frame.
    2. Build a search region on one side of the phone (chosen by the
       direction value) and look for fingertips inside it.
    3. Scanning stops at this frame whether or not a fingertip was found;
       only the printed message differs between the two outcomes.

    Args:
        video_path: path handed to ``cv2.VideoCapture``.
        direction_path: optional path to a text file whose first line is the
            integer direction passed to ``get_larger_box``. When empty,
            direction 3 is used.
        start_frame: first 1-based frame number to analyse. Defaults to 901,
            the value that used to be hard-coded.

    Returns:
        dict with keys 'frame', 'count', 'phone', 'finger' and
        'phone_4_times'. 'count' is the number of the last frame read (or
        read attempt). When the video cannot be opened or ends before
        scanning stops, 'frame' is None and the detection entries hold
        whatever the last analysed frame produced (None if no frame was
        analysed).
    """
    if direction_path:
        with open(direction_path) as f:
            direction = int(f.readline())
    else:
        direction = 3

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Couldn't open the video...")

    # Pre-bind everything the result reports so that an unreadable or too
    # short video yields None entries instead of an UnboundLocalError.
    frame = phone = finger = phone_4_times = None
    count = 0
    threshold = 0.1
    # Build the detector once rather than once per frame.
    # NOTE(review): assumes the detector keeps no per-frame state - confirm.
    efficientdet = EfficientDet(6)
    try:
        while True:
            count += 1
            ret, frame = video.read()
            # Check for end-of-stream before the skip test so a short video
            # stops immediately instead of spinning through failed reads.
            if not ret:
                break
            if count < start_frame:
                continue

            # Step 1: detect the phone; skip this frame if there is none.
            phone = efficientdet.efficientdet(frame)
            # NOTE(review): `phone == []` only behaves as an emptiness test
            # if the detector returns a plain list - confirm its return type.
            if phone == []:
                print(f'第{count}帧，检测不到手机...')
                continue

            # Step 2: search for fingertips in the region next to the phone.
            phone_4_times = get_larger_box(
                phone, 4, frame.shape[1], frame.shape[0], direction)[0]
            finger = detect_finger(frame, phone_4_times, threshold)
            if finger[0] == [None, None]:
                print(f'第{count}帧，检测到手机但检测不到手指...')
                # NOTE(review): the original comment said such frames are
                # skipped, but the code returned here (the `continue` after
                # the return was unreachable). The returning behaviour is
                # kept; confirm whether it is a debugging leftover.
                break

            # Step 3: any fingertip found in the search region marks the
            # start frame.
            print(f"开始帧为第{count}帧...")
            break
    finally:
        # Always free the capture handle, even if a detector raises.
        video.release()

    return {
        'frame': frame,
        'count': count,
        'phone': phone,
        'finger': finger,
        'phone_4_times': phone_4_times,
    }

In [3]:
# Run the start-frame search on one batch-5 clip, reading its search direction
# from the matching results folder.
# Alternative inputs kept for reference:
# ret = find_start('/home/user/zy/attack-on-pattern-pin/data/WeChat_20200912143947.mp4')
# ret = find_start('/home/user/zy/attack-on-pattern-pin/data/wyd_2.mp4')
video_name = 'SSSM-B5-002'
ret = find_start(
    f'/home/user/zy/attack-on-pattern-pin/data/batch_5/{video_name}.mp4',
    f'/home/user/zy/attack-on-pattern-pin/results/batch_5/{video_name}/direction.txt',
)

AssertionError: 
The NVIDIA driver on your system is too old (found version 10000).
Please update your GPU driver by downloading and installing a new
version from the URL: http://www.nvidia.com/Download/index.aspx
Alternatively, go to: https://pytorch.org to install
a PyTorch version that has been compiled with your version
of the CUDA driver.

In [19]:
import cv2

# Show the fingertip result, draw the phone box and the search region onto the
# returned frame (in place), then save the annotated frame for inspection.
print(ret['finger'])
canvas = ret['frame']
for key in ('phone', 'phone_4_times'):
    box = ret[key]
    cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 3)
# Optional overlays, disabled:
# cv2.rectangle(ret['frame'],(ret['mid_box'][0], ret['mid_box'][1]),(ret['mid_box'][2], ret['mid_box'][3]),(0,255,0),3)
# cv2.circle(ret['frame'], tuple(ret['finger'][0][1]), 3, (0,0,255), -1)
# cv2.circle(ret['frame'], tuple(ret['finger'][1][1]), 3, (0,0,255), -1)
cv2.imwrite('/home/user/zy/attack-on-pattern-pin/data/temp2.jpg', canvas)

[[None, None], [None, None]]


True

In [24]:
# Sanity check: a 1x2 box at the origin, scale 4, with 100 as the upper bound
# for both the x and y edges. This call uses the four-argument signature
# (no `direction` argument).
get_larger_box([0, 0, 1, 2], 4, 100, 100)

[[0.5 1.  4.  8. ]]
[array([-1.5]), array([-1.]), array([2.5]), array([3.])]
[array([0], dtype=int16), array([0], dtype=int16), array([2], dtype=int16), array([3], dtype=int16)]


array([[0, 0, 2, 3]], dtype=int16)