In [3]:
import numpy as np
import cv2
import csv
import math
import dlib

In [42]:
# bb = bounding box


"""
Bounding-box operations

"""

def scale_bb(x, y, w, h, max_x, max_y, scale):
    """Scale the box (x, y, w, h) by ``scale`` while keeping it centred.

    The origin is shifted by half of the size change and every component is
    truncated with ``int``. ``max_x`` and ``max_y`` are accepted but not used:
    the result is not clamped to any bounds.

    Returns the list [x, y, w, h].
    """
    growth = scale - 1
    left = x - w * growth / 2
    top = y - h * growth / 2
    return [int(left), int(top), int(w * scale), int(h * scale)]

def intersection_area(x1, y1, w1, h1, x2, y2, w2, h2):
    """Return the overlap area of two boxes given as (x, y, w, h) components.

    The overlap along each axis is clamped at 0, so disjoint boxes yield 0.
    """
    overlap_left = max(x1, x2)
    overlap_right = min(x1 + w1, x2 + w2)
    overlap_top = max(y1, y2)
    overlap_bottom = min(y1 + h1, y2 + h2)
    return max(0, overlap_right - overlap_left) * max(0, overlap_bottom - overlap_top)

def square(bb):
    """Return the area of the box ``bb``: bb[2] (width) times bb[3] (height)."""
    width, height = bb[2], bb[3]
    return width * height

def intersection_square(bb1, bb2):
    """Return the area of the intersection of two (x, y, w, h) boxes.

    Boxes that do not overlap (or only touch) give 0. Without the clamp a
    pair that is disjoint on both axes would multiply two negative extents
    and report a positive "area".
    """
    x_overlap = min(bb1[0] + bb1[2], bb2[0] + bb2[2]) - max(bb1[0], bb2[0])
    y_overlap = min(bb1[1] + bb1[3], bb2[1] + bb2[3]) - max(bb1[1], bb2[1])
    if x_overlap <= 0 or y_overlap <= 0:
        return 0
    return x_overlap * y_overlap

def have_intersection(bb1, bb2):
    """Tell whether two (x, y, w, h) boxes overlap; touching edges count as overlap."""
    # One box lies entirely to the left of the other.
    if bb1[0] + bb1[2] < bb2[0]:
        return False
    if bb2[0] + bb2[2] < bb1[0]:
        return False
    # One box lies entirely above the other.
    if bb1[1] + bb1[3] < bb2[1]:
        return False
    if bb2[1] + bb2[3] < bb1[1]:
        return False
    return True


def pts_to_bb(pts):
    """Return the axis-aligned box [x, y, w, h] enclosing the first four points of ``pts``.

    Each point is indexed as (x, y) = (pt[0], pt[1]).
    """
    xs = [pts[i][0] for i in range(4)]
    ys = [pts[i][1] for i in range(4)]
    left, top = min(xs), min(ys)
    return [left, top, max(xs) - left, max(ys) - top]


def detect_faces(img, detector):
    """Convert ``img`` from BGR to grayscale and return ``detector.detectMultiScale`` results.

    ``detector`` only needs a ``detectMultiScale(image, 1.1, 3)`` method; in
    this notebook it is a ``cv2.CascadeClassifier``.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # DO NOT CHANGE PARAMETERS  1.099, 0
    scale_factor, min_neighbors = 1.1, 3
    return detector.detectMultiScale(grayscale, scale_factor, min_neighbors)

def filter_bbs(bbs, max_w, max_h):
    """Drop boxes that are too large or that cover more than one other box.

    A box is rejected when
      * its width exceeds ``max_w / 7.0`` or its height exceeds ``max_h / 7.0``, or
      * at least two *other* intersecting boxes have more than 80% of their
        own area inside it.

    Returns a numpy array built from the surviving [x, y, w, h] rows.
    """
    kept = []
    for [x, y, w, h] in bbs:
        # The size gate does not depend on the other boxes, so test it once.
        if w > max_w / 7.0 or h > max_h / 7.0:
            continue

        covered_count = 0
        crowded = False
        for [x2, y2, w2, h2] in bbs:
            if [x, y, w, h] == [x2, y2, w2, h2]:
                continue  # identical box (including the box itself)
            if not have_intersection([x, y, w, h], [x2, y2, w2, h2]):
                continue
            overlap = intersection_area(x, y, w, h, x2, y2, w2, h2)
            if float(overlap) / (w2 * h2) > 0.8:
                covered_count += 1
            if covered_count > 1:
                crowded = True
                break

        if not crowded:
            kept.append(np.array([x, y, w, h]))

    return np.array(kept)
                       
def check_bb_for_face(bb, img, detector="VJ"):
    """Re-run a face detector on the crop of ``img`` described by ``bb`` = (x, y, w, h).

    detector:
      "dlib" - dlib frontal face detector (upsample argument 1); True if it
               yields at least one detection.
      "VJ"   - Haar cascade 'haarcascade_frontalface_default.xml' with
               detectMultiScale(gray, 1.099, 0); True if any box is returned.
      "cnn"  - ``Net.predict``; True if either of the first two scores of the
               first prediction exceeds 0.8. ``Net`` is not defined in this
               part of the notebook and must exist at call time.

    Any other ``detector`` value falls through and returns None.
    """
    x, y, w, h = bb[0], bb[1], bb[2], bb[3]
    crop = img[y:y + h, x:x + w]

    if detector == "dlib":
        found = dlib.get_frontal_face_detector()(crop, 1)
        return any(True for _ in found)

    if detector == "VJ":
        cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        hits = cascade.detectMultiScale(cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY), 1.099, 0)
        return True if len(hits) else False

    if detector == "cnn":
        scores = Net.predict([crop], oversample=False)[0]
        return True if (scores[0] > 0.8) or (scores[1] > 0.8) else False

    
def preprocess_embbedings(bbs):
    """Remove boxes that enclose other boxes and thin out heavily overlapping ones.

    Boxes are sorted by their left edge. For every box, each later box whose
    left edge is not beyond this box's right edge and which intersects it is
    classified as
      * an "embedding" if its right edge lies left of this box's right edge, or
      * an "overlap" if the intersection exceeds 30% of the larger of the two areas.

    A box with at least one embedding is removed. Among the overlaps recorded
    for a box only the largest one is kept; the other overlaps are removed.

    Returns the surviving boxes as a list, in left-edge order.
    """

    class Collision:
        """Overlap and embedding indices recorded for one bounding box."""

        def __init__(self, bb_id, overlap_ids=None, embedding_ids=None):
            self.id = bb_id
            # Build fresh lists per instance; a mutable default would be
            # shared by every Collision created without explicit arguments.
            self.overlaps = [] if overlap_ids is None else overlap_ids
            self.embeddings = [] if embedding_ids is None else embedding_ids

    bbs_sorted = sorted(bbs, key=lambda bb: bb[0])

    collisions = []
    for bb_id, bb in enumerate(bbs_sorted):
        bb_right = bb[0] + bb[2]
        overlap_ids = []
        embedding_ids = []
        for bb_other_id in range(bb_id + 1, len(bbs_sorted)):
            other = bbs_sorted[bb_other_id]
            if other[0] > bb_right:
                # Sorted by left edge: no later box can reach this one.
                break
            if not have_intersection(other, bb):
                continue
            if other[0] + other[2] < bb_right:
                embedding_ids.append(bb_other_id)
            elif intersection_square(other, bb) > 0.3 * max(square(other), square(bb)):
                overlap_ids.append(bb_other_id)
        collisions.append(Collision(bb_id, overlap_ids, embedding_ids))

    # A set gives O(1) membership tests when filtering below.
    ids_to_remove = set()
    for collision in collisions:
        if collision.embeddings:
            ids_to_remove.add(collision.id)
        if collision.overlaps:
            id_to_save = max(collision.overlaps, key=lambda idx: square(bbs_sorted[idx]))
            ids_to_remove.update(idx for idx in collision.overlaps if idx != id_to_save)

    return [bb for idx, bb in enumerate(bbs_sorted) if idx not in ids_to_remove]

def get_bbs(img, detector):
    """Return the face boxes that ``detect_faces`` finds on ``img``.

    This is currently a plain pass-through: the extra filtering through
    preprocess_embbedings and check_bb_for_face is disabled.
    """
    return detect_faces(img, detector)


def are_close(bb1, bb2):
    """Tell whether two (x, y, w, h) boxes have similar area and nearby centres.

    The boxes are "close" when their areas differ by no more than a quarter of
    the larger area and the centre offsets are below half of ``bb1``'s width
    (horizontally) and half of ``bb1``'s height (vertically). The check is
    therefore not symmetric in its arguments.
    """
    area1 = bb1[2] * bb1[3]
    area2 = bb2[2] * bb2[3]
    if abs(area1 - area2) > max(area1, area2) / 4:
        return False

    dx = abs(bb1[0] + bb1[2] / 2 - bb2[0] - bb2[2] / 2)
    dy = abs(bb1[1] + bb1[3] / 2 - bb2[1] - bb2[3] / 2)
    return (dx < bb1[2] / 2) and (dy < bb1[3] / 2)


"""
Editing found faces

"""    
def track_obj(face, frame):
    """Prepare mean-shift tracking data for ``face`` = (x, y, w, h) on ``frame``.

    Returns (track_window, roi, hsv_roi, mask, roi_hist, term_crit) where
      * track_window is ``face`` itself, unmodified;
      * roi is the crop of ``frame`` taken after truncating the components to
        int and clamping negative values to 0 (no upper clamp is applied);
      * mask selects HSV pixels between (0, 51, 89) and (17, 140, 255);
      * roi_hist is the 180-bin hue histogram of the masked crop, min-max
        normalised to 0..255;
      * term_crit stops after 10 iterations or an epsilon of 1.
    """
    x, y, w, h = [int(max(0, value)) for value in face]
    roi = frame[y:y + h, x:x + w]
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    hsv_lower = np.array((0., 51., 89.))
    hsv_upper = np.array((17., 140., 255.))
    mask = cv2.inRange(hsv_roi, hsv_lower, hsv_upper)

    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
    return face, roi, hsv_roi, mask, roi_hist, term_crit



def preprocess_bbs(bbs, frames_arr, timeout=100, im_width=800, im_height=400):
    """Link per-frame detections into tracks and return boxes keyed by person id.

    Parameters:
        bbs: mapping frame number -> iterable of boxes; each box is read as
            bb[0..3] and passed to scale_bb as (x, y, w, h).
        frames_arr: frame images indexed by frame number; they are converted
            with cv2.COLOR_BGR2GRAY / cv2.COLOR_BGR2HSV below.
        timeout: initial value of the 'timeout' counter stored with each face;
            it is decremented on the previous frame's entry each time the face
            is carried forward without a matching detection.
        im_width, im_height: forwarded to scale_bb (which does not use them).

    Returns:
        dict: new_faces[frame][person_id] = {'timeout': ..., 'coords': ...}.

    The code reads faces[0] and new_faces[frame - 1], so ``bbs`` must contain
    frame 0 and must be iterated in increasing, consecutive frame order.
    """

    # returns improved bounding boxes with person_id


    faces = {} # faces dict before processing
    max_id = 1

    # Give every raw detection its own id; ids are unique across all frames.
    for fn in bbs:
        faces[fn] = {}
        for bb in bbs[fn]:
            faces[fn][max_id] = {}
            faces[fn][max_id]['timeout'] = timeout
            # scale 1 keeps the box as is; scale_bb only truncates the components to int
            faces[fn][max_id]['coords'] = scale_bb(bb[0], bb[1], bb[2], bb[3], im_width, im_height, 1)
            max_id += 1


    new_faces = {} # faces dict after processing

    # Frame 0 is taken over unchanged (raises KeyError if bbs has no frame 0).
    new_faces[0] = faces[0]

    # params for ShiTomasi corner detection
    feature_params = dict( maxCorners = 100,
                           qualityLevel = 0.3,
                           minDistance = 7,
                           blockSize = 7 )

    # Parameters for lucas kanade optical flow
    lk_params = dict( winSize  = (15,15),
                      maxLevel = 2,
                      criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    # Per person id: [previous gray frame, tracked feature points]
    opt_flow_dict = {}
    # Per person id: [mean-shift track window, hue histogram]
    mean_shift_dict = {}

    # With a value of 1 the `frame % detection_cell == 0` test below is always true.
    detection_cell = 1

    for frame in faces:
        if frame != 0:
            new_faces[frame] = {} 

            # update detected faces

            if frame % detection_cell == 0: # detect faces each detection_cell frame
                for cur_id in faces[frame]:
                    found = 0      # a close previous face was found for this detection
                    intersect = 0  # this detection intersects some previous box
                    for prev_id in new_faces[frame - 1]:          
                        if (have_intersection(new_faces[frame - 1][prev_id]['coords'], 
                                              faces[frame][cur_id]['coords'])):
                            intersect = 1

                        if new_faces[frame - 1][prev_id]['timeout'] > 0:
                            if not found and are_close(new_faces[frame - 1][prev_id]['coords'], 
                                                       faces[frame][cur_id]['coords']):

                                # The detection inherits the previous id. The previous entry
                                # gets timeout -1 so the "lost faces" pass below skips it.
                                # NOTE(review): the copied entry keeps the previous frame's
                                # 'coords'; the new detection's coords are not stored here.
                                new_faces[frame][prev_id] = new_faces[frame-1][prev_id].copy()
                                new_faces[frame - 1][prev_id]['timeout'] = -1
                                new_faces[frame][prev_id]['timeout'] = timeout
                                found = 1


#                                 if prev_id  in mean_shift_dict:
                                    # track this face for meanshift

                                track_windows, rois, hsv_rois, masks, roi_hists, term_crit = \
                                        track_obj(new_faces[frame][prev_id]['coords'], frames_arr[frame])
                                mean_shift_dict[prev_id] = [track_windows, roi_hists]

                                # track this face for optical flow

                                # NOTE(review): 'coords' is stored as (x, y, w, h) (see scale_bb),
                                # but is unpacked here as (x, y, h, w), so the names h and w are
                                # swapped in the mask slice below. Confirm whether this is intended.
                                (x, y, h, w) = new_faces[frame][prev_id]['coords']

                                old_gray = cv2.cvtColor(frames_arr[frame], cv2.COLOR_BGR2GRAY)                
                                mask = np.zeros_like(old_gray)
                                mask[y:y+h, x:x+w] = 1
                                p0 = cv2.goodFeaturesToTrack(old_gray, mask = mask, **feature_params)
                                opt_flow_dict[prev_id] = [old_gray, p0]


                    if not found and not intersect: 
                        # insert new face which was not detected before

                        new_faces[frame][cur_id] = faces[frame][cur_id]

                        # track this face for meanshift

                        track_windows, rois, hsv_rois, masks, roi_hists, term_crit = \
                                track_obj(new_faces[frame][cur_id]['coords'], frames_arr[frame])
                        mean_shift_dict[cur_id] = [track_windows, roi_hists]

                        # track this face for optical flow

                        # NOTE(review): same (x, y, h, w) unpacking of (x, y, w, h) coords as above.
                        (x, y, h, w) = new_faces[frame][cur_id]['coords']

                        old_gray = cv2.cvtColor(frames_arr[frame], cv2.COLOR_BGR2GRAY)                
                        mask = np.zeros_like(old_gray)
                        mask[y:y+h, x:x+w] = 1
                        p0 = cv2.goodFeaturesToTrack(old_gray, mask = mask, **feature_params)
                        opt_flow_dict[cur_id] = [old_gray, p0]

            # update lost faces from previous frame
            # (entries matched above have timeout -1 and are skipped here)

            for prev_id in new_faces[frame - 1]:
                if new_faces[frame - 1][prev_id]['timeout'] > 0:
                    new_faces[frame - 1][prev_id]['timeout'] -= 1

                    if not prev_id in opt_flow_dict: # start tracking

                        # MEANSHIFT

                        track_windows, rois, hsv_rois, masks, roi_hists, term_crit = \
                            track_obj(new_faces[frame - 1][prev_id]['coords'], frames_arr[frame-1])

                        mean_shift_dict[prev_id] = [track_windows, roi_hists]

                        # OPTICAL FLOW

                        (x, y, h, w) = new_faces[frame - 1][prev_id]['coords']

                        # scale 1: values are only truncated to int
                        (x, y, h, w) = scale_bb(x, y, h, w, frames_arr[frame - 1].shape[0], 
                                                frames_arr[frame - 1].shape[1], 1)

                        old_gray = cv2.cvtColor(frames_arr[frame - 1], cv2.COLOR_BGR2GRAY)

                        mask = np.zeros_like(old_gray)         
                        mask[y:y+h, x:x+w] = 1

                        p0 = cv2.goodFeaturesToTrack(old_gray, mask = mask, **feature_params)

                        opt_flow_dict[prev_id] = [old_gray, p0]

                        # update bb: carry the previous entry over unchanged
                        new_faces[frame][prev_id] = new_faces[frame - 1][prev_id].copy()


                    else: # continue tracking

                        # OPTICAL FLOW

                        (x0, y0, h0, w0) = new_faces[frame - 1][prev_id]['coords']

                        p0 = opt_flow_dict[prev_id][1]

                        # number of feature points that survive the forward-backward check
                        count = 0

#                         (x, y, h, w) = (0,0,0,0)

                        if not p0 is None:

                            old_coords = new_faces[frame - 1][prev_id]['coords']

                            old_gray = opt_flow_dict[prev_id][0]

                            frame_gray = cv2.cvtColor(frames_arr[frame], cv2.COLOR_BGR2GRAY)

                            # Track the points forward, then back again; keep a point only if
                            # the round trip lands within 1 pixel of where it started.
                            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
                            p0r, st, err = cv2.calcOpticalFlowPyrLK(frame_gray, old_gray, p1, None, **lk_params)

                            cond = abs(p0-p0r).max(-1) < 1

                            new_tracks = []
                            track_points_limit = 5

                            for tr, (x, y), good_flag in zip(p0, p1.reshape(-1, 2), cond):
                                if not good_flag:
                                    continue

                                new_tracks.append(np.array([x, y]))

                                # keep only the most recent track_points_limit points
                                if len(new_tracks) > track_points_limit:
                                    del new_tracks[0]
#                                     del new_tracks[1]

#                                 new_tracks.append(tr[0])

#                             opt_flow_dict[prev_id][1] = new_tracks.copy()

                            center_m = []
                            for z in new_tracks:
#                                 print z
                                center_m.append(z)

#                             print center_m
                            center_m = np.mean(np.array(center_m), axis=0)
#                             print center_m
                            # np.mean gives a scalar np.float64 rather than an array when no
                            # points were kept; in that case the box is not recentred.
                            if type(center_m) is not np.float64:
                                # centre the box on the mean of the kept points, keeping its size
                                (x, y, w, h) = (int(center_m[0]-w0/2), 
                                                int(center_m[1]-h0/2), w0, h0)

                            count = len(new_tracks)

                        if count != 0:

                            # create new bb coordinates for optical flow from good_new

                            old_gray = frame_gray.copy()
                            p0 = np.array(new_tracks).reshape(-1,1,2)
                            opt_flow_dict[prev_id] = [old_gray, p0]

                        else:
                            # No usable points: re-seed the features from the previous frame's box.
                            (x, y, h, w) = new_faces[frame - 1][prev_id]['coords']

                            (x, y, h, w) = scale_bb(x, y, h, w, frames_arr[frame - 1].shape[0], 
                                                    frames_arr[frame - 1].shape[1], 1)

                            old_gray = cv2.cvtColor(frames_arr[frame - 1], cv2.COLOR_BGR2GRAY)

                            mask = np.zeros_like(old_gray)         
                            mask[y:y+h, x:x+w] = 1

                            p0 = cv2.goodFeaturesToTrack(old_gray, mask = mask, **feature_params)

                            opt_flow_dict[prev_id] = [old_gray, p0]

                            # update bb
                            new_faces[frame][prev_id] = new_faces[frame - 1][prev_id].copy()

                        # MEANSHIFT
#                         print (x, y, h, w)
                        # term_crit is whatever the most recent track_obj call returned
                        hsv = cv2.cvtColor(frames_arr[frame], cv2.COLOR_BGR2HSV)
                        dst = cv2.calcBackProject([hsv], [0], mean_shift_dict[prev_id][1], [0, 180], 1)
                        ret, mean_shift_dict[prev_id][0] = cv2.meanShift(dst, 
                                                                         tuple(mean_shift_dict[prev_id][0]), 
                                                                         term_crit)
                        (x2, y2, w2, h2) = mean_shift_dict[prev_id][0]
                        # optical flow gave nothing: fall back to the mean-shift window
                        # NOTE(review): assigned as (x, y, h, w), so h takes w2 and w takes h2.
                        if count == 0:
                             (x, y, h, w) = (x2, y2, w2, h2)
                        # update tracking dict

                        new_faces[frame][prev_id] = new_faces[frame - 1][prev_id].copy()

                        # (x, y, w, h) - optical-flow result
                        # (x2, y2, w2, h2) - mean-shift result
                        # their weighted average is stored as the new box

                        # With optflow_weight = 1.0 the mean-shift weight is 0, so the stored
                        # box is the optical-flow result alone.
                        optflow_weight = 1.0
                        msh_weight = 1 - optflow_weight


                        new_faces[frame][prev_id]['coords'] = (int(x*optflow_weight + x2*msh_weight), \
                                                               int(y*optflow_weight + y2*msh_weight), \
                                                               int(w*optflow_weight + w2*msh_weight), \
                                                               int(h*optflow_weight + h2*msh_weight))




    return new_faces


"""
Reading, drawing and writing

"""

def write_cropped_image_by_bb(folder_path, frame_num, person_id, img, bb):
    """Save the crop of ``img`` described by ``bb`` = (x, y, w, h).

    The file is written to ``folder_path`` as
    "frame<frame_num>person<person_id>.jpg".
    """
    file_name = folder_path + "/frame%dperson%d.jpg" % (frame_num, person_id)
    cv2.imwrite(file_name, img[bb[1] : bb[1] + bb[3], bb[0] : bb[0] + bb[2]])


def write_video(input_file, frames, output_file):
    """Write a copy of ``input_file`` with the tracked face boxes drawn on it.

    The ``def`` line of this function was missing, which left its body
    attached to write_cropped_image_by_bb; the signature matches the call in
    extract_people.

    Parameters:
        input_file: path of the source video.
        frames: the (raw_faces, frame_images) pair returned by
            video_to_frames_dict.
        output_file: path of the XVID video to create (15 fps, same frame
            size as the input).
    """
    raw_faces, frame_images = frames
    tracked = preprocess_bbs(raw_faces, frame_images)

    vidFile = cv2.VideoCapture(input_file)
    output_video = None
    try:
        for frame_num in sorted(tracked):
            ret, frame = vidFile.read()
            if not ret:
                # End of stream or decode failure: nothing left to annotate.
                break
            if output_video is None:
                # Take the size from the first frame without consuming an extra
                # frame, so frame k is paired with the boxes of frame k.
                height, width = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                output_video = cv2.VideoWriter(output_file, fourcc, 15.0, (width, height))
            output_video.write(draw_faces_bbs(frame, tracked[frame_num]))
    finally:
        vidFile.release()
        if output_video is not None:
            output_video.release()

    

def draw_faces_bbs(img, faces_bbs):
    """Draw every face box of ``faces_bbs`` on ``img`` together with its id.

    ``faces_bbs`` maps face id -> {'coords': (x, y, w, h), ...}. The id is
    written at the box origin and the box outline is drawn with draw_rect.
    Returns the image handed back by the last draw_rect call (``img`` itself
    when there are no faces).
    """
    label_color = (0, 0, 255)
    for face_id in faces_bbs:
        x, y, w, h = faces_bbs[face_id]['coords']
        cv2.putText(img, str(face_id), (x, y), 1, 1, label_color, 2, cv2.LINE_AA)
        box = scale_bb(x, y, w, h, img.shape[0], img.shape[1], 1)
        img = draw_rect(img, box)

    return img


def draw_rect(img, bb):
    """Draw the outline of ``bb`` = (x, y, w, h) on ``img`` and return ``img``.

    The rectangle is drawn with colour (0, 255, 0) and thickness 1.
    """
    left, top, width, height = bb
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 255 ,0), 1)
    return img


def video_to_frames_dict(input_file, frames_num, cell, detector):
    """Read up to ``frames_num`` frames of a video and detect faces on some of them.

    Parameters:
        input_file: path of the video to read.
        frames_num: maximum number of frames to read.
        cell: detection interval; faces are detected on frames whose index is
            a multiple of ``cell``, other frames get an empty array.
        detector: passed through to get_bbs.

    Returns:
        (frames, all_frames) where ``frames`` maps frame index -> detected
        boxes and ``all_frames`` is a list of length ``frames_num`` holding the
        decoded images (entries stay None for frames that could not be read).
    """
    vidFile = cv2.VideoCapture(input_file)
    frames = {}
    all_frames = [None] * frames_num

    try:
        for cur_frame in range(frames_num):
            ret, frame = vidFile.read()
            if not ret:
                # End of stream or decode failure: do not record a None frame
                # or hand it to the detector.
                break
            all_frames[cur_frame] = frame
            if cur_frame % cell == 0:
                frames[cur_frame] = get_bbs(frame, detector)
            else:
                frames[cur_frame] = np.array([])
    finally:
        vidFile.release()

    return frames, all_frames


def save_dict_as_csv(faces_dictionary):
    """Write the tracked boxes to 'faces.csv' in the current directory.

    ``faces_dictionary`` maps frame -> person_id -> {'coords': (x, y, w, h)}.
    One row is written per (frame, person) after the header row
    frame, person_id, x, y, w, h.
    """
    import sys

    # csv.writer needs a binary file on Python 2 but a text file opened with
    # newline='' on Python 3, where 'wb' makes writerow raise TypeError.
    if sys.version_info[0] >= 3:
        csvfile = open('faces.csv', 'w', newline='')
    else:
        csvfile = open('faces.csv', 'wb')

    with csvfile:
        writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['frame', 'person_id', 'x', 'y', 'w', 'h'])
        for frame_num in faces_dictionary:
            for person_id in faces_dictionary[frame_num]:
                x, y, w, h = faces_dictionary[frame_num][person_id]['coords']
                writer.writerow([frame_num, person_id, x, y, w, h])
        

def video_to_faces(folder_path, input_video, frames_num, detector, cell=1):
    """Detect and track faces in a video, then save the CSV and one crop per face.

    Parameters:
        folder_path: directory that receives the cropped face images.
        input_video: path of the video to process.
        frames_num: maximum number of frames to process.
        detector: face detector handed to video_to_frames_dict.
        cell: detection interval forwarded to video_to_frames_dict (the call
            previously omitted this required argument and raised TypeError).
            The default of 1 runs detection on every frame.
    """
    raw_faces, frames = video_to_frames_dict(input_video, frames_num, cell, detector)

    faces = preprocess_bbs(raw_faces, frames)

    save_dict_as_csv(faces)

    vidFile = cv2.VideoCapture(input_video)
    try:
        for cur_frame in range(frames_num):
            ret, frame = vidFile.read()
            if not ret:
                # Video is shorter than frames_num: stop instead of cropping None.
                break
            for person_id in faces.get(cur_frame, {}):
                write_cropped_image_by_bb(folder_path, cur_frame, person_id, frame,
                                          faces[cur_frame][person_id]['coords'])
    finally:
        vidFile.release()

    return

In [43]:
%%time
def extract_people(video_file, visualize=False, frames_limit=100, cell=100,
                   output_file='video_aud_cam.avi'):
    """Run face detection and tracking on ``video_file``.

    Parameters:
        video_file: path of the video to process.
        visualize: when true, write an annotated copy of the video.
        frames_limit: maximum number of frames to read.
        cell: detection interval passed to video_to_frames_dict
            (previously hard-coded to 100).
        output_file: path of the annotated video (previously hard-coded to
            'video_aud_cam.avi').

    Raises:
        IOError: if visualisation is requested and the Haar cascade file
            could not be loaded.
    """
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    if visualize:
        # A cascade that failed to load is empty; fail here with a clear message.
        if face_cascade.empty():
            raise IOError("could not load 'haarcascade_frontalface_default.xml'")
        detections = video_to_frames_dict(video_file, frames_limit, cell=cell, detector=face_cascade)
        write_video(video_file, detections, output_file)

#     video_to_faces('./faces', video_file, frames_limit, face_cascade)
    
# Run the visualisation path on 'splice1.mp4' with a 100-frame limit.
extract_people('splice1.mp4', True, 100)

CPU times: user 2min 12s, sys: 2.44 s, total: 2min 14s
Wall time: 1min 28s
