In [None]:
import os
import cv2
import numpy as np
from keras.models import load_model
import warnings
import argparse
import shutil
import time
import matplotlib.pyplot as plt
import utils

# Global notebook setup.
# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")
# Fix NumPy's legacy global RNG so random draws made later in the notebook
# are reproducible from a fresh kernel.
np.random.seed(seed=1234)

In [None]:
# Command-line parsing is disabled while the code runs as a notebook. The
# argparse block is kept below as an inert string literal; the constants that
# follow hard-code the values it would otherwise provide.
'''argsParser = argparse.ArgumentParser(description="Apply object detection to the input video")
argsParser.add_argument("-m", "--model", required=True, help="path of model for detection")
argsParser.add_argument("-i", "--input_dir", required=True, help="path to input dir")
argsParser.add_argument("-o", "--output_dir", required=True, help="path to output dir")
argsParser.add_argument("-y", "--yolo", required=True, help="base path to YOLO directory")
argsParser.add_argument("-c", "--confidence", type=float, default=0.5, help="min prob to filter weak detections")
args = vars(argsParser.parse_args())'''

# ---- paths and thresholds (the CLI argument each one replaces is noted) ----
MODEL_PATH = 'yolo-keras.h5'  # args["model"]
INPUT_DIR = '/home/ubuntu/valerio/data/restaurants-videos'  # args['input_dir']
OUTPUT_DIR = 'ccc'  # args['output_dir']
YOLO_DIR = 'yolo-coco'  # args['yolo']
CLASS_THRESHOLD = 0.5  # args['confidence']
WEIGHTS_PATH = os.path.join(YOLO_DIR, 'yolov3.weights')

# ---- class labels: one per line of coco.names, surrounding whitespace removed ----
with open(os.path.join(YOLO_DIR, "coco.names")) as f:
    lines = list(f)
    LABELS = list(map(str.strip, lines))

# One random colour triple per label (values drawn from [0, 255)).
COLORS = np.random.randint(low=0, high=255, size=(len(LABELS), 3), dtype="uint8")

# ---- network geometry ----
YOLOV3_INPUT_WIDTH = 416
YOLOV3_INPUT_HEIGHT = 416
# One anchor list per network output; the processing loop indexes this list
# with the same index it uses for the model outputs.
YOLOV3_ANCHORS = [
    [116, 90, 156, 198, 373, 326],
    [30, 61, 62, 45, 59, 119],
    [10, 13, 16, 30, 33, 23],
]

# ---- output directory ----
# Always start from an empty directory: anything left by a previous run is
# deleted before the directory is created again.
if os.path.isdir(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.mkdir(OUTPUT_DIR)

In [None]:
# Load the serialized Keras network stored at MODEL_PATH; the resulting `model`
# is what the processing loop calls `predict` on for every frame.
model = load_model(filepath=MODEL_PATH)
print("[INFO] Model loaded")

In [None]:
# Process one video at a time. Every frame is passed through the YOLO model,
# the centre of each box labelled 'person' is recorded, and when the video ends
# a scatter plot and a 2-D histogram ("heatmap") of those centres are saved to
# OUTPUT_DIR as '<video name>-scatter.pdf' and '<video name>-heatmap.pdf'.
#
# NOTE(review): output names are derived from the video file name only, so two
# cameras containing a video with the same name overwrite each other's plots.
# NOTE(review): box coordinates presumably follow the image convention (y grows
# downwards) while the plots use matplotlib's default y-up axis, so the plots
# are likely mirrored vertically with respect to the frame - confirm.

UNIT = 10  # figsize value given to the shorter side of every figure
BINS = 25  # heatmap bins per axis (the value the heatmap was already drawn with)

for cam_dir in os.listdir(INPUT_DIR):
    cam_path = os.path.join(INPUT_DIR, cam_dir)

    # INPUT_DIR is expected to hold one sub-directory per camera; ignore stray
    # files instead of crashing in os.listdir below.
    if not os.path.isdir(cam_path):
        continue

    for video_file in os.listdir(cam_path):
        # The video lives inside its camera directory, so the path must
        # include cam_dir or the capture cannot be opened.
        video_path = os.path.join(cam_path, video_file)

        start_time = time.time()
        print("[INFO] processing %s ..." % video_path, end='', flush=True)

        # Reset per-video state; the frame size is filled in from the first frame.
        initial_W, initial_H = None, None
        X, Y = [], []
        vs = cv2.VideoCapture(video_path)
        try:
            while True:
                (grabbed, frame) = vs.read()

                # grabbed is False when no more frames are available
                # (or when the file could not be opened at all)
                if not grabbed:
                    break

                # save the original size of the frames (taken from the first one)
                if initial_W is None or initial_H is None:
                    initial_H, initial_W = frame.shape[:2]

                # adapt the frame to the yolov3 network input (resize, scale
                # pixel values to [0, 1], add a leading batch axis)
                frame = cv2.resize(frame, (YOLOV3_INPUT_WIDTH, YOLOV3_INPUT_HEIGHT), interpolation=cv2.INTER_AREA)
                frame = frame.astype('float32') / 255.0
                frame = np.expand_dims(frame, 0)

                # run prediction
                yhat = model.predict(frame)

                # decode every network output with its matching anchor list
                boxes = []
                for i in range(len(yhat)):
                    boxes += utils.decode_netout(yhat[i][0], YOLOV3_ANCHORS[i], CLASS_THRESHOLD, YOLOV3_INPUT_WIDTH, YOLOV3_INPUT_HEIGHT)

                # scale boxes in order to fit the initial size of the frame
                utils.correct_yolo_boxes(boxes, initial_H, initial_W, YOLOV3_INPUT_WIDTH, YOLOV3_INPUT_HEIGHT)

                # keep only boxes (predicted label and its score) above the confidence threshold
                v_boxes, v_labels, v_scores = utils.get_boxes(boxes, LABELS, CLASS_THRESHOLD)

                # keep only the boxes associated with a person and store the
                # coordinates of the centre of each of them
                for box, label in zip(v_boxes, v_labels):
                    if label == 'person':
                        X.append(box.xmin + (box.xmax - box.xmin) / 2)
                        Y.append(box.ymin + (box.ymax - box.ymin) / 2)
        finally:
            # always free the decoder / file handle, even if prediction fails
            vs.release()

        # Nothing could be decoded (not a video, corrupt or empty file): there
        # is no frame size to build the plots from, so skip this entry.
        if initial_W is None or initial_H is None:
            print(" SKIPPED: no frames could be read.", flush=True)
            continue

        # compute the size of the figures so that it is proportional to the frame size
        if initial_W > initial_H:
            fig_height = UNIT
            fig_width = UNIT * (initial_W / initial_H)
        else:
            fig_width = UNIT
            fig_height = UNIT * (initial_H / initial_W)

        # strip only the final extension so names containing extra dots are kept whole
        filename = os.path.splitext(video_file)[0]

        # draw scatterplot
        scatter_fig = plt.figure(figsize=(fig_width, fig_height))
        plt.scatter(X, Y)
        plt.ylim([0, initial_H])
        plt.xlim([0, initial_W])
        plt.axis("off")
        plt.savefig(os.path.join(OUTPUT_DIR, filename + '-scatter.pdf'))
        # close the figure once saved so figures do not pile up in memory
        plt.close(scatter_fig)

        # draw heatmap
        heatmap_fig = plt.figure(figsize=(fig_width, fig_height))
        plt.hist2d(X, Y, bins=BINS, range=[[0, initial_W], [0, initial_H]], density=True, cmap='Blues')
        #plt.colorbar()
        plt.axis('off')
        plt.savefig(os.path.join(OUTPUT_DIR, filename + '-heatmap.pdf'))
        plt.close(heatmap_fig)

        print("DONE! It took %f seconds." % (time.time() - start_time), flush=True)