From fe05c51add02c24f69d6ae85bfc705740b94021b Mon Sep 17 00:00:00 2001 From: Godfrey Tutu Date: Mon, 22 Jun 2020 20:19:13 +0100 Subject: [PATCH] feature(build inference pipeline): - Integrate model and build out the inference pipeline - Integrate the mouse controller - Ensured the mouse controller controls the mouse based on output from gaze estimation model --- requirements.txt | 3 +- src/gaze_estimation.py | 48 +++--------------------- src/inference_pipeline.py | 77 +++++++++++++++++++++++++++++++++++++++ src/input_feeder.py | 1 + 4 files changed, 86 insertions(+), 43 deletions(-) create mode 100644 src/inference_pipeline.py diff --git a/requirements.txt b/requirements.txt index 6d4cec0..39f0f9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ numpy==1.17.4 Pillow==6.2.1 requests==2.22.0 virtualenv==16.7.9 -pyyaml==5.3.1 \ No newline at end of file +pyyaml==5.3.1 +pyautogui==0.9.50 diff --git a/src/gaze_estimation.py b/src/gaze_estimation.py index 7127d97..a29aec8 100644 --- a/src/gaze_estimation.py +++ b/src/gaze_estimation.py @@ -8,13 +8,6 @@ import numpy as np from openvino.inference_engine import IENetwork, IECore -# import models -from head_pose_estimation import HeadPoseEstimation -from face_detection import FaceDetector -from facial_landmarks_detection import FacialLandmarksDetector - -from draw_image import draw_boxes - class GazeEstimation: ''' Class for the Face Detection Model. @@ -69,7 +62,7 @@ def preprocess_input(self, inputs): left_eye_image = inputs['left_eye_image'] right_eye_image = inputs['right_eye_image'] head_pose = inputs['head_pose_angles'] - print(left_eye_image.shape, right_eye_image.shape, head_pose.shape) + # shape for left and right eyes are the same # using the shape of left eye height = self.input_shapes['left_eye_image'][2] @@ -93,49 +86,20 @@ def preprocess_output(self, outputs): ''' raise NotImplementedError -def main(): +def main(left_eye, right_eye, head_pose): CPU_EXTENSION_MAC = '/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib' gaze_model = 'models/intel/gaze-estimation-adas-0002/FP16/gaze-estimation-adas-0002' - face_detector_model = 'models/intel/face-detection-adas-binary-0001/INT1/face-detection-adas-binary-0001' - facial_landmark_model = 'models/intel/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009' - head_pose_model = 'models/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001' - image = 'bin/test-image1.jpg' - - # Initialize the models - face_detector = FaceDetector(model_name=face_detector_model, device='CPU', extensions=CPU_EXTENSION_MAC) - facial_landmarks = FacialLandmarksDetector(model_name=facial_landmark_model, device='CPU', extensions=CPU_EXTENSION_MAC) - head_pose_estimation = HeadPoseEstimation(model_name=head_pose_model, device='CPU', extensions=CPU_EXTENSION_MAC) gaze_estimation = GazeEstimation(model_name=gaze_model, device='CPU', extensions=CPU_EXTENSION_MAC) # Load the models - face_detector.load_model() - facial_landmarks.load_model() - head_pose_estimation.load_model() gaze_estimation.load_model() - - image = cv2.imread(image) - pred = face_detector.predict(image) - image = face_detector.preprocess_output(pred, image) - - - head_pose = head_pose_estimation.predict(image[0]) - head_pose = np.array([head_pose]) - - landmarks = facial_landmarks.predict(image[0]) - eyes_coords = facial_landmarks.preprocess_output(landmarks[0]) - eyes = facial_landmarks.get_eyes(eyes_coords, image[0]) - left_eye_image = eyes['left_eye'] - right_eye_image = eyes['right_eye'] - cv2.imwrite('new_left_eyes.jpg', left_eye_image) - cv2.imwrite('new_right_eyes.jpg', right_eye_image) - gaze_estimate = gaze_estimation.predict({ - 'left_eye_image': eyes['left_eye'], - 'right_eye_image': eyes['right_eye'], + 'left_eye_image': left_eye, + 'right_eye_image': right_eye, 'head_pose_angles': head_pose}) - - print(gaze_estimate) + + return gaze_estimate if __name__ == '__main__': main() diff --git a/src/inference_pipeline.py b/src/inference_pipeline.py new file mode 100644 index 0000000..4d271dc --- /dev/null +++ b/src/inference_pipeline.py @@ -0,0 +1,77 @@ +import os +import sys +import cv2 +import numpy as np + +from head_pose_estimation import HeadPoseEstimation +from face_detection import FaceDetector +from facial_landmarks_detection import FacialLandmarksDetector +from gaze_estimation import GazeEstimation, main as gazer + +from draw_image import draw_boxes +from input_feeder import InputFeeder +from mouse_controller import MouseController + +def main(): + CPU_EXTENSION_MAC = '/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib' + gaze_model = 'models/intel/gaze-estimation-adas-0002/FP16/gaze-estimation-adas-0002' + face_detector_model = 'models/intel/face-detection-adas-binary-0001/INT1/face-detection-adas-binary-0001' + facial_landmark_model = 'models/intel/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009' + head_pose_model = 'models/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001' + + image = 'bin/test-image1.jpg' + + input_feeder = InputFeeder('video', 'bin/demo.mp4') + # input_feeder = InputFeeder('cam', 'bin/demo.mp4') + input_feeder.load_data() + + for image in input_feeder.next_batch(): + # Initialize the models + face_detector = FaceDetector(model_name=face_detector_model, device='CPU', extensions=CPU_EXTENSION_MAC) + facial_landmarks = FacialLandmarksDetector(model_name=facial_landmark_model, device='CPU', extensions=CPU_EXTENSION_MAC) + head_pose_estimation = HeadPoseEstimation(model_name=head_pose_model, device='CPU', extensions=CPU_EXTENSION_MAC) + gaze_estimation = GazeEstimation(model_name=gaze_model, device='CPU', extensions=CPU_EXTENSION_MAC) + + # Load the models + face_detector.load_model() + facial_landmarks.load_model() + head_pose_estimation.load_model() + gaze_estimation.load_model() + + try: + if image is None: + raise TypeError + else: + pred = face_detector.predict(image) + image = face_detector.preprocess_output(pred, image) + + head_pose = head_pose_estimation.predict(image[0]) + head_pose = np.array([head_pose]) + except IndexError: + print('No more frame to read') + input_feeder.close() + return + except TypeError: + print('No more frame to read from stream') + input_feeder.close() + return + + landmarks = facial_landmarks.predict(image[0]) + eyes_coords = facial_landmarks.preprocess_output(landmarks[0]) + eyes = facial_landmarks.get_eyes(eyes_coords, image[0]) + left_eye_image = eyes['left_eye'] + right_eye_image = eyes['right_eye'] + cv2.imwrite('new_left_eyes.jpg', left_eye_image) + cv2.imwrite('new_right_eyes.jpg', right_eye_image) + + gaze_estimate = gaze_estimation.predict({ + 'left_eye_image': eyes['left_eye'], + 'right_eye_image': eyes['right_eye'], + 'head_pose_angles': head_pose}) + + control_mouse = MouseController('low', 'fast') + if gaze_estimate[0][0]: + control_mouse.move(gaze_estimate[0][0], gaze_estimate[0][1]) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/input_feeder.py b/src/input_feeder.py index aff386d..71461b8 100644 --- a/src/input_feeder.py +++ b/src/input_feeder.py @@ -28,6 +28,7 @@ def load_data(self): self.cap=cv2.VideoCapture(0) else: self.cap=cv2.imread(self.input_file) + def next_batch(self): '''