From fe05c51add02c24f69d6ae85bfc705740b94021b Mon Sep 17 00:00:00 2001
From: Godfrey Tutu <godfreytutu@Godfreys-MacBook-Pro.local>
Date: Mon, 22 Jun 2020 20:19:13 +0100
Subject: [PATCH] feature(build inference pipeline): - Integrate model and
 build out the inference pipeline - Integrate the mouse controller - Ensure
 the mouse controller controls the mouse based on output from the gaze
 estimation model

---
 requirements.txt          |  3 +-
 src/gaze_estimation.py    | 48 +++---------------------
 src/inference_pipeline.py | 77 +++++++++++++++++++++++++++++++++++++++
 src/input_feeder.py       |  1 +
 4 files changed, 86 insertions(+), 43 deletions(-)
 create mode 100644 src/inference_pipeline.py

diff --git a/requirements.txt b/requirements.txt
index 6d4cec0..39f0f9f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ numpy==1.17.4
 Pillow==6.2.1
 requests==2.22.0
 virtualenv==16.7.9
-pyyaml==5.3.1
\ No newline at end of file
+pyyaml==5.3.1
+pyautogui==0.9.50
diff --git a/src/gaze_estimation.py b/src/gaze_estimation.py
index 7127d97..a29aec8 100644
--- a/src/gaze_estimation.py
+++ b/src/gaze_estimation.py
@@ -8,13 +8,6 @@
 import numpy as np
 from openvino.inference_engine import IENetwork, IECore
 
-# import models
-from head_pose_estimation import HeadPoseEstimation
-from face_detection import FaceDetector
-from facial_landmarks_detection import FacialLandmarksDetector
-
-from draw_image import draw_boxes
-
 class GazeEstimation:
     '''
     Class for the Face Detection Model.
@@ -69,7 +62,7 @@ def preprocess_input(self, inputs):
         left_eye_image = inputs['left_eye_image']
         right_eye_image = inputs['right_eye_image']
         head_pose = inputs['head_pose_angles']
-        print(left_eye_image.shape, right_eye_image.shape, head_pose.shape)
+
         # shape for left and right eyes are the same
         # using the shape of left eye
         height = self.input_shapes['left_eye_image'][2]
@@ -93,49 +86,20 @@ def preprocess_output(self, outputs):
         '''
         raise NotImplementedError
 
-def main():
+def main(left_eye, right_eye, head_pose):
     CPU_EXTENSION_MAC = '/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib'
     gaze_model = 'models/intel/gaze-estimation-adas-0002/FP16/gaze-estimation-adas-0002'
-    face_detector_model = 'models/intel/face-detection-adas-binary-0001/INT1/face-detection-adas-binary-0001'
-    facial_landmark_model = 'models/intel/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009'
-    head_pose_model = 'models/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001'
 
-    image = 'bin/test-image1.jpg'
-
-    # Initialize the models
-    face_detector = FaceDetector(model_name=face_detector_model, device='CPU', extensions=CPU_EXTENSION_MAC)
-    facial_landmarks = FacialLandmarksDetector(model_name=facial_landmark_model, device='CPU', extensions=CPU_EXTENSION_MAC)
-    head_pose_estimation = HeadPoseEstimation(model_name=head_pose_model, device='CPU', extensions=CPU_EXTENSION_MAC)
     gaze_estimation = GazeEstimation(model_name=gaze_model, device='CPU', extensions=CPU_EXTENSION_MAC)
 
     # Load the models
-    face_detector.load_model()
-    facial_landmarks.load_model()
-    head_pose_estimation.load_model()
     gaze_estimation.load_model()
-
-    image = cv2.imread(image)
-    pred = face_detector.predict(image)
-    image = face_detector.preprocess_output(pred, image)
-
-
-    head_pose = head_pose_estimation.predict(image[0])
-    head_pose = np.array([head_pose])
-
-    landmarks = facial_landmarks.predict(image[0])
-    eyes_coords = facial_landmarks.preprocess_output(landmarks[0])
-    eyes = facial_landmarks.get_eyes(eyes_coords, image[0])
-    left_eye_image = eyes['left_eye']
-    right_eye_image = eyes['right_eye']
-    cv2.imwrite('new_left_eyes.jpg', left_eye_image)
-    cv2.imwrite('new_right_eyes.jpg', right_eye_image)
-
     gaze_estimate = gaze_estimation.predict({
-        'left_eye_image': eyes['left_eye'],
-        'right_eye_image': eyes['right_eye'],
+        'left_eye_image': left_eye,
+        'right_eye_image': right_eye,
         'head_pose_angles': head_pose})
-    
-    print(gaze_estimate)
+
+    return gaze_estimate
 
 if __name__ == '__main__':
     main()
diff --git a/src/inference_pipeline.py b/src/inference_pipeline.py
new file mode 100644
index 0000000..4d271dc
--- /dev/null
+++ b/src/inference_pipeline.py
@@ -0,0 +1,77 @@
+import os
+import sys
+import cv2
+import numpy as np
+
+from head_pose_estimation import HeadPoseEstimation
+from face_detection import FaceDetector
+from facial_landmarks_detection import FacialLandmarksDetector
+from gaze_estimation import GazeEstimation, main as gazer
+
+from draw_image import draw_boxes
+from input_feeder import InputFeeder
+from mouse_controller import MouseController
+
+def main():
+    '''Run the gaze pipeline on bin/demo.mp4 and move the mouse per frame.
+
+    Models and the mouse controller are built once, before the frame loop;
+    the input feeder is closed on every exit path.
+    '''
+    CPU_EXTENSION_MAC = '/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib'
+    gaze_model = 'models/intel/gaze-estimation-adas-0002/FP16/gaze-estimation-adas-0002'
+    face_detector_model = 'models/intel/face-detection-adas-binary-0001/INT1/face-detection-adas-binary-0001'
+    facial_landmark_model = 'models/intel/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009'
+    head_pose_model = 'models/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001'
+
+    # Initialize the models once; rebuilding them for every frame is wasteful
+    face_detector = FaceDetector(model_name=face_detector_model, device='CPU', extensions=CPU_EXTENSION_MAC)
+    facial_landmarks = FacialLandmarksDetector(model_name=facial_landmark_model, device='CPU', extensions=CPU_EXTENSION_MAC)
+    head_pose_estimation = HeadPoseEstimation(model_name=head_pose_model, device='CPU', extensions=CPU_EXTENSION_MAC)
+    gaze_estimation = GazeEstimation(model_name=gaze_model, device='CPU', extensions=CPU_EXTENSION_MAC)
+    # Load the models
+    face_detector.load_model()
+    facial_landmarks.load_model()
+    head_pose_estimation.load_model()
+    gaze_estimation.load_model()
+    control_mouse = MouseController('low', 'fast')
+
+    input_feeder = InputFeeder('video', 'bin/demo.mp4')
+    # input_feeder = InputFeeder('cam', 'bin/demo.mp4')
+    input_feeder.load_data()
+
+    try:
+        for frame in input_feeder.next_batch():
+            try:
+                if frame is None:
+                    raise TypeError
+                pred = face_detector.predict(frame)
+                faces = face_detector.preprocess_output(pred, frame)
+                # faces[0] raises IndexError if empty (presumably no face found)
+                head_pose = np.array([head_pose_estimation.predict(faces[0])])
+            except IndexError:
+                print('No more frame to read')
+                return
+            except TypeError:
+                print('No more frame to read from stream')
+                return
+
+            landmarks = facial_landmarks.predict(faces[0])
+            eyes_coords = facial_landmarks.preprocess_output(landmarks[0])
+            eyes = facial_landmarks.get_eyes(eyes_coords, faces[0])
+            cv2.imwrite('new_left_eyes.jpg', eyes['left_eye'])
+            cv2.imwrite('new_right_eyes.jpg', eyes['right_eye'])
+
+            gaze_estimate = gaze_estimation.predict({
+                'left_eye_image': eyes['left_eye'],
+                'right_eye_image': eyes['right_eye'],
+                'head_pose_angles': head_pose})
+
+            # Skip the move when the first gaze component is falsy (e.g. 0.0)
+            if gaze_estimate[0][0]:
+                control_mouse.move(gaze_estimate[0][0], gaze_estimate[0][1])
+    finally:
+        input_feeder.close()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/src/input_feeder.py b/src/input_feeder.py
index aff386d..71461b8 100644
--- a/src/input_feeder.py
+++ b/src/input_feeder.py
@@ -28,6 +28,7 @@ def load_data(self):
             self.cap=cv2.VideoCapture(0)
         else:
             self.cap=cv2.imread(self.input_file)
+        
 
     def next_batch(self):
         '''