Enable native screen capturing via Xlib and a pre-compiled C library, which speeds up the capturing process considerably. Keep the old Python-only Xlib capturing in the code, selectable via the `screenpy` input type in the config.

sjentzsch · sjentzsch · commit 16461d9013f8 · 2017-10-23T15:41:28.000+02:00
diff --git a/ansible/roles/tf_object_detection/tasks/main.yml b/ansible/roles/tf_object_detection/tasks/main.yml
@@ -42,3 +42,7 @@
   args:
     chdir: "../../{{ tf_models_repo_name }}/research/"
 
+- name: Compile the grab_screen C code used to natively grab the screen via xlib
+  command: gcc -shared -O3 -Wall -fPIC -Wl,-soname,prtscn -o grab_screen.so grab_screen.c -lX11
+  args:
+    chdir: "../stuff/"
diff --git a/config/config.obj_detect.sample.yml b/config/config.obj_detect.sample.yml
@@ -10,11 +10,12 @@ model_dl_file_format: '.tar.gz'
 # re-calculates and displays FPS rate every x seconds
 fps_interval: 3
 
-# choose your input: screen, video
+# choose your input: screen, screenpy, video
+## Note: for a 1920x1080 screen, screenpy (Python-only Xlib) takes about 1586 ms per capture, whereas screen (native, using the C lib) takes about 30 ms (!); the difference shrinks for smaller capture areas
 input_type: screen
 
 # for video input: choose either the device id (camera index) or a filename
-## will be passed to OpenCV VideoCapture
+## Note: Value will be passed to OpenCV VideoCapture (see OpenCV documentation)
 #input_video: '../opencv_extra/testdata/highgui/video/big_buck_bunny.mp4'
 input_video: 0
 
diff --git a/obj_detect.py b/obj_detect.py
@@ -5,7 +5,7 @@
 import tarfile
 import tensorflow as tf
 import zipfile
-from datetime import datetime
+from datetime import datetime, timedelta
 from Xlib import display
 import cv2
 import yaml
@@ -19,35 +19,32 @@
 sys.path.append('../tensorflow_models/research/object_detection')
 
 from stuff.helper import FPS, Visualizer
-from stuff.input import ScreenInput, VideoInput
+from stuff.input import ScreenInput, ScreenPyInput, VideoInput
 
 # Load config values from config.obj_detect.sample.yml (as default values) updated by optional user-specific config.obj_detect.yml
 ## see also http://treyhunner.com/2016/02/how-to-merge-dictionaries-in-python/
 cfg = yaml.load(open("config/config.obj_detect.sample.yml", 'r'))
 if os.path.isfile("config/config.obj_detect.yml"):
   cfg_user = yaml.load(open("config/config.obj_detect.yml", 'r'))
   cfg.update(cfg_user)
-#for section in cfg:
-#  print(section, ":", cfg[section])
 
 # Define input
 screen = display.Display().screen().root.get_geometry()
 if cfg['input_type'] == 'screen':
   input = ScreenInput(0, 0, int(screen.width/2), int(screen.height/2))
+elif cfg['input_type'] == 'screenpy':
+  input = ScreenPyInput(0, 0, int(screen.width/2), int(screen.height/2))
 elif cfg['input_type'] == 'video':
   input = VideoInput(cfg['input_video'])
 else:
   print('No valid input type given. Exit.')
   sys.exit()
 
 # Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
-# See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
-
+# See the detection model zoo (object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
 # Path to frozen detection graph. This is the actual model that is used for the object detection.
 PATH_TO_CKPT = '../' + cfg['model_name'] + '/frozen_inference_graph.pb'
 
-
-
 # ## Download Model
 MODEL_FILE = cfg['model_name'] + cfg['model_dl_file_format']
 if not os.path.isfile(PATH_TO_CKPT):
@@ -72,13 +69,6 @@
     od_graph_def.ParseFromString(serialized_graph)
     tf.import_graph_def(od_graph_def, name='')
 
-# # Detection
-PATH_TO_TEST_IMAGES_DIR = 'test_images'
-TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
-
-# Size, in inches, of the output images.
-IMAGE_SIZE = (12, 8)
-
 with detection_graph.as_default():
   with tf.Session(graph=detection_graph) as sess:
     # Definite input and output Tensors for detection_graph
@@ -97,37 +87,30 @@
     vis = Visualizer(cfg['visualizer_enabled'])
 
     while(input.isActive()):
+      startTime=datetime.now()
 
-#        startTime=datetime.now()
-
-        ret, image_np = input.getImage()
-        if not ret:
-          print("No frames grabbed from input (anymore). Exit.")
-          break
+      ret, image_np = input.getImage()
+      if not ret:
+        print("No frames grabbed from input (anymore). Exit.")
+        break
 
-#        timeElapsed=datetime.now()-startTime
-#        print('1 Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))
-#        startTime=datetime.now()
+      timeElapsed=datetime.now()-startTime
+#      print('1 Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed))
+      startTime=datetime.now()
 
-#    for image_path in TEST_IMAGE_PATHS:
-#      image = Image.open(image_path)
-      # the array based representation of the image will be used later in order to prepare the
-      # result image with boxes and labels on it.
-#      image_np = load_image_into_numpy_array(image)
+      # Run the detection (expand dimensions since the model expects images to have shape: [1, None, None, 3])
+      image_np_expanded = np.expand_dims(image_np, axis=0)
+      (boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections], feed_dict={image_tensor: image_np_expanded})
 
-        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
-        image_np_expanded = np.expand_dims(image_np, axis=0)
-        # Actual detection.
-        (boxes, scores, classes, num) = sess.run(
-            [detection_boxes, detection_scores, detection_classes, num_detections],
-            feed_dict={image_tensor: image_np_expanded})
+#      print(boxes, scores, classes, num)
 
-        ret = vis.show(image_np, boxes, classes, scores)
-        if not ret:
-          print("User asked to quit. Exit")
-          break
+      vis.draw(image_np, boxes, classes, scores)
+      ret = vis.show(image_np)
+      if not ret:
+        print("User asked to quit. Exit")
+        break
 
-        fps.update()
+      fps.update()
 
 fps.stop()
 print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
diff --git a/stuff/grab_screen.c b/stuff/grab_screen.c
@@ -0,0 +1,55 @@
+// based on https://stackoverflow.com/a/16141058/860756 with minor tweaks
+
+#include <stdio.h>
+#include <X11/X.h>
+#include <X11/Xutil.h>
+
+void getScreen(const int, const int, const int, const int, unsigned char *);
+
+/*
+ * Capture a W x H region of the root window, starting at (xx, yy), into
+ * `data` as packed 3-byte pixels in blue, green, red order (row-major).
+ *
+ * `data` must point to at least W * H * 3 writable bytes. If the display
+ * cannot be opened or the region cannot be read, a message is printed to
+ * stderr and `data` is left untouched.
+ *
+ * NOTE(review): the channel shifts below assume the usual 24/32-bit visual
+ * with masks 0xff0000 / 0x00ff00 / 0x0000ff -- confirm for other visuals.
+ */
+void getScreen(const int xx,const int yy,const int W, const int H, /*out*/ unsigned char * data)
+{
+   Display *display = XOpenDisplay(NULL);
+   if (display == NULL) {
+      fprintf(stderr, "getScreen: cannot open X display\n");
+      return;
+   }
+   Window root = DefaultRootWindow(display);
+
+   XImage *image = XGetImage(display,root, xx,yy, W,H, AllPlanes, ZPixmap);
+   if (image == NULL) {
+      fprintf(stderr, "getScreen: XGetImage failed\n");
+      XCloseDisplay(display);
+      return;
+   }
+
+   unsigned long red_mask   = image->red_mask;
+   unsigned long green_mask = image->green_mask;
+   unsigned long blue_mask  = image->blue_mask;
+   int x, y;
+   int ii = 0;
+   for (y = 0; y < H; y++) {
+      for (x = 0; x < W; x++) {
+         unsigned long pixel = XGetPixel(image,x,y);
+
+         data[ii + 2] = (unsigned char)((pixel & red_mask) >> 16);
+         data[ii + 1] = (unsigned char)((pixel & green_mask) >> 8);
+         data[ii + 0] = (unsigned char)(pixel & blue_mask);
+         ii += 3;
+      }
+   }
+   /* The root window belongs to the X server, so it must not be destroyed
+    * here; only the image and the connection opened above are released. */
+   XDestroyImage(image);
+   XCloseDisplay(display);
+}
diff --git a/stuff/helper.py b/stuff/helper.py
@@ -54,9 +54,9 @@ def __init__(self, enabled):
         self._windowPlaced = False
         self._screen = display.Display().screen().root.get_geometry()
 
-    def show(self, image_np, boxes, classes, scores):
+    def draw(self, image_np, boxes, classes, scores):
         if not self._enabled:
-          return True
+          return
 
         vis_util.visualize_boxes_and_labels_on_image_array(
             image_np,
@@ -67,6 +67,10 @@ def show(self, image_np, boxes, classes, scores):
             use_normalized_coordinates=True,
             line_thickness=8)
 
+    def show(self, image_np):
+        if not self._enabled:
+          return True
+
         cv2.imshow('Visualizer', image_np) # alternatively as 2nd param: cv2.resize(image_np, (800, 600)))
         if not self._windowPlaced:
           cv2.moveWindow('Visualizer', (int)((self._screen.width-image_np.shape[1])/2), (int)((self._screen.height-image_np.shape[0])/2))
diff --git a/stuff/input.py b/stuff/input.py
@@ -1,19 +1,47 @@
+import os
+import ctypes
 import numpy as np
 from Xlib import display, X
 from PIL import Image
 import cv2
 
-
 class ScreenInput:
+    # Natively captures the screen using Xlib and our pre-compiled grab_screen library
+    # see also https://stackoverflow.com/a/16141058/860756
     def __init__(self, startX, startY, endX, endY):
-        self.root = display.Display().screen().root
-        self.reso = self.root.get_geometry()
+        self.startX = startX
+        self.startY = startY
+        self.width = endX-startX
+        self.height = endY-startY
+
+        self._grab = ctypes.CDLL(os.path.dirname(os.path.abspath(__file__)) + os.path.sep + 'grab_screen.so')
+        self._size = ctypes.c_ubyte * self.width * self.height * 3
+
+    def isActive(self):
+        return True
 
+    def getImage(self):
+        self._grab.getScreen.argtypes = []
+        result = (self._size)()
+        self._grab.getScreen(self.startX,self.startY, self.width, self.height, result)
+        image = Image.frombuffer('RGB', (self.width, self.height), result, 'raw', 'RGB', 0, 1)
+        image_np = np.array(image);
+        return True, image_np
+
+    def cleanup(self):
+        pass
+
+class ScreenPyInput:
+    # Captures the screen using Xlib from Python only (slower than the native ScreenInput)
+    def __init__(self, startX, startY, endX, endY):
         self.startX = startX
         self.startY = startY
         self.width = endX-startX
         self.height = endY-startY
 
+        self.root = display.Display().screen().root
+        self.reso = self.root.get_geometry()
+
     def isActive(self):
         return True
 
@@ -26,8 +54,8 @@ def getImage(self):
     def cleanup(self):
         pass
 
-
 class VideoInput:
+    # Capture video (either via device (e.g. camera) or video files) using OpenCV
     def __init__(self, input):
         self.cap = cv2.VideoCapture(input)