Skip to content

Commit 16461d9

Browse files
committed
enable native screen capturing via xlib and pre-compiled C library, speeding up the capturing process quite a bit. leave old python-only xlib capturing in the code as screenpy config
1 parent a31e7ca commit 16461d9

File tree

6 files changed

+104
-48
lines changed

6 files changed

+104
-48
lines changed

ansible/roles/tf_object_detection/tasks/main.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,7 @@
4242
args:
4343
chdir: "../../{{ tf_models_repo_name }}/research/"
4444

45+
- name: Compile the grab_screen C code used to natively grab the screen via xlib
46+
command: gcc -shared -O3 -Wall -fPIC -Wl,-soname,prtscn -o grab_screen.so grab_screen.c -lX11
47+
args:
48+
chdir: "../stuff/"

config/config.obj_detect.sample.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ model_dl_file_format: '.tar.gz'
1010
# re-calculates and displays FPS rate every x seconds
1111
fps_interval: 3
1212

13-
# choose your input: screen, video
13+
# choose your input: screen, screenpy, video
14+
## Note: for a 1920x1080 screen, screenpy (pure-Python Xlib) takes about 1586ms per capture, whereas screen (native, via the compiled C library) takes about 30ms (!); the difference becomes smaller for smaller capture areas
1415
input_type: screen
1516

1617
# for video input: choose either the device id (camera index) or a filename
17-
## will be passed to OpenCV VideoCapture
18+
## Note: Value will be passed to OpenCV VideoCapture (see OpenCV documentation)
1819
#input_video: '../opencv_extra/testdata/highgui/video/big_buck_bunny.mp4'
1920
input_video: 0
2021

obj_detect.py

Lines changed: 23 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import tarfile
66
import tensorflow as tf
77
import zipfile
8-
from datetime import datetime
8+
from datetime import datetime, timedelta
99
from Xlib import display
1010
import cv2
1111
import yaml
@@ -19,35 +19,32 @@
1919
sys.path.append('../tensorflow_models/research/object_detection')
2020

2121
from stuff.helper import FPS, Visualizer
22-
from stuff.input import ScreenInput, VideoInput
22+
from stuff.input import ScreenInput, ScreenPyInput, VideoInput
2323

2424
# Load config values from config.obj_detect.sample.yml (as default values) updated by optional user-specific config.obj_detect.yml
2525
## see also http://treyhunner.com/2016/02/how-to-merge-dictionaries-in-python/
2626
cfg = yaml.load(open("config/config.obj_detect.sample.yml", 'r'))
2727
if os.path.isfile("config/config.obj_detect.yml"):
2828
cfg_user = yaml.load(open("config/config.obj_detect.yml", 'r'))
2929
cfg.update(cfg_user)
30-
#for section in cfg:
31-
# print(section, ":", cfg[section])
3230

3331
# Define input
3432
screen = display.Display().screen().root.get_geometry()
3533
if cfg['input_type'] == 'screen':
3634
input = ScreenInput(0, 0, int(screen.width/2), int(screen.height/2))
35+
elif cfg['input_type'] == 'screenpy':
36+
input = ScreenPyInput(0, 0, int(screen.width/2), int(screen.height/2))
3737
elif cfg['input_type'] == 'video':
3838
input = VideoInput(cfg['input_video'])
3939
else:
4040
print('No valid input type given. Exit.')
4141
sys.exit()
4242

4343
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
44-
# See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
45-
44+
# See the detection model zoo (object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
4645
# Path to frozen detection graph. This is the actual model that is used for the object detection.
4746
PATH_TO_CKPT = '../' + cfg['model_name'] + '/frozen_inference_graph.pb'
4847

49-
50-
5148
# ## Download Model
5249
MODEL_FILE = cfg['model_name'] + cfg['model_dl_file_format']
5350
if not os.path.isfile(PATH_TO_CKPT):
@@ -72,13 +69,6 @@
7269
od_graph_def.ParseFromString(serialized_graph)
7370
tf.import_graph_def(od_graph_def, name='')
7471

75-
# # Detection
76-
PATH_TO_TEST_IMAGES_DIR = 'test_images'
77-
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
78-
79-
# Size, in inches, of the output images.
80-
IMAGE_SIZE = (12, 8)
81-
8272
with detection_graph.as_default():
8373
with tf.Session(graph=detection_graph) as sess:
8474
# Define input and output Tensors for detection_graph
@@ -97,37 +87,30 @@
9787
vis = Visualizer(cfg['visualizer_enabled'])
9888

9989
while(input.isActive()):
90+
startTime=datetime.now()
10091

101-
# startTime=datetime.now()
102-
103-
ret, image_np = input.getImage()
104-
if not ret:
105-
print("No frames grabbed from input (anymore). Exit.")
106-
break
92+
ret, image_np = input.getImage()
93+
if not ret:
94+
print("No frames grabbed from input (anymore). Exit.")
95+
break
10796

108-
# timeElapsed=datetime.now()-startTime
109-
# print('1 Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))
110-
# startTime=datetime.now()
97+
timeElapsed=datetime.now()-startTime
98+
# print('1 Time elapsed (hh:mm:ss.ms) {}'.format(timeElapsed))
99+
startTime=datetime.now()
111100

112-
# for image_path in TEST_IMAGE_PATHS:
113-
# image = Image.open(image_path)
114-
# the array based representation of the image will be used later in order to prepare the
115-
# result image with boxes and labels on it.
116-
# image_np = load_image_into_numpy_array(image)
101+
# Run the detection (expand dimensions since the model expects images to have shape: [1, None, None, 3])
102+
image_np_expanded = np.expand_dims(image_np, axis=0)
103+
(boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections], feed_dict={image_tensor: image_np_expanded})
117104

118-
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
119-
image_np_expanded = np.expand_dims(image_np, axis=0)
120-
# Actual detection.
121-
(boxes, scores, classes, num) = sess.run(
122-
[detection_boxes, detection_scores, detection_classes, num_detections],
123-
feed_dict={image_tensor: image_np_expanded})
105+
# print(boxes, scores, classes, num)
124106

125-
ret = vis.show(image_np, boxes, classes, scores)
126-
if not ret:
127-
print("User asked to quit. Exit")
128-
break
107+
vis.draw(image_np, boxes, classes, scores)
108+
ret = vis.show(image_np)
109+
if not ret:
110+
print("User asked to quit. Exit")
111+
break
129112

130-
fps.update()
113+
fps.update()
131114

132115
fps.stop()
133116
print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))

stuff/grab_screen.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// based on https://stackoverflow.com/a/16141058/860756 with minor tweaks
2+
3+
#include <stdio.h>
4+
#include <X11/X.h>
5+
#include <X11/Xutil.h>
6+
7+
void getScreen(const int, const int, const int, const int, unsigned char *);
8+
void getScreen(const int xx,const int yy,const int W, const int H, /*out*/ unsigned char * data)
9+
{
10+
Display *display = XOpenDisplay(NULL);
11+
Window root = DefaultRootWindow(display);
12+
13+
XImage *image = XGetImage(display,root, xx,yy, W,H, AllPlanes, ZPixmap);
14+
15+
unsigned long red_mask = image->red_mask;
16+
unsigned long green_mask = image->green_mask;
17+
unsigned long blue_mask = image->blue_mask;
18+
int x, y;
19+
int ii = 0;
20+
for (y = 0; y < H; y++) {
21+
for (x = 0; x < W; x++) {
22+
unsigned long pixel = XGetPixel(image,x,y);
23+
unsigned char blue = (pixel & blue_mask);
24+
unsigned char green = (pixel & green_mask) >> 8;
25+
unsigned char red = (pixel & red_mask) >> 16;
26+
27+
data[ii + 2] = red;
28+
data[ii + 1] = green;
29+
data[ii + 0] = blue;
30+
ii += 3;
31+
}
32+
}
33+
XDestroyImage(image);
34+
XDestroyWindow(display, root);
35+
XCloseDisplay(display);
36+
}

stuff/helper.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ def __init__(self, enabled):
5454
self._windowPlaced = False
5555
self._screen = display.Display().screen().root.get_geometry()
5656

57-
def show(self, image_np, boxes, classes, scores):
57+
def draw(self, image_np, boxes, classes, scores):
5858
if not self._enabled:
59-
return True
59+
return
6060

6161
vis_util.visualize_boxes_and_labels_on_image_array(
6262
image_np,
@@ -67,6 +67,10 @@ def show(self, image_np, boxes, classes, scores):
6767
use_normalized_coordinates=True,
6868
line_thickness=8)
6969

70+
def show(self, image_np):
71+
if not self._enabled:
72+
return True
73+
7074
cv2.imshow('Visualizer', image_np) # alternatively as 2nd param: cv2.resize(image_np, (800, 600)))
7175
if not self._windowPlaced:
7276
cv2.moveWindow('Visualizer', (int)((self._screen.width-image_np.shape[1])/2), (int)((self._screen.height-image_np.shape[0])/2))

stuff/input.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,47 @@
1+
import os
2+
import ctypes
13
import numpy as np
24
from Xlib import display, X
35
from PIL import Image
46
import cv2
57

6-
78
class ScreenInput:
9+
# Natively captures the screen using Xlib and our pre-compiled grab_screen library
10+
# see also https://stackoverflow.com/a/16141058/860756
811
def __init__(self, startX, startY, endX, endY):
9-
self.root = display.Display().screen().root
10-
self.reso = self.root.get_geometry()
12+
self.startX = startX
13+
self.startY = startY
14+
self.width = endX-startX
15+
self.height = endY-startY
16+
17+
self._grab = ctypes.CDLL(os.path.dirname(os.path.abspath(__file__)) + os.path.sep + 'grab_screen.so')
18+
self._size = ctypes.c_ubyte * self.width * self.height * 3
19+
20+
def isActive(self):
21+
return True
1122

23+
def getImage(self):
24+
self._grab.getScreen.argtypes = []
25+
result = (self._size)()
26+
self._grab.getScreen(self.startX,self.startY, self.width, self.height, result)
27+
image = Image.frombuffer('RGB', (self.width, self.height), result, 'raw', 'RGB', 0, 1)
28+
image_np = np.array(image);
29+
return True, image_np
30+
31+
def cleanup(self):
32+
pass
33+
34+
class ScreenPyInput:
35+
# Captures the screen using Xlib from pure Python (slower than the native ScreenInput)
36+
def __init__(self, startX, startY, endX, endY):
1237
self.startX = startX
1338
self.startY = startY
1439
self.width = endX-startX
1540
self.height = endY-startY
1641

42+
self.root = display.Display().screen().root
43+
self.reso = self.root.get_geometry()
44+
1745
def isActive(self):
1846
return True
1947

@@ -26,8 +54,8 @@ def getImage(self):
2654
def cleanup(self):
2755
pass
2856

29-
3057
class VideoInput:
58+
# Capture video (either via device (e.g. camera) or video files) using OpenCV
3159
def __init__(self, input):
3260
self.cap = cv2.VideoCapture(input)
3361

0 commit comments

Comments
 (0)