
Commit 984bbe8

Outsource the input definition (can be either screen or video) to separate classes and let the user specify it in the provided config YAML file.

1 parent 451a314 · commit 984bbe8

4 files changed: +113 −33 lines


config/config.obj_detect.sample.yml

Lines changed: 11 additions & 0 deletions
@@ -6,3 +6,14 @@
 model_name: 'ssd_mobilenet_v1_coco_11_06_2017'
 model_dl_base_path: 'http://download.tensorflow.org/models/object_detection/'
 model_dl_file_format: '.tar.gz'
+
+# re-calculates and displays FPS rate every x seconds
+fps_interval: 3
+
+# choose your input: screen, video
+input_type: screen
+
+# for video input: choose either the device id (camera index) or a filename
+## will be passed to OpenCV VideoCapture
+#input_video: '../opencv_extra/testdata/highgui/video/big_buck_bunny.mp4'
+input_video: 0
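
As context for the new keys, here is a minimal sketch of how the sample config plus an optional user-specific override (the config.obj_detect.yml mentioned in obj_detect.py) could be loaded and merged with PyYAML; the exact paths and merge logic are assumptions for illustration, not necessarily the repository's code:

import os
import yaml

# Load defaults from the sample file, then overlay user-specific values if such a file exists.
with open('config/config.obj_detect.sample.yml') as f:
    cfg = yaml.safe_load(f)

if os.path.isfile('config/config.obj_detect.yml'):
    with open('config/config.obj_detect.yml') as f:
        user_cfg = yaml.safe_load(f) or {}
    cfg = {**cfg, **user_cfg}  # user values override the sample defaults

print(cfg['input_type'], cfg['fps_interval'], cfg['input_video'])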

obj_detect.py

Lines changed: 27 additions & 33 deletions
@@ -6,17 +6,14 @@
 import tensorflow as tf
 import zipfile
 import time
+from Xlib import display
 import cv2
 import yaml
 
-from Xlib import display, X
 
 from collections import defaultdict
 from io import StringIO
-from PIL import Image
-
-#cap = cv2.VideoCapture(0)
-#cap = cv2.VideoCapture('../opencv_extra/testdata/highgui/video/big_buck_bunny.mp4')
+#from PIL import Image
 
 sys.path.append('../tensorflow_models/research')
 sys.path.append('../tensorflow_models/research/slim')
@@ -25,6 +22,8 @@
 from utils import label_map_util
 from utils import visualization_utils as vis_util
 
+from stuff.helper import FPS
+from stuff.input import ScreenInput, VideoInput
 
 # Load config values from config.obj_detect.sample.yml (as default values) updated by optional user-specific config.obj_detect.yml
 ## see also http://treyhunner.com/2016/02/how-to-merge-dictionaries-in-python/
@@ -35,7 +34,15 @@
 #for section in cfg:
 #    print(section, ":", cfg[section])
 
-
+# Define input
+screen = display.Display().screen().root.get_geometry()
+if cfg['input_type'] == 'screen':
+    input = ScreenInput(0, 0, int(screen.width/2), int(screen.height/2))
+elif cfg['input_type'] == 'video':
+    input = VideoInput(cfg['input_video'])
+else:
+    print('No valid input type given. Exit.')
+    sys.exit()
 
 # Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
 # See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
@@ -97,33 +104,20 @@
 detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
 num_detections = detection_graph.get_tensor_by_name('num_detections:0')
 
-# for frame rate calculation
-start_time = time.time()
-x = 3 # displays the frame rate every x seconds
-counter = 0
+# TODO: Usually FPS calculation lives in a separate thread. As is now, the interval is a minimum value for each iteration.
+fps = FPS(cfg['fps_interval']).start()
 
 windowPlacedYet = False
 
-# while(cap.isOpened()):
-while(True):
-
-  dsp = display.Display()
-  root = dsp.screen().root
-  reso = root.get_geometry()
-  W,H = int(reso.width/2),int(reso.height/2)
-  #W,H = 600,600
-  raw = root.get_image(0, 0, W, H, X.ZPixmap, 0xffffffff)
-  image = Image.frombytes("RGB", (W, H), raw.data, "raw", "RGBX")
-  image_np = np.array(image);
+while(input.isActive()):
+  ret, image_np = input.getImage()
+  if not ret:
+    print("No frames grabbed from input (anymore)! Exit.")
+    break
 
   # image_np_bgr = np.array(ImageGrab.grab(bbox=(0,0,600,600))) # grab(bbox=(10,10,500,500)) or just grab()
   # image_np = cv2.cvtColor(image_np_bgr, cv2.COLOR_BGR2RGB)
 
-  # ret, image_np = cap.read()
-  # if not ret:
-  #   print("Video finished!")
-  #   break
-
   # for image_path in TEST_IMAGE_PATHS:
   # image = Image.open(image_path)
   # the array based representation of the image will be used later in order to prepare the
@@ -150,14 +144,14 @@
   if cv2.waitKey(1) & 0xFF == ord('q'):
     break
   if not windowPlacedYet:
-    cv2.moveWindow('object detection', (int)(reso.width/3), (int)(reso.height/3))
+    cv2.moveWindow('object detection', (int)(screen.width/3), (int)(screen.height/3))
     windowPlacedYet = True
 
-  counter+=1
-  if (time.time() - start_time) > x :
-    print("FPS: ", counter / (time.time() - start_time))
-    counter = 0
-    start_time = time.time()
+  fps.update()
+
+fps.stop()
+print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
+print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
 
-#cap.release()
+input.cleanup()
 cv2.destroyAllWindows()
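
To see how the new abstractions fit together without the TensorFlow parts, here is a small stand-alone capture loop; it is only an illustrative sketch that reuses the config keys and classes introduced in this commit (the window name and screen region size are arbitrary choices):

import sys
import cv2
import yaml
from stuff.helper import FPS
from stuff.input import ScreenInput, VideoInput

with open('config/config.obj_detect.sample.yml') as f:
    cfg = yaml.safe_load(f)

# Pick the input source based on cfg['input_type'], as obj_detect.py now does.
if cfg['input_type'] == 'screen':
    inp = ScreenInput(0, 0, 640, 480)        # top-left 640x480 screen region
elif cfg['input_type'] == 'video':
    inp = VideoInput(cfg['input_video'])     # camera index or video file path
else:
    sys.exit('No valid input type given. Exit.')

fps = FPS(cfg['fps_interval']).start()
while inp.isActive():
    ret, frame = inp.getImage()
    if not ret:
        break
    cv2.imshow('input preview', frame)       # note: ScreenInput yields RGB, VideoInput yields BGR
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    fps.update()
fps.stop()
inp.cleanup()
cv2.destroyAllWindows()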

stuff/helper.py

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+import datetime
+
+class FPS:
+    def __init__(self, interval):
+        self._glob_start = None
+        self._glob_end = None
+        self._glob_numFrames = 0
+        self._local_start = None
+        self._local_numFrames = 0
+        self._interval = interval
+
+    def start(self):
+        self._glob_start = datetime.datetime.now()
+        self._local_start = self._glob_start
+        return self
+
+    def stop(self):
+        self._glob_end = datetime.datetime.now()
+
+    def update(self):
+        curr_time = datetime.datetime.now()
+        curr_local_elapsed = (curr_time - self._local_start).total_seconds()
+        self._glob_numFrames += 1
+        self._local_numFrames += 1
+        if curr_local_elapsed > self._interval:
+            print("FPS: ", self._local_numFrames / curr_local_elapsed)
+            self._local_numFrames = 0
+            self._local_start = curr_time
+
+    def elapsed(self):
+        return (self._glob_end - self._glob_start).total_seconds()
+
+    def fps(self):
+        return self._glob_numFrames / self.elapsed()
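
The FPS helper is self-contained, so it can be exercised outside of the detection loop; a minimal usage sketch (the sleep just stands in for per-frame work):

import time
from stuff.helper import FPS

fps = FPS(2).start()          # print a rolling FPS value roughly every 2 seconds
for _ in range(100):
    time.sleep(0.01)          # stand-in for grabbing and processing one frame
    fps.update()
fps.stop()
print('elapsed: {:.2f}s, overall FPS: {:.2f}'.format(fps.elapsed(), fps.fps()))

Note that elapsed() and fps() are only meaningful after stop(), since they rely on the recorded global end time.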

stuff/input.py

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+import numpy as np
+from Xlib import display, X
+from PIL import Image
+import cv2
+
+
+class ScreenInput:
+    def __init__(self, startX, startY, endX, endY):
+        self.root = display.Display().screen().root
+        self.reso = self.root.get_geometry()
+
+        self.startX = startX
+        self.startY = startY
+        self.width = endX-startX
+        self.height = endY-startY
+
+    def isActive(self):
+        return True
+
+    def getImage(self):
+        raw = self.root.get_image(self.startX, self.startY, self.width, self.height, X.ZPixmap, 0xffffffff)
+        image = Image.frombytes("RGB", (self.width, self.height), raw.data, "raw", "RGBX")
+        image_np = np.array(image);
+        return True, image_np
+
+    def cleanup(self):
+        pass
+
+
+class VideoInput:
+    def __init__(self, input):
+        self.cap = cv2.VideoCapture(input)
+
+    def isActive(self):
+        return self.cap.isOpened()
+
+    def getImage(self):
+        return self.cap.read()
+
+    def cleanup(self):
+        self.cap.release()
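
ScreenInput and VideoInput expose the same duck-typed interface (isActive, getImage, cleanup), so calling code never has to know which source it is reading from. A hypothetical helper to illustrate this:

from stuff.input import ScreenInput, VideoInput

def grab_frames(source, n=5):
    # Collect up to n frames from any input object that follows the interface above.
    frames = []
    while source.isActive() and len(frames) < n:
        ret, img = source.getImage()
        if not ret:
            break
        frames.append(img)
    source.cleanup()
    return frames

# frames = grab_frames(VideoInput(0))                 # default webcam
# frames = grab_frames(ScreenInput(0, 0, 800, 600))   # 800x600 region at the top-left corner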
