# eyepad_align.py (forked from rasbt/mlxtend)
# Sebastian Raschka 2014-2018
# contributor: Vahid Mirjalili
# mlxtend Machine Learning Library Extensions
#
# A class for transforming face images.
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause
import os
import warnings
import numpy as np
from . import extract_face_landmarks
from .utils import read_image
from ..externals.pyprind.progbar import ProgBar
from skimage.transform import warp, AffineTransform, resize
# Row indices into a facial-landmark array that select the contour points
# of each eye; `EyepadAlign._calc_eye_properties` averages these rows to
# obtain one center coordinate per eye.
# NOTE(review): 36-41 / 42-47 presumably follow the 68-point landmark
# layout produced by `extract_face_landmarks` -- confirm against that helper.
LEFT_INDEX = np.arange(36, 42)
RIGHT_INDEX = np.arange(42, 48)
class EyepadAlign(object):
    """Class to align/transform face images to facial landmarks,
    based on eye alignment.

    1. A scaling factor is computed based on distance between the
       left and right eye, such that the transformed face image will
       have the same eye distance as a reference face image.

    2. A transformation is performed based on the eyes' center point
       to align the face based on the reference eye location.

    3. Finally, the transformed image is padded with zeros to match
       the desired final image size.

    Parameters
    ----------
    verbose : int (default=0)
        Verbose level to display the progress bar and log messages.
        Setting `verbose=1` will print a progress bar upon calling
        `fit_directory`.

    Attributes
    ----------
    target_landmarks_ : target landmarks to transform new face images to.
        Depending on the chosen `fit` parameters, it can be either
        (1) assigned to pre-fit shapes,
        (2) computed from a single face image
        (3) computed as the mean of face landmarks
            from all face images in a file directory of face images.

    eyes_mid_point_ : the middle point between the left and right eyes
        in the target landmarks.

    eye_distance_ : the distance between left and right eyes
        in the target landmarks.

    target_width_ : the width of the transformed output image.

    target_height_ : the height of the transformed output image.

    Examples
    --------
    For more usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/image/EyepadAlign/

    """
    def __init__(self, verbose=0):
        self.verbose = verbose

    def fit_image(self, target_image):
        """Derives facial landmarks from a target image.

        Parameters
        ----------
        target_image : `uint8` numpy.array, shape=[height, width, channels]
            NumPy array representation of the image data.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If no face is detected in `target_image`.

        """
        landmarks = extract_face_landmarks(target_image)
        # The landmark extractor signals "no face detected" with None;
        # fail early with a clear message instead of a TypeError later on,
        # and before any fitted attribute has been modified.
        if landmarks is None:
            raise ValueError('No face detected in `target_image`.')

        self.target_landmarks_ = landmarks
        self.target_width_ = target_image.shape[1]
        self.target_height_ = target_image.shape[0]
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def fit_directory(self, target_img_dir, target_height,
                      target_width, file_extensions='.jpg'):
        """
        Calculates the average landmarks for all face images
        in a directory which will then be set as the target landmark set.

        The directory is searched recursively. Images whose size differs
        from the target size are rescaled to it when their aspect ratio
        matches the target aspect ratio; otherwise they are silently
        skipped. Images in which no face is detected are skipped with
        a warning.

        Parameters
        ----------
        target_img_dir : str
            Directory containing the images

        target_height : int
            Expected image height of the images in the directory

        target_width : int
            Expected image width of the images in the directory

        file_extensions : str or tuple of str (default='.jpg')
            Only files whose names end with (one of) these suffixes
            are considered.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If no image in the directory yields facial landmarks.

        """
        file_list = [os.path.relpath(os.path.join(dirpath, fname),
                                     target_img_dir)
                     for (dirpath, _, filenames)
                     in os.walk(target_img_dir)
                     for fname in filenames
                     if fname.endswith(file_extensions)]

        if self.verbose >= 1:
            print("Fitting the average facial landmarks "
                  "for %d face images " % (len(file_list)))
            pbar = ProgBar(len(file_list))

        target_shape = (target_height, target_width)
        landmarks_list = []

        for f in file_list:
            if self.verbose >= 1:
                pbar.update()

            img = read_image(filename=f, path=target_img_dir)

            # Compare both dimensions: an image that only matches in width
            # must not contribute landmarks measured at a different scale.
            if tuple(img.shape[:2]) != target_shape:
                width_ratio = float(target_width) / img.shape[1]
                height_ratio = float(target_height) / img.shape[0]

                # Rescaling would distort the face if the aspect ratios
                # differ, so such images are ignored.
                if np.abs(width_ratio - height_ratio) > 0.001:
                    continue

                img = resize(img, output_shape=target_shape,
                             anti_aliasing=True, mode='reflect')
                # `resize` yields floats; scale back to 8-bit pixel values.
                img = (img*255).astype('uint8')

            landmarks = extract_face_landmarks(img)
            if landmarks is not None:  # None == no face detected
                landmarks_list.append(landmarks)
            else:
                warnings.warn('No face detected in image %s. Image ignored.'
                              % f)

        # Without this guard np.mean([]) returns nan and the eye-property
        # computation fails with an unrelated-looking indexing error.
        if not landmarks_list:
            raise ValueError('No usable face images found in %s '
                             '(file_extensions=%r).'
                             % (target_img_dir, file_extensions))

        self.target_height_ = target_height
        self.target_width_ = target_width
        self.target_landmarks_ = np.mean(landmarks_list, axis=0)
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def fit_values(self, target_landmarks, target_width, target_height):
        """ Used for determining the eye location from pre-defined
        landmark arrays, eliminating the need for re-computing
        the average landmarks on a target image or image directory.

        Parameters
        ----------
        target_landmarks : array-like, shape=(n_landmarks, n_coordinates)
            Array containing the locations of the facial landmarks
            as determined by `mlxtend.image.extract_face_landmarks`.
            Rows 36-47 must hold the eye landmarks.
            NOTE(review): presumably shape (68, 2) -- confirm against
            `extract_face_landmarks`.

        target_width : int
            image width

        target_height : int
            image height

        Returns
        -------
        self : object

        """
        # Accept nested lists as well; index-array lookups need an ndarray.
        self.target_landmarks_ = np.asarray(target_landmarks)
        self.target_width_ = target_width
        self.target_height_ = target_height
        self.eyes_mid_point_, self.eye_distance_ = \
            self._calc_eye_properties(self.target_landmarks_)
        return self

    def _calc_eye_properties(self, landmarks):
        """ Calculates the eye properties of a landmark set.

        The center of each eye is the mean of its landmark rows
        (`LEFT_INDEX` and `RIGHT_INDEX`).

        Parameters
        ----------
        landmarks : np.array
            Facial landmarks, one landmark per row.

        Returns
        -------
        eyes_mid_point : np.array
            The middle point between the two eye centers.

        eye_distance : float
            The Euclidean distance between the two eye centers.

        """
        left_eye = np.mean(landmarks[LEFT_INDEX], axis=0)
        right_eye = np.mean(landmarks[RIGHT_INDEX], axis=0)
        eyes_mid_point = (left_eye + right_eye)/2.0
        eye_distance = np.sqrt(np.sum(np.square(left_eye - right_eye)))
        return eyes_mid_point, eye_distance

    def transform(self, img):
        """ transforms a single face image (img) to the target landmarks
        based on the location of the eyes by
        scaling, translation and cropping (if needed):

        (1) Scaling the image so that the distance of the two eyes
            in the given image (img) matches the distance of the
            two eyes in the target landmarks.

        (2) Translation is performed based on the middle point
            between the two eyes.

        Parameters
        ----------
        img : np.array, shape=(height, width, channels)
            Input image to be transformed.

        Returns
        -------
        img_tr : `uint8` np.array or None
            The transformed image with height `target_height_` and
            width `target_width_`, or None if no face
            was detected in `img`.

        Raises
        ------
        AttributeError
            If none of the `fit*` methods has been called before.

        """
        if not hasattr(self, 'eyes_mid_point_'):
            raise AttributeError('Not fitted, yet. Call one of the `fit*`'
                                 ' methods prior to using `transform`.')

        landmarks = extract_face_landmarks(img)
        if landmarks is None:  # no face detected
            return None

        eyes_mid_point, eye_distance = self._calc_eye_properties(landmarks)

        # Factor that makes the eye distance match the target eye distance.
        scale = self.eye_distance_ / eye_distance

        # Offset (in whole pixels of the scaled image) that moves the
        # scaled eye mid point onto the target eye mid point.
        tr = (self.eyes_mid_point_/scale - eyes_mid_point)
        tr = (int(tr[0]*scale), int(tr[1]*scale))

        tform = AffineTransform(scale=(scale, scale), rotation=0, shear=0,
                                translation=tr)
        h, w = self.target_height_, self.target_width_
        # `warp` expects the mapping from output to input coordinates,
        # hence the inverse of the forward transform.
        img_tr = warp(img, tform.inverse, output_shape=(h, w))
        # Scale the warped float image back to 8-bit pixel values.
        return np.array(img_tr*255, dtype='uint8')