Skip to content

Commit

Permalink
google extractors using _to_df
Browse files: browse the repository at this point in the history
  • Loading branch information
qmac committed Feb 12, 2018
1 parent dce9fc4 commit 9c8447d
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 59 deletions.
67 changes: 26 additions & 41 deletions pliers/extractors/google.py
Expand Up @@ -4,6 +4,7 @@
from pliers.transformers import GoogleVisionAPITransformer
from pliers.extractors.base import ExtractorResult
import numpy as np
import pandas as pd


class GoogleVisionAPIExtractor(GoogleVisionAPITransformer, ImageExtractor):
Expand All @@ -19,17 +20,12 @@ def _extract(self, stims):
results = []
for i, response in enumerate(responses):
if response and self.response_object in response:
annotations = response[self.response_object]
features, values = self._parse_annotations(annotations)
values = [values]
results.append(ExtractorResult(values, stims[i], self,
features=features))
raw = response[self.response_object]
results.append(ExtractorResult(None, stims[i], self, raw=raw))
elif 'error' in response:
raise Exception(response['error']['message'])

else:
results.append(ExtractorResult([[]], stims[i], self,
features=[]))
results.append(ExtractorResult(None, stims[i], self, raw=[{}]))

return results

Expand All @@ -41,13 +37,12 @@ class GoogleVisionAPIFaceExtractor(GoogleVisionAPIExtractor):
request_type = 'FACE_DETECTION'
response_object = 'faceAnnotations'

def _parse_annotations(self, annotations):
features = []
values = []

def _to_df(self, result):
annotations = result.raw
if self.handle_annotations == 'first':
annotations = [annotations[0]]

face_results = []
for i, annotation in enumerate(annotations):
data_dict = {}
for field, val in annotation.items():
Expand All @@ -68,11 +63,9 @@ def _parse_annotations(self, annotations):
else:
data_dict[field] = val

names = ['face%d_%s' % (i+1, n) for n in data_dict.keys()]
features += names
values += list(data_dict.values())
face_results.append(data_dict)

return features, values
return pd.DataFrame(face_results)


class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor):
Expand All @@ -82,13 +75,9 @@ class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor):
request_type = 'LABEL_DETECTION'
response_object = 'labelAnnotations'

def _parse_annotations(self, annotations):
features = []
values = []
for annotation in annotations:
features.append(annotation['description'])
values.append(annotation['score'])
return features, values
def _to_df(self, result):
res = {label['description']: label['score'] for label in result.raw}
return pd.DataFrame([res])


class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor):
Expand All @@ -98,15 +87,13 @@ class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor):
request_type = 'IMAGE_PROPERTIES'
response_object = 'imagePropertiesAnnotation'

def _parse_annotations(self, annotation):
colors = annotation['dominantColors']['colors']
features = []
values = []
def _to_df(self, result):
colors = result.raw['dominantColors']['colors']
data_dict = {}
for color in colors:
rgb = color['color']
features.append((rgb['red'], rgb['green'], rgb['blue']))
values.append(color['score'])
return features, values
data_dict[(rgb['red'], rgb['green'], rgb['blue'])] = color['score']
return pd.DataFrame([data_dict])


class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor):
Expand All @@ -116,8 +103,8 @@ class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor):
request_type = 'SAFE_SEARCH_DETECTION'
response_object = 'safeSearchAnnotation'

def _parse_annotations(self, annotation):
return list(annotation.keys()), list(annotation.values())
def _to_df(self, result):
return pd.DataFrame([result.raw])


class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor):
Expand All @@ -127,12 +114,10 @@ class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor):
request_type = 'WEB_DETECTION'
response_object = 'webDetection'

def _parse_annotations(self, annotations):
features = []
values = []
if 'webEntities' in annotations:
for annotation in annotations['webEntities']:
if 'description' in annotation and 'score' in annotation:
features.append(annotation['description'])
values.append(annotation['score'])
return features, values
def _to_df(self, result):
data_dict = {}
if 'webEntities' in result.raw:
for entity in result.raw['webEntities']:
if 'description' in entity and 'score' in entity:
data_dict[entity['description']] = entity['score']
return pd.DataFrame([data_dict])
37 changes: 19 additions & 18 deletions pliers/tests/extractors/test_google_extractors.py
Expand Up @@ -4,7 +4,8 @@
GoogleVisionAPIPropertyExtractor,
GoogleVisionAPISafeSearchExtractor,
GoogleVisionAPIWebEntitiesExtractor,
ExtractorResult, merge_results)
ExtractorResult,
merge_results)
from pliers.extractors.google import GoogleVisionAPIExtractor
from pliers.stimuli import ImageStim, VideoStim
import pytest
Expand Down Expand Up @@ -33,12 +34,12 @@ def test_google_vision_api_face_extractor_inits():
filename = join(
get_test_data_path(), 'payloads', 'google_vision_api_face_payload.json')
response = json.load(open(filename, 'r'))
features, data = ext._parse_annotations(response['faceAnnotations'])
assert len(features) == len(data)
assert data[features.index('face1_angerLikelihood')] == 'VERY_UNLIKELY'
assert data[
features.index('face1_landmark_LEFT_EYE_BOTTOM_BOUNDARY_y')] == 257.023
assert np.isnan(data[features.index('face1_boundingPoly_vertex2_y')])
stim = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg'))
res = ExtractorResult(None, stim, ext, raw=response['faceAnnotations'])
df = res.to_df()
assert df['angerLikelihood'][0] == 'VERY_UNLIKELY'
assert df['landmark_LEFT_EYE_BOTTOM_BOUNDARY_y'][0] == 257.023
assert np.isnan(df['boundingPoly_vertex2_y'][0])


@pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
Expand All @@ -47,9 +48,9 @@ def test_google_vision_api_face_extractor():
filename = join(get_test_data_path(), 'image', 'obama.jpg')
stim = ImageStim(filename)
result = ext.transform(stim).to_df()
assert 'face1_joyLikelihood' in result.columns
assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY'
assert float(result['face1_face_detectionConfidence'][0]) > 0.7
assert 'joyLikelihood' in result.columns
assert result['joyLikelihood'][0] == 'VERY_LIKELY'
assert float(result['face_detectionConfidence'][0]) > 0.7


@pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
Expand All @@ -59,12 +60,12 @@ def test_google_vision_multiple_face_extraction():
# Only first record
ext = GoogleVisionAPIFaceExtractor(handle_annotations='first')
result1 = ext.transform(stim).to_df()
assert 'face1_joyLikelihood' in result1.columns
assert 'joyLikelihood' in result1.columns
# All records
ext = GoogleVisionAPIFaceExtractor()
result2 = ext.transform(stim).to_df()
assert 'face2_joyLikelihood' in result2.columns
assert result2.shape[1] > result1.shape[1]
assert 'joyLikelihood' in result2.columns
assert result2.shape[0] > result1.shape[0]


@pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
Expand All @@ -77,23 +78,23 @@ def test_google_vision_face_batch():
result = ext.transform(stims)
result = merge_results(result, format='wide', extractor_names=False)
assert result.shape == (2, 139)
assert 'face1_joyLikelihood' in result.columns
assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY'
assert result['face1_joyLikelihood'][1] == 'VERY_LIKELY'
assert 'joyLikelihood' in result.columns
assert result['joyLikelihood'][0] == 'VERY_LIKELY'
assert result['joyLikelihood'][1] == 'VERY_LIKELY'

video = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4'))
conv = FrameSamplingFilter(every=10)
video = conv.transform(video)
result = ext.transform(video)
result = merge_results(result, format='wide', extractor_names=False)
assert 'face1_joyLikelihood' in result.columns
assert 'joyLikelihood' in result.columns
assert result.shape == (11, 139)

video = VideoStim(join(get_test_data_path(), 'video', 'small.mp4'))
video = conv.transform(video)
result = ext.transform(video)
result = merge_results(result, format='wide', extractor_names=False)
assert 'face1_joyLikelihood' not in result.columns
assert 'joyLikelihood' not in result.columns
assert len(result) == 0


Expand Down

0 comments on commit 9c8447d

Please sign in to comment.