Refactor Google Vision API extractors to build results via _to_df (replacing _parse_annotations)

PsychoinformaticsLab · Feb 12, 2018 · 9c8447d
1 parent dce9fc4
commit 9c8447d
Show file tree

Hide file tree

Showing 2 changed files with 45 additions and 59 deletions.
diff --git a/pliers/extractors/google.py b/pliers/extractors/google.py
@@ -4,6 +4,7 @@
 from pliers.transformers import GoogleVisionAPITransformer
 from pliers.extractors.base import ExtractorResult
 import numpy as np
+import pandas as pd
 
 
 class GoogleVisionAPIExtractor(GoogleVisionAPITransformer, ImageExtractor):
@@ -19,17 +20,12 @@ def _extract(self, stims):
         results = []
         for i, response in enumerate(responses):
             if response and self.response_object in response:
-                annotations = response[self.response_object]
-                features, values = self._parse_annotations(annotations)
-                values = [values]
-                results.append(ExtractorResult(values, stims[i], self,
-                                               features=features))
+                raw = response[self.response_object]
+                results.append(ExtractorResult(None, stims[i], self, raw=raw))
             elif 'error' in response:
                 raise Exception(response['error']['message'])
-
             else:
-                results.append(ExtractorResult([[]], stims[i], self,
-                                               features=[]))
+                results.append(ExtractorResult(None, stims[i], self, raw=[{}]))
 
         return results
 
@@ -41,13 +37,12 @@ class GoogleVisionAPIFaceExtractor(GoogleVisionAPIExtractor):
     request_type = 'FACE_DETECTION'
     response_object = 'faceAnnotations'
 
-    def _parse_annotations(self, annotations):
-        features = []
-        values = []
-
+    def _to_df(self, result):
+        annotations = result.raw
         if self.handle_annotations == 'first':
             annotations = [annotations[0]]
 
+        face_results = []
         for i, annotation in enumerate(annotations):
             data_dict = {}
             for field, val in annotation.items():
@@ -68,11 +63,9 @@ def _parse_annotations(self, annotations):
                 else:
                     data_dict[field] = val
 
-            names = ['face%d_%s' % (i+1, n) for n in data_dict.keys()]
-            features += names
-            values += list(data_dict.values())
+            face_results.append(data_dict)
 
-        return features, values
+        return pd.DataFrame(face_results)
 
 
 class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor):
@@ -82,13 +75,9 @@ class GoogleVisionAPILabelExtractor(GoogleVisionAPIExtractor):
     request_type = 'LABEL_DETECTION'
     response_object = 'labelAnnotations'
 
-    def _parse_annotations(self, annotations):
-        features = []
-        values = []
-        for annotation in annotations:
-            features.append(annotation['description'])
-            values.append(annotation['score'])
-        return features, values
+    def _to_df(self, result):
+        res = {label['description']: label['score'] for label in result.raw}
+        return pd.DataFrame([res])
 
 
 class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor):
@@ -98,15 +87,13 @@ class GoogleVisionAPIPropertyExtractor(GoogleVisionAPIExtractor):
     request_type = 'IMAGE_PROPERTIES'
     response_object = 'imagePropertiesAnnotation'
 
-    def _parse_annotations(self, annotation):
-        colors = annotation['dominantColors']['colors']
-        features = []
-        values = []
+    def _to_df(self, result):
+        colors = result.raw['dominantColors']['colors']
+        data_dict = {}
         for color in colors:
             rgb = color['color']
-            features.append((rgb['red'], rgb['green'], rgb['blue']))
-            values.append(color['score'])
-        return features, values
+            data_dict[(rgb['red'], rgb['green'], rgb['blue'])] = color['score']
+        return pd.DataFrame([data_dict])
 
 
 class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor):
@@ -116,8 +103,8 @@ class GoogleVisionAPISafeSearchExtractor(GoogleVisionAPIExtractor):
     request_type = 'SAFE_SEARCH_DETECTION'
     response_object = 'safeSearchAnnotation'
 
-    def _parse_annotations(self, annotation):
-        return list(annotation.keys()), list(annotation.values())
+    def _to_df(self, result):
+        return pd.DataFrame([result.raw])
 
 
 class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor):
@@ -127,12 +114,10 @@ class GoogleVisionAPIWebEntitiesExtractor(GoogleVisionAPIExtractor):
     request_type = 'WEB_DETECTION'
     response_object = 'webDetection'
 
-    def _parse_annotations(self, annotations):
-        features = []
-        values = []
-        if 'webEntities' in annotations:
-            for annotation in annotations['webEntities']:
-                if 'description' in annotation and 'score' in annotation:
-                    features.append(annotation['description'])
-                    values.append(annotation['score'])
-        return features, values
+    def _to_df(self, result):
+        data_dict = {}
+        if 'webEntities' in result.raw:
+            for entity in result.raw['webEntities']:
+                if 'description' in entity and 'score' in entity:
+                    data_dict[entity['description']] = entity['score']
+        return pd.DataFrame([data_dict])
diff --git a/pliers/tests/extractors/test_google_extractors.py b/pliers/tests/extractors/test_google_extractors.py
@@ -4,7 +4,8 @@
                                GoogleVisionAPIPropertyExtractor,
                                GoogleVisionAPISafeSearchExtractor,
                                GoogleVisionAPIWebEntitiesExtractor,
-                               ExtractorResult, merge_results)
+                               ExtractorResult,
+                               merge_results)
 from pliers.extractors.google import GoogleVisionAPIExtractor
 from pliers.stimuli import ImageStim, VideoStim
 import pytest
@@ -33,12 +34,12 @@ def test_google_vision_api_face_extractor_inits():
     filename = join(
         get_test_data_path(), 'payloads', 'google_vision_api_face_payload.json')
     response = json.load(open(filename, 'r'))
-    features, data = ext._parse_annotations(response['faceAnnotations'])
-    assert len(features) == len(data)
-    assert data[features.index('face1_angerLikelihood')] == 'VERY_UNLIKELY'
-    assert data[
-        features.index('face1_landmark_LEFT_EYE_BOTTOM_BOUNDARY_y')] == 257.023
-    assert np.isnan(data[features.index('face1_boundingPoly_vertex2_y')])
+    stim = ImageStim(join(get_test_data_path(), 'image', 'obama.jpg'))
+    res = ExtractorResult(None, stim, ext, raw=response['faceAnnotations'])
+    df = res.to_df()
+    assert df['angerLikelihood'][0] == 'VERY_UNLIKELY'
+    assert df['landmark_LEFT_EYE_BOTTOM_BOUNDARY_y'][0] == 257.023
+    assert np.isnan(df['boundingPoly_vertex2_y'][0])
 
 
 @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
@@ -47,9 +48,9 @@ def test_google_vision_api_face_extractor():
     filename = join(get_test_data_path(), 'image', 'obama.jpg')
     stim = ImageStim(filename)
     result = ext.transform(stim).to_df()
-    assert 'face1_joyLikelihood' in result.columns
-    assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY'
-    assert float(result['face1_face_detectionConfidence'][0]) > 0.7
+    assert 'joyLikelihood' in result.columns
+    assert result['joyLikelihood'][0] == 'VERY_LIKELY'
+    assert float(result['face_detectionConfidence'][0]) > 0.7
 
 
 @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
@@ -59,12 +60,12 @@ def test_google_vision_multiple_face_extraction():
     # Only first record
     ext = GoogleVisionAPIFaceExtractor(handle_annotations='first')
     result1 = ext.transform(stim).to_df()
-    assert 'face1_joyLikelihood' in result1.columns
+    assert 'joyLikelihood' in result1.columns
     # All records
     ext = GoogleVisionAPIFaceExtractor()
     result2 = ext.transform(stim).to_df()
-    assert 'face2_joyLikelihood' in result2.columns
-    assert result2.shape[1] > result1.shape[1]
+    assert 'joyLikelihood' in result2.columns
+    assert result2.shape[0] > result1.shape[0]
 
 
 @pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
@@ -77,23 +78,23 @@ def test_google_vision_face_batch():
     result = ext.transform(stims)
     result = merge_results(result, format='wide', extractor_names=False)
     assert result.shape == (2, 139)
-    assert 'face1_joyLikelihood' in result.columns
-    assert result['face1_joyLikelihood'][0] == 'VERY_LIKELY'
-    assert result['face1_joyLikelihood'][1] == 'VERY_LIKELY'
+    assert 'joyLikelihood' in result.columns
+    assert result['joyLikelihood'][0] == 'VERY_LIKELY'
+    assert result['joyLikelihood'][1] == 'VERY_LIKELY'
 
     video = VideoStim(join(get_test_data_path(), 'video', 'obama_speech.mp4'))
     conv = FrameSamplingFilter(every=10)
     video = conv.transform(video)
     result = ext.transform(video)
     result = merge_results(result, format='wide', extractor_names=False)
-    assert 'face1_joyLikelihood' in result.columns
+    assert 'joyLikelihood' in result.columns
     assert result.shape == (11, 139)
 
     video = VideoStim(join(get_test_data_path(), 'video', 'small.mp4'))
     video = conv.transform(video)
     result = ext.transform(video)
     result = merge_results(result, format='wide', extractor_names=False)
-    assert 'face1_joyLikelihood' not in result.columns
+    assert 'joyLikelihood' not in result.columns
     assert len(result) == 0