Skip to content

Commit

Permalink
fixed clarifai tests, everything importing
Browse files Browse the repository at this point in the history
  • Loading branch information
qmac committed Apr 13, 2018
1 parent 3ba2b64 commit 1e79272
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 39 deletions.
14 changes: 12 additions & 2 deletions pliers/extractors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
from .base import Extractor, ExtractorResult, merge_results
from .api import (IndicoAPITextExtractor,
IndicoAPIImageExtractor,
ClarifaiAPIExtractor,
ClarifaiAPIImageExtractor,
ClarifaiAPIVideoExtractor,
GoogleVisionAPIFaceExtractor,
GoogleVisionAPILabelExtractor,
GoogleVisionAPIPropertyExtractor,
GoogleVisionAPISafeSearchExtractor,
GoogleVisionAPIWebEntitiesExtractor,
GoogleVideoIntelligenceAPIExtractor,
GoogleVideoAPILabelDetectionExtractor,
GoogleVideoAPIShotDetectionExtractor,
GoogleVideoAPIExplicitDetectionExtractor,
MicrosoftAPIFaceExtractor,
MicrosoftAPIFaceEmotionExtractor,
MicrosoftVisionAPIExtractor,
Expand Down Expand Up @@ -54,7 +59,8 @@
'ExtractorResult',
'IndicoAPITextExtractor',
'IndicoAPIImageExtractor',
'ClarifaiAPIExtractor',
'ClarifaiAPIImageExtractor',
'ClarifaiAPIVideoExtractor',
'STFTAudioExtractor',
'MeanAmplitudeExtractor',
'LibrosaFeatureExtractor',
Expand All @@ -77,6 +83,10 @@
'GoogleVisionAPIPropertyExtractor',
'GoogleVisionAPISafeSearchExtractor',
'GoogleVisionAPIWebEntitiesExtractor',
'GoogleVideoIntelligenceAPIExtractor',
'GoogleVideoAPILabelDetectionExtractor',
'GoogleVideoAPIShotDetectionExtractor',
'GoogleVideoAPIExplicitDetectionExtractor',
'BrightnessExtractor',
'SaliencyExtractor',
'SharpnessExtractor',
Expand Down
16 changes: 13 additions & 3 deletions pliers/extractors/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from .clarifai import ClarifaiAPIExtractor
from .clarifai import (ClarifaiAPIImageExtractor,
ClarifaiAPIVideoExtractor)
from .indico import (IndicoAPITextExtractor,
IndicoAPIImageExtractor)
from .google import (GoogleVisionAPIFaceExtractor,
GoogleVisionAPILabelExtractor,
GoogleVisionAPIPropertyExtractor,
GoogleVisionAPISafeSearchExtractor,
GoogleVisionAPIWebEntitiesExtractor)
GoogleVisionAPIWebEntitiesExtractor,
GoogleVideoIntelligenceAPIExtractor,
GoogleVideoAPILabelDetectionExtractor,
GoogleVideoAPIShotDetectionExtractor,
GoogleVideoAPIExplicitDetectionExtractor)
from .microsoft import (MicrosoftAPIFaceExtractor,
MicrosoftAPIFaceEmotionExtractor,
MicrosoftVisionAPIExtractor,
Expand All @@ -16,14 +21,19 @@
MicrosoftVisionAPIAdultExtractor)

__all__ = [
'ClarifaiAPIExtractor',
'ClarifaiAPIImageExtractor',
'ClarifaiAPIVideoExtractor',
'IndicoAPITextExtractor',
'IndicoAPIImageExtractor',
'GoogleVisionAPIFaceExtractor',
'GoogleVisionAPILabelExtractor',
'GoogleVisionAPIPropertyExtractor',
'GoogleVisionAPISafeSearchExtractor',
'GoogleVisionAPIWebEntitiesExtractor',
'GoogleVideoIntelligenceAPIExtractor',
'GoogleVideoAPILabelDetectionExtractor',
'GoogleVideoAPIShotDetectionExtractor',
'GoogleVideoAPIExplicitDetectionExtractor',
'MicrosoftAPIFaceExtractor',
'MicrosoftAPIFaceEmotionExtractor',
'MicrosoftVisionAPIExtractor',
Expand Down
68 changes: 51 additions & 17 deletions pliers/extractors/api/clarifai.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

class ClarifaiAPIExtractor(APITransformer):

''' Uses the Clarifai API to extract tags of images.
''' Uses the Clarifai API to extract tags of visual stimuli.
Args:
api_key (str): A valid API_KEY for the Clarifai API. Only needs to be
Expand All @@ -41,7 +41,6 @@ class ClarifaiAPIExtractor(APITransformer):
API. For example, ['food', 'animal'].
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

_log_attributes = ('api_key', 'model', 'model_name', 'min_value',
Expand Down Expand Up @@ -76,8 +75,7 @@ def __init__(self, api_key=None, model='general-v1.3', min_value=None,
select_concepts = listify(select_concepts)
self.select_concepts = [clarifai_client.Concept(concept_name=n)
for n in select_concepts]
super(ClarifaiAPIExtractor, self).__init__(rate_limit=rate_limit,
batch_size=batch_size)
super(ClarifaiAPIExtractor, self).__init__(rate_limit=rate_limit)

@property
def api_keys(self):
Expand Down Expand Up @@ -105,8 +103,38 @@ def _parse_annotations(self, annotation):
class ClarifaiAPIImageExtractor(ClarifaiAPIExtractor, BatchTransformerMixin,
ImageExtractor):

''' Uses the Clarifai API to extract tags of images.
Args:
api_key (str): A valid API_KEY for the Clarifai API. Only needs to be
passed the first time the extractor is initialized.
model (str): The name of the Clarifai model to use. If None, defaults
to the general image tagger.
min_value (float): A value between 0.0 and 1.0 indicating the minimum
confidence required to return a prediction. Defaults to 0.0.
max_concepts (int): A value between 0 and 200 indicating the maximum
number of label predictions returned.
select_concepts (list): List of concepts (strings) to query from the
API. For example, ['food', 'animal'].
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
batch_size (int): Number of stims to send per batched API request.
'''

_batch_size = 32

def __init__(self, api_key=None, model='general-v1.3', min_value=None,
             max_concepts=None, select_concepts=None, rate_limit=None,
             batch_size=None):
    # This initializer adds no logic of its own: each argument is handed
    # on, by keyword and unchanged, to the next initializer in the MRO.
    # NOTE(review): the parent signature is not fully visible here --
    # confirm that it still accepts `batch_size`.
    forwarded = dict(api_key=api_key,
                     model=model,
                     min_value=min_value,
                     max_concepts=max_concepts,
                     select_concepts=select_concepts,
                     rate_limit=rate_limit,
                     batch_size=batch_size)
    super(ClarifaiAPIImageExtractor, self).__init__(**forwarded)

def _extract(self, stims):
verify_dependencies(['clarifai_client'])

Expand Down Expand Up @@ -140,17 +168,23 @@ def _extract(self, stim):
return ExtractorResult(outputs, stim, self)

def _to_df(self, result):
    ''' Converts a per-frame API response into a DataFrame.

    One row is produced for every entry in the response's 'frames' list.
    As a side effect the computed onsets, durations and feature names are
    stored on `result` (`_onsets`, `_durations`, `features`).

    Args:
        result: Result object whose `_data[0]['data']['frames']` holds the
            per-frame responses and whose `stim` exposes a `duration`.

    Returns:
        A pandas DataFrame holding the parsed annotations of each frame.
    '''
    frames = result._data[0]['data']['frames']
    last_index = len(frames) - 1
    data = []
    onsets = []
    durations = []
    for i, frame_res in enumerate(frames):
        data.append(self._parse_annotations(frame_res))
        # 'time' is divided by 1000.0 -- presumably milliseconds being
        # converted to seconds; confirm against the API documentation.
        onset = frame_res['frame_info']['time'] / 1000.0
        if i == last_index:
            # The final frame has no successor, so it is taken to last
            # until the end of the stimulus.
            end = result.stim.duration
        else:
            # Every other frame lasts until the next frame begins.
            end = frames[i + 1]['frame_info']['time'] / 1000.0
        onsets.append(onset)
        durations.append(end - onset)

    result._onsets = onsets
    result._durations = durations
    df = pd.DataFrame(data)
    result.features = list(df.columns)
    return df
21 changes: 17 additions & 4 deletions pliers/extractors/api/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,10 @@ def _parse_label(self, features, label):
for segment in label['segments']:
onset, duration = self._get_onset_duration(segment['segment'])
score = segment['confidence']
data.extend(self._enumerate_features(features, onset, duration, score))
data.extend(self._enumerate_features(features,
onset,
duration,
score))
return data

def _parse_frame(self, features, annotation, score_key, max_duration):
Expand All @@ -257,7 +260,10 @@ def _parse_frame(self, features, annotation, score_key, max_duration):
end = float(frames[i+1]['timeOffset'][:-1])
duration = end - onset
score = frame[score_key]
data.extend(self._enumerate_features(features, onset, duration, score))
data.extend(self._enumerate_features(features,
onset,
duration,
score))
return data

def _to_df(self, result):
Expand All @@ -272,8 +278,12 @@ def _to_df(self, result):
for category in annot.get('categoryEntities', []):
feats.append('category_' + category['description'])
if key == 'frameLabelAnnotations':
data.extend(self._parse_frame(feats, annot, 'confidence', duration))
data.extend(self._parse_frame(feats,
annot,
'confidence',
duration))
else:
# Good for shot or segment labels
data.extend(self._parse_label(feats, annot))
elif key == 'shotAnnotations':
for shot in res:
Expand All @@ -285,7 +295,10 @@ def _to_df(self, result):
})
elif key == 'explicitAnnotation':
feature = 'pornographyLikelihood'
data.extend(self._parse_frame([feature], res, feature, duration))
data.extend(self._parse_frame([feature],
res,
feature,
duration))

df = pd.DataFrame(data)
result._onsets = df['onset']
Expand Down
41 changes: 28 additions & 13 deletions pliers/tests/extractors/api/test_clarifai_extractors.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,58 @@
from os.path import join
from ...utils import get_test_data_path
from pliers import config
from pliers.extractors import ClarifaiAPIExtractor
from pliers.extractors import (ClarifaiAPIImageExtractor,
ClarifaiAPIVideoExtractor)
from pliers.extractors.base import merge_results
from pliers.stimuli import ImageStim
from pliers.stimuli import ImageStim, VideoStim
import numpy as np
import pytest

IMAGE_DIR = join(get_test_data_path(), 'image')
VIDEO_DIR = join(get_test_data_path(), 'video')


@pytest.mark.skipif("'CLARIFAI_API_KEY' not in os.environ")
def test_clarifai_api_extractor():
    ''' Exercises ClarifaiAPIImageExtractor on single images: default
    settings, max_concepts, min_value, select_concepts, a URL-based
    stimulus, and key validation with an invalid API key. '''
    stim = ImageStim(join(IMAGE_DIR, 'apple.jpg'))
    ext = ClarifaiAPIImageExtractor()
    assert ext.validate_keys()
    result = ext.transform(stim).to_df()
    assert result['apple'][0] > 0.5
    # Positional column access; `.iloc` replaces the removed `.ix` indexer.
    assert result.iloc[:, 5][0] > 0.0

    result = ClarifaiAPIImageExtractor(max_concepts=5).transform(stim).to_df()
    assert result.shape == (1, 9)

    result = ClarifaiAPIImageExtractor(
        min_value=0.9).transform(stim).to_df(object_id=False)
    assert all(np.isnan(d) or d > 0.9 for d in result.values[0, 3:])

    concepts = ['cat', 'dog']
    result = ClarifaiAPIImageExtractor(
        select_concepts=concepts).transform(stim)
    result = result.to_df()
    assert result.shape == (1, 6)
    assert 'cat' in result.columns and 'dog' in result.columns

    url = 'https://tuition.utexas.edu/sites/all/themes/tuition/logo.png'
    stim = ImageStim(url=url)
    result = ClarifaiAPIImageExtractor(max_concepts=5).transform(stim).to_df()
    assert result.shape == (1, 9)
    assert result['symbol'][0] > 0.8

    ext = ClarifaiAPIImageExtractor(api_key='nogood')
    assert not ext.validate_keys()


@pytest.mark.skipif("'CLARIFAI_API_KEY' not in os.environ")
def test_clarifai_api_extractor_batch():
    ''' Sends two images through a single transform call and checks that
    at least one of the first two merged rows scores above 0.5 for
    'apple'. '''
    stims = [ImageStim(join(IMAGE_DIR, name))
             for name in ('apple.jpg', 'obama.jpg')]
    merged = merge_results(ClarifaiAPIImageExtractor().transform(stims))
    apple = merged['ClarifaiAPIImageExtractor#apple']
    assert any(apple[row] > 0.5 for row in (0, 1))


@pytest.mark.skipif("'CLARIFAI_API_KEY' not in os.environ")
Expand All @@ -60,16 +62,29 @@ def test_clarifai_api_extractor_large():
config.set_option('allow_large_jobs', False)
config.set_option('large_job', 1)

ext = ClarifaiAPIExtractor()
ext = ClarifaiAPIImageExtractor()
images = [ImageStim(join(IMAGE_DIR, 'apple.jpg')),
ImageStim(join(IMAGE_DIR, 'obama.jpg'))]
with pytest.raises(ValueError):
merge_results(ext.transform(images))

config.set_option('allow_large_jobs', True)
results = merge_results(ext.transform(images))
assert 'ClarifaiAPIExtractor#apple' in results.columns
assert 'ClarifaiAPIImageExtractor#apple' in results.columns
assert results.shape == (2, 49)

config.set_option('allow_large_jobs', default)
config.set_option('large_job', default_large)


@pytest.mark.skipif("'CLARIFAI_API_KEY' not in os.environ")
def test_clarifai_api_video_extractor():
    ''' Runs ClarifaiAPIVideoExtractor on a short video and checks the
    shape of the resulting DataFrame, one tag score, and the onset and
    duration values of selected rows. '''
    stim = VideoStim(join(VIDEO_DIR, 'small.mp4'))
    ext = ClarifaiAPIVideoExtractor()
    assert ext.validate_keys()
    result = ext.transform(stim).to_df()
    assert result.shape == (6, 27)
    assert result['toy'][0] > 0.5
    # Timing values are floats, so every timing check uses a tolerance
    # rather than exact equality.
    assert np.isclose(result['onset'][1], 1.0)
    assert np.isclose(result['duration'][0], 1.0)
    assert np.isclose(result['duration'][5], 0.57)

0 comments on commit 1e79272

Please sign in to comment.