Skip to content

Commit

Permalink
Merge fef368d into 7b65ddf
Browse files Browse the repository at this point in the history
  • Loading branch information
tyarkoni committed Mar 26, 2019
2 parents 7b65ddf + fef368d commit 53f1212
Show file tree
Hide file tree
Showing 26 changed files with 252 additions and 255 deletions.
26 changes: 13 additions & 13 deletions .travis.yml
@@ -1,19 +1,18 @@
sudo: required
dist: trusty
env:
global:
- CONDA_DEPS="pip pytest numpy pillow pandas requests scipy nltk six tqdm seaborn opencv
matplotlib pathos tensorflow contextlib2"
language: python
matrix:
include:
- os: linux
env:
- PYTHON_VERSION=3.5
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
python: 3.5
- os: linux
env:
- PYTHON_VERSION=2.7
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"
- PYTHON_VERSION=3.6
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
python: 3.6
before_install:
- openssl aes-256-cbc -K $encrypted_a0a62c26415d_key -iv $encrypted_a0a62c26415d_iv
-in pliers/tests/credentials/google.json.enc -out pliers/tests/credentials/google.json -d || true
Expand All @@ -30,24 +29,25 @@ before_install:
- conda config --add channels conda-forge
- conda update -y conda
- rm -rf /home/travis/miniconda/envs/test-env
- conda create -q -n test-env python=$PYTHON_VERSION $CONDA_DEPS
- conda create -q -n test-env python=$PYTHON_VERSION
- source activate test-env
install:
- sudo apt-get install libboost-python-dev
- pip install --upgrade --ignore-installed setuptools
- pip install python-magic moviepy coveralls pytest-cov pysrt xlrd pytesseract
- pip install clarifai==2.1.0
- pip install SpeechRecognition IndicoIo pygraphviz sklearn python-twitter gensim
google-compute-engine librosa face_recognition google-api-python-client
- pip install spacy
- pip install -r requirements.txt --upgrade
- pip install -r optional-dependencies.txt --upgrade
- pip install --upgrade coveralls pytest-cov

before_script:
- python -m pliers.support.download
- python -m spacy download en_core_web_sm
script:
- py.test --pyargs pliers --cov-report term-missing --cov=pliers
- py.test --pyargs pliers --cov-report term-missing --cov=pliers -m "not requires_payment"
after_success:
- coveralls
before_cache:
- conda clean --tarballs --packages --index-cache
cache:
pip: true
directories:
- "$HOME/miniconda"
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# pliers

A Python 2/3 package for automated feature extraction.
A Python 3 package for automated feature extraction.

## Status
* [![Build Status](https://travis-ci.org/tyarkoni/pliers.svg?branch=master)](https://travis-ci.org/tyarkoni/pliers)
Expand Down
2 changes: 1 addition & 1 deletion docs/stimuli.rst
Expand Up @@ -65,7 +65,7 @@ Consider this code:
tac = TranscribedAudioCompoundStim(audio, text)

# Construct a Graph with two extractors
graph = Graph(['PartOfSpeechExtractor', 'RMSEExtractor'])
graph = Graph(['PartOfSpeechExtractor', 'RMSExtractor'])

# Apply the extractors to the Stim
result = graph.transform(tac)
Expand Down
2 changes: 1 addition & 1 deletion docs/transformers.rst
Expand Up @@ -52,7 +52,7 @@ At present, pliers implements several dozen |Extractor| classes that span a wide
MelspectrogramExtractor
MFCCExtractor
PolyFeaturesExtractor
RMSEExtractor
RMSExtractor
SpectralCentroidExtractor
SpectralBandwidthExtractor
SpectralContrastExtractor
Expand Down
5 changes: 4 additions & 1 deletion optional-dependencies.txt
Expand Up @@ -4,8 +4,9 @@ face_recognition
python-twitter
gensim
google-api-python-client
google-compute-engine
IndicoIo
librosa
librosa>=0.6.3
matplotlib
opencv-python
pathos
Expand All @@ -15,5 +16,7 @@ pytesseract
python-twitter
scikit-learn
seaborn
spacy
SpeechRecognition>=3.6.0
tensorflow>=1.0.0
xlrd
3 changes: 2 additions & 1 deletion pliers/config.py
Expand Up @@ -12,7 +12,8 @@

_default_converters = {
'AudioStim->TextStim':
('IBMSpeechAPIConverter', 'WitTranscriptionConverter'),
('GoogleSpeechAPIConverter', 'IBMSpeechAPIConverter',
'WitTranscriptionConverter'),
'ImageStim->TextStim':
('GoogleVisionAPITextConverter', 'TesseractConverter')
}
Expand Down
15 changes: 9 additions & 6 deletions pliers/converters/api/ibm.py
Expand Up @@ -8,9 +8,9 @@
from pliers.utils import attempt_to_import, verify_dependencies
from pliers.converters.audio import AudioToTextConverter
from pliers.transformers.api import APITransformer
from six.moves.urllib.parse import urlencode
from six.moves.urllib.request import Request, urlopen
from six.moves.urllib.error import URLError, HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

sr = attempt_to_import('speech_recognition', 'sr')

Expand All @@ -33,14 +33,16 @@ class IBMSpeechAPIConverter(APITransformer, AudioToTextConverter):
supported.
rate_limit (int): The minimum number of seconds required between
transform calls on this Transformer.
model (str): The model to use for speech recognition (e.g., 'en-US',
'zh-CN', etc.). Don't include the "_BroadbandModel" suffix; it is
appended automatically when the API request is built.
'''

_env_keys = ('IBM_USERNAME', 'IBM_PASSWORD')
_log_attributes = ('username', 'password', 'resolution')
_log_attributes = ('username', 'password', 'resolution', 'model')
VERSION = '1.0'

def __init__(self, username=None, password=None, resolution='words',
rate_limit=None):
rate_limit=None, model='en-US'):
verify_dependencies(['sr'])
if username is None or password is None:
try:
Expand All @@ -53,6 +55,7 @@ def __init__(self, username=None, password=None, resolution='words',
self.username = username
self.password = password
self.resolution = resolution
self.model = model
super(IBMSpeechAPIConverter, self).__init__(rate_limit=rate_limit)

@property
Expand Down Expand Up @@ -119,7 +122,7 @@ def _query_api(self, clip):
convert_rate=None if clip.sample_rate >= 16000 else 16000,
convert_width=None if clip.sample_width >= 2 else 2
)
model = "{0}_BroadbandModel".format("en-US")
model = "{0}_BroadbandModel".format(self.model)
url = "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize?{0}".format(urlencode({
"profanity_filter": "false",
"continuous": "true",
Expand Down
2 changes: 0 additions & 2 deletions pliers/extractors/__init__.py
Expand Up @@ -39,7 +39,6 @@
SpectralContrastExtractor,
SpectralRolloffExtractor,
PolyFeaturesExtractor,
RMSEExtractor,
ZeroCrossingRateExtractor,
ChromaSTFTExtractor,
ChromaCQTExtractor,
Expand Down Expand Up @@ -83,7 +82,6 @@
'SpectralContrastExtractor',
'SpectralRolloffExtractor',
'PolyFeaturesExtractor',
'RMSEExtractor',
'ZeroCrossingRateExtractor',
'ChromaSTFTExtractor',
'ChromaCQTExtractor',
Expand Down
10 changes: 9 additions & 1 deletion pliers/extractors/api/clarifai.py
Expand Up @@ -182,7 +182,15 @@ def _to_df(self, result):
else:
end = frames[i+1]['frame_info']['time'] / 1000.0
onsets.append(onset)
durations.append(end - onset)
# NOTE: As of Clarifai API v2 and client library 2.6.1, the API
# returns more frames than it should, at least for some videos.
# E.g., given a 5.5 second clip, it may return 7 frames, with the
# last beginning at 6000 ms. This appears to be a problem on the
# Clarifai end, and it's not clear where the extra frame comes from
# (possibly it's the very last frame?). We don't try to correct for
# it here; we only clamp each duration at zero so that durations
# are never negative.
durations.append(max([end - onset, 0]))

result._onsets = onsets
result._durations = durations
Expand Down
16 changes: 11 additions & 5 deletions pliers/extractors/api/google.py
@@ -1,6 +1,5 @@
''' Google API-based feature extraction classes. '''

import base64
from pliers.extractors.image import ImageExtractor
from pliers.extractors.text import TextExtractor
from pliers.extractors.video import VideoExtractor
Expand All @@ -13,6 +12,8 @@
import pandas as pd
import logging
import time
import warnings
import os
from collections import defaultdict


Expand Down Expand Up @@ -208,16 +209,21 @@ def _query_operations(self, name):
return request_obj.execute(num_retries=self.num_retries)

def _build_request(self, stim):
with stim.get_filename() as filename:
with open(filename, 'rb') as f:
vid_data = f.read()

context = self.config if self.config else {}
if self.segments:
context['segments'] = self.segments

with stim.get_filename() as filename:
size = os.path.getsize(filename)
LIMIT = 524288000
if size > LIMIT:
warnings.warn("Video file is very large ({} bytes) and may "
"exceed the Google Video Intelligence payload "
"limit ({} bytes).".format(size, LIMIT))

request = {
'inputContent': base64.b64encode(vid_data).decode(),
'inputContent': stim.get_bytestring(),
'features': self.features,
'videoContext': context
}
Expand Down
6 changes: 5 additions & 1 deletion pliers/extractors/api/indico.py
Expand Up @@ -11,6 +11,7 @@
from pliers.transformers.api import APITransformer
from pliers.utils import attempt_to_import, verify_dependencies
import pandas as pd
import numpy as np

indicoio = attempt_to_import('indicoio')

Expand Down Expand Up @@ -84,6 +85,7 @@ def _get_tokens(self, stims):
return [stim.data for stim in stims if stim.data is not None]

def _extract(self, stims):
stims = list(stims)
tokens = self._get_tokens(stims)
scores = [model(tokens) for model in self.models]

Expand Down Expand Up @@ -157,5 +159,7 @@ def _get_tokens(self, stims):
if s.url:
toks.append(s.url)
elif s.data is not None:
toks.append(s.data)
# IndicoIO breaks if given subclasses of ndarray, and s.data is an
# imageio Image instance (an ndarray subclass), so we explicitly
# convert it to a plain ndarray.
toks.append(np.array(s.data))
return toks

0 comments on commit 53f1212

Please sign in to comment.