Skip to content

Commit

Permalink
Merge 9d3f262 into 266518e
Browse files Browse the repository at this point in the history
  • Loading branch information
tyarkoni committed May 2, 2020
2 parents 266518e + 9d3f262 commit 078658f
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 12 deletions.
8 changes: 6 additions & 2 deletions pliers/converters/__init__.py
@@ -1,5 +1,6 @@
''' The `Converter` hierarchy contains Transformer classes that take a `Stim`
of one type as input and return a `Stim` of a different type as output.
''' The `Converter` hierarchy contains Transformer classes that take an object
of arbitrary class (but almost always a `Stim` subclass) as input, and return a
`Stim` instance (of different class) as output.
'''

from .api import (WitTranscriptionConverter,
Expand All @@ -14,6 +15,8 @@
ComplexTextIterator)
from .multistep import VideoToTextConverter, VideoToComplexTextConverter
from .video import VideoToAudioConverter
from .misc import ExtractorResultToSeriesConverter


__all__ = [
'WitTranscriptionConverter',
Expand All @@ -29,6 +32,7 @@
'VideoToComplexTextConverter',
'VideoToAudioConverter',
'RevAISpeechAPIConverter',
'ExtractorResultToSeriesConverter',
'Converter',
'get_converter'
]
7 changes: 4 additions & 3 deletions pliers/converters/base.py
Expand Up @@ -37,11 +37,10 @@ def get_converter(in_type, out_type, *args, **kwargs):
'''
convs = pliers.converters.__all__

# If config includes default converters for this combination, try them
# first
out_type = listify(out_type)[::-1]
# If config includes default converters for this combination, try them 1st
default_convs = config.get_option('default_converters')

out_type = listify(out_type)[::-1]
for ot in out_type:
conv_str = '{}->{}'.format(in_type.__name__, ot.__name__)
if conv_str in default_convs:
Expand All @@ -52,8 +51,10 @@ def get_converter(in_type, out_type, *args, **kwargs):
if not inspect.isclass(cls) or not issubclass(cls, Converter):
continue

# Some classes are only available if certain environment keys are set
available = cls.available if issubclass(
cls, EnvironmentKeyMixin) else True

if cls._input_type == in_type and cls._output_type in out_type \
and available:
conv = cls(*args, **kwargs)
Expand Down
23 changes: 23 additions & 0 deletions pliers/converters/misc.py
@@ -0,0 +1,23 @@
"""Miscellaneous conversion classes."""

from pliers.extractors import ExtractorResult
from pliers.stimuli import SeriesStim
from .base import Converter


class ExtractorResultToSeriesConverter(Converter):
    """Turns each row of an ExtractorResult into its own SeriesStim.

    The result is first rendered as a DataFrame without timing, metadata or
    object-id columns; every row of that frame then becomes one SeriesStim.
    """

    _input_type = ExtractorResult
    _output_type = SeriesStim

    def _convert(self, result):
        """Return a list with one SeriesStim per row of ``result``.

        Onset and duration are looked up on the result by the row's index
        label and are None when the result does not define them. Order is
        looked up the same way, but falls back to the row's index label.
        """
        frame = result.to_df(timing=False, metadata=False, object_id=False)

        def _lookup(values, label, fallback):
            # Timing attributes are optional on the result; use the fallback
            # when the attribute is missing altogether.
            return fallback if values is None else values[label]

        return [
            SeriesStim(row,
                       onset=_lookup(result.onset, label, None),
                       duration=_lookup(result.duration, label, None),
                       order=_lookup(result.order, label, label))
            for label, row in frame.iterrows()
        ]
3 changes: 0 additions & 3 deletions pliers/extractors/base.py
Expand Up @@ -49,9 +49,6 @@ class ExtractorResult:
associated with the rows in data.
orders (list, ndarray): Optional iterable giving the integer orders
associated with the rows in data.
raw: The raw result (net of any containers or overhead) returned by
the underlying feature extraction tool. Can be an object of any
type.
'''

def __init__(self, data, stim, extractor, features=None, onsets=None,
Expand Down
2 changes: 2 additions & 0 deletions pliers/stimuli/__init__.py
Expand Up @@ -8,6 +8,7 @@
from .image import ImageStim
from .text import TextStim, ComplexTextStim
from .video import VideoStim, VideoFrameCollectionStim, VideoFrameStim
from .misc import SeriesStim


__all__ = [
Expand All @@ -22,5 +23,6 @@
'VideoFrameStim',
'TweetStimFactory',
'TweetStim',
'SeriesStim',
'load_stims'
]
7 changes: 6 additions & 1 deletion pliers/stimuli/base.py
Expand Up @@ -187,7 +187,12 @@ def _log_transformation(source, result, trans=None, implicit=False):
if isiterable(result):
return (_log_transformation(source, r, trans) for r in result)

values = [source.name, source.filename, source.__class__.__name__]
# Converters are no longer restricted to Stim inputs, so the source may lack
# name and filename attributes; fall back to None in that case.
name = getattr(source, 'name', None)
filename = getattr(source, 'filename', None)

values = [name, filename, source.__class__.__name__]
if isinstance(result, Stim):
values.extend([result.name, result.filename])
else:
Expand Down
61 changes: 61 additions & 0 deletions pliers/stimuli/misc.py
@@ -0,0 +1,61 @@
"""Miscellaneous Stim classes."""

import numpy as np
import pandas as pd

from .base import Stim


class SeriesStim(Stim):
    '''Represents a pandas Series as a pliers Stim.

    Args:
        data (dict, pd.Series, array-like): A dictionary, pandas Series, or any
            other iterable (e.g., list or 1-D numpy array) that can be coerced
            to a pandas Series.
        filename (str, optional): Path or URL to data file. Must be readable
            using pd.read_csv().
        onset (float): Optional onset of the SeriesStim (in seconds) with
            respect to some more general context or timeline the user wishes
            to keep track of.
        duration (float): Optional duration of the SeriesStim, in seconds.
        order (int): Optional order of stim within some broader context.
        url (str): Optional URL to read data from. Must be readable using
            pd.read_csv().
        column (str): If filename or url is passed, defines the name of the
            column in the data source to read in as data. Ignored when the
            data source contains exactly one column.
        name (str): Optional name to give the SeriesStim instance. If None
            is provided, the name will be derived from the filename if one is
            defined. If no filename is defined, name will be an empty string.
        pd_args: Optional keyword arguments passed onto pd.read_csv() (e.g.,
            to control separator, header, etc.).

    Raises:
        ValueError: If none of data, filename, or url is provided, or if the
            data source does not reduce to a single column and no column
            argument was passed.
    '''

    def __init__(self, data=None, filename=None, onset=None, duration=None,
                 order=None, url=None, column=None, name=None, **pd_args):

        if data is None:
            if filename is None and url is None:
                raise ValueError("No data provided! One of the data, "
                                 "filename, or url arguments must be passed.")
            source = filename or url
            # The `squeeze` keyword of read_csv was deprecated in pandas 1.4
            # and removed in 2.0, so read the table as-is and collapse a
            # single-column frame explicitly instead.
            data = pd.read_csv(source, **pd_args)
            if isinstance(data, pd.DataFrame):
                if data.shape[1] == 1:
                    data = data.squeeze('columns')
                elif column is None:
                    raise ValueError("Data source contains more than one "
                                     "column; please specify which column to "
                                     "use by passing the 'column' argument.")
                else:
                    data = data.loc[:, column]

        # Coerce whatever we ended up with (dict, list, ndarray, Series) to a
        # Series; inputs that cannot be coerced raise here.
        data = pd.Series(data)
        self.data = data
        super().__init__(filename, onset, duration, order, name)

    def save(self, path, **kwargs):
        """Save stored series to disk.

        Args:
            path (str): The path of the file to save to.
            kwargs: Optional keyword arguments passed to pandas' to_csv()
        """
        self.data.to_csv(path, **kwargs)
21 changes: 19 additions & 2 deletions pliers/tests/converters/test_converters.py
Expand Up @@ -7,10 +7,12 @@
VideoToAudioConverter,
VideoToTextConverter,
WitTranscriptionConverter,
ComplexTextIterator)
ComplexTextIterator,
ExtractorResultToSeriesConverter)
from pliers.converters.image import ImageToTextConverter
from pliers.stimuli import (VideoStim, TextStim,
from pliers.stimuli import (VideoStim, TextStim, SeriesStim,
ComplexTextStim, ImageStim)
from pliers.extractors import ExtractorResult


def test_get_converter():
Expand Down Expand Up @@ -53,3 +55,18 @@ def test_stim_iteration_converter():
assert words[1].text == 'Sherlock'
assert str(
words[1].history) == 'ComplexTextStim->ComplexTextIterator/TextStim'


def test_extractor_result_to_series_converter():
    """Each row of an ExtractorResult should become one SeriesStim."""
    rows = [[2, 4], [1, 7], [6, 6], [8, 2]]
    onsets = [2, 4, 6, 8]
    result = ExtractorResult(rows, None, None, features=['a', 'b'],
                             onsets=onsets)

    converter = ExtractorResultToSeriesConverter()
    stims = converter.transform(result)
    assert len(stims) == 4

    # Spot-check the third row: values, timing and order.
    third = stims[2]
    assert isinstance(third, SeriesStim)
    assert third.data.shape == (2,)
    assert list(third.data) == [6, 6]
    assert third.onset == 6
    assert third.duration is None
    assert third.order == 2
27 changes: 26 additions & 1 deletion pliers/tests/test_stims.py
Expand Up @@ -2,6 +2,7 @@
import os
import base64
from os.path import join, exists
from pathlib import Path

import numpy as np
import pandas as pd
Expand All @@ -13,7 +14,8 @@
TranscribedAudioCompoundStim,
TextStim,
TweetStimFactory,
TweetStim)
TweetStim,
SeriesStim)
from pliers.stimuli.base import Stim, _get_stim_class
from pliers.extractors import (BrightnessExtractor, LengthExtractor,
ComplexTextExtractor)
Expand Down Expand Up @@ -335,3 +337,26 @@ def test_twitter():
res = ext.transform(ut_tweet)[0].to_df()
brightness = res['brightness'][0]
assert np.isclose(brightness, 0.54057, 1e-5)


def test_series():
    """Check SeriesStim construction from a dict, from a file, and failure
    on 2-D input."""
    # Construction from an in-memory mapping.
    values = {'a': 4, 'b': 2, 'c': 8}
    stim = SeriesStim(values, onset=4, duration=2)
    expected = pd.Series([4, 2, 8], index=['a', 'b', 'c'])
    pd.testing.assert_series_equal(stim.data, expected)
    assert stim.onset == 4
    assert stim.duration == 2
    assert stim.order is None

    # Construction from a tab-separated file with several columns.
    path = Path(get_test_data_path()) / 'text' / 'test_lexical_dictionary.txt'

    # multiple columns found and no column arg provided
    with pytest.raises(ValueError):
        SeriesStim(filename=path, sep='\t')

    stim = SeriesStim(filename=path, column='frequency', sep='\t')
    assert stim.data.shape == (7,)
    assert stim.data[3] == 15.417

    # 2-d array should fail
    with pytest.raises(Exception):
        SeriesStim(np.random.normal(size=(10, 2)))

0 comments on commit 078658f

Please sign in to comment.