Merge 9d3f262 into 266518e

PsychoinformaticsLab · May 2, 2020 · 078658f · 078658f
2 parents 266518e + 9d3f262
commit 078658f
Show file tree

Hide file tree

Showing 9 changed files with 147 additions and 12 deletions.
diff --git a/pliers/converters/__init__.py b/pliers/converters/__init__.py
@@ -1,5 +1,6 @@
-''' The `Converter` hierarchy contains Transformer classes that take a `Stim`
-of one type as input and return a `Stim` of a different type as output.
+''' The `Converter` hierarchy contains Transformer classes that take an object
+of arbitrary class (but almost always a `Stim` subclass) as input, and return a
+`Stim` instance (of different class) as output.
 '''
 
 from .api import (WitTranscriptionConverter,
@@ -14,6 +15,8 @@
                         ComplexTextIterator)
 from .multistep import VideoToTextConverter, VideoToComplexTextConverter
 from .video import VideoToAudioConverter
+from .misc import ExtractorResultToSeriesConverter
+
 
 __all__ = [
     'WitTranscriptionConverter',
@@ -29,6 +32,7 @@
     'VideoToComplexTextConverter',
     'VideoToAudioConverter',
     'RevAISpeechAPIConverter',
+    'ExtractorResultToSeriesConverter',
     'Converter',
     'get_converter'
 ]
diff --git a/pliers/converters/base.py b/pliers/converters/base.py
@@ -37,11 +37,10 @@ def get_converter(in_type, out_type, *args, **kwargs):
     '''
     convs = pliers.converters.__all__
 
-    # If config includes default converters for this combination, try them
-    # first
-    out_type = listify(out_type)[::-1]
+    # If config includes default converters for this combination, try them 1st
     default_convs = config.get_option('default_converters')
 
+    out_type = listify(out_type)[::-1]
     for ot in out_type:
         conv_str = '{}->{}'.format(in_type.__name__, ot.__name__)
         if conv_str in default_convs:
@@ -52,8 +51,10 @@ def get_converter(in_type, out_type, *args, **kwargs):
         if not inspect.isclass(cls) or not issubclass(cls, Converter):
             continue
 
+        # Some classes are only available if certain environment keys are set
         available = cls.available if issubclass(
             cls, EnvironmentKeyMixin) else True
+
         if cls._input_type == in_type and cls._output_type in out_type \
                 and available:
             conv = cls(*args, **kwargs)

diff --git a/pliers/converters/misc.py b/pliers/converters/misc.py
@@ -0,0 +1,23 @@
+"""Miscellaneous conversion classes."""
+
+from pliers.extractors import ExtractorResult
+from pliers.stimuli import SeriesStim
+from .base import Converter
+
+
+class ExtractorResultToSeriesConverter(Converter):
+    """Splits an ExtractorResult into one SeriesStim per row of its data."""
+
+    _input_type = ExtractorResult
+    _output_type = SeriesStim
+
+    def _convert(self, result):
+        """Return a list holding one SeriesStim for each row of result."""
+        def pick(values, idx, default=None):
+            return default if values is None else values[idx]
+
+        frame = result.to_df(timing=False, metadata=False, object_id=False)
+        return [SeriesStim(row, onset=pick(result.onset, idx),
+                           duration=pick(result.duration, idx),
+                           order=pick(result.order, idx, idx))
+                for idx, row in frame.iterrows()]
diff --git a/pliers/extractors/base.py b/pliers/extractors/base.py
@@ -49,9 +49,6 @@ class ExtractorResult:
             associated with the rows in data.
         orders (list, ndarray): Optional iterable giving the integer orders
             associated with the rows in data.
-        raw: The raw result (net of any containers or overhead) returned by
-            the underlying feature extraction tool. Can be an object of any
-            type.
     '''
 
     def __init__(self, data, stim, extractor, features=None, onsets=None,

diff --git a/pliers/stimuli/__init__.py b/pliers/stimuli/__init__.py
@@ -8,6 +8,7 @@
 from .image import ImageStim
 from .text import TextStim, ComplexTextStim
 from .video import VideoStim, VideoFrameCollectionStim, VideoFrameStim
+from .misc import SeriesStim
 
 
 __all__ = [
@@ -22,5 +23,6 @@
     'VideoFrameStim',
     'TweetStimFactory',
     'TweetStim',
+    'SeriesStim',
     'load_stims'
 ]
diff --git a/pliers/stimuli/base.py b/pliers/stimuli/base.py
@@ -187,7 +187,12 @@ def _log_transformation(source, result, trans=None, implicit=False):
     if isiterable(result):
         return (_log_transformation(source, r, trans) for r in result)
 
-    values = [source.name, source.filename, source.__class__.__name__]
+    # Converters are no longer restricted to Stim inputs, so ensure name and
+    # filename are set.
+    name = getattr(source, 'name', None)
+    filename = getattr(source, 'filename', None)
+
+    values = [name, filename, source.__class__.__name__]
     if isinstance(result, Stim):
         values.extend([result.name, result.filename])
     else:

diff --git a/pliers/stimuli/misc.py b/pliers/stimuli/misc.py
@@ -0,0 +1,61 @@
+"""Miscellaneous Stim classes."""
+
+import numpy as np
+import pandas as pd
+
+from .base import Stim
+
+
+class SeriesStim(Stim):
+    '''Represents a pandas Series as a pliers Stim.
+
+    Args:
+        data (dict, pd.Series, array-like): A dictionary, pandas Series, or
+            other iterable (e.g., list, 1-D numpy array) coercible to a Series.
+        filename (str, optional): Path or URL to data file. Must be readable
+            using pd.read_csv(). Only read when data is None.
+        onset (float): Optional onset of the SeriesStim (in seconds) with
+            respect to some broader context or timeline.
+        duration (float): Optional duration of the SeriesStim, in seconds.
+        order (int): Optional order of stim within some broader context.
+        url (str): Optional URL to read data from when data is None and no
+            filename is given. Must be readable using pd.read_csv().
+        column (str): Name of the column to use as data when the file or URL
+            contains more than one column.
+        name (str): Optional name to give the SeriesStim instance. If None
+            is provided, the name will be derived from the filename if one is
+            defined. If no filename is defined, name will be an empty string.
+        pd_args: Optional keyword arguments passed onto pd.read_csv() (e.g.,
+            to control separator, header, etc.).
+
+    Raises:
+        ValueError: If none of data, filename, or url is given, or if the
+            source has more than one column and no column is specified.
+    '''
+
+    def __init__(self, data=None, filename=None, onset=None, duration=None,
+                 order=None, url=None, column=None, name=None, **pd_args):
+        if data is None:
+            if filename is None and url is None:
+                raise ValueError("No data provided! One of the data, filename,"
+                                 " or url arguments must be passed.")
+            # read_csv(squeeze=True) was removed in pandas 2.0; squeeze after.
+            data = pd.read_csv(filename or url, **pd_args).squeeze("columns")
+            if isinstance(data, pd.DataFrame):
+                if column is None:
+                    raise ValueError("Data source contains more than one "
+                                     "column; please specify which column to "
+                                     "use by passing the 'column' argument.")
+                data = data.loc[:, column]
+
+        self.data = pd.Series(data)
+        super().__init__(filename, onset, duration, order, name)
+
+    def save(self, path, **kwargs):
+        """Save stored series to disk.
+
+        Args:
+            path (str): The path of the file to save to.
+            kwargs: Optional keyword arguments passed to pandas' to_csv()
+        """
+        self.data.to_csv(path, **kwargs)
diff --git a/pliers/tests/converters/test_converters.py b/pliers/tests/converters/test_converters.py
@@ -7,10 +7,12 @@
                                VideoToAudioConverter,
                                VideoToTextConverter,
                                WitTranscriptionConverter,
-                               ComplexTextIterator)
+                               ComplexTextIterator,
+                               ExtractorResultToSeriesConverter)
 from pliers.converters.image import ImageToTextConverter
-from pliers.stimuli import (VideoStim, TextStim,
+from pliers.stimuli import (VideoStim, TextStim, SeriesStim,
                             ComplexTextStim, ImageStim)
+from pliers.extractors import ExtractorResult
 
 
 def test_get_converter():
@@ -53,3 +55,18 @@ def test_stim_iteration_converter():
     assert words[1].text == 'Sherlock'
     assert str(
         words[1].history) == 'ComplexTextStim->ComplexTextIterator/TextStim'
+
+
+def test_extractor_result_to_series_converter():
+    # Four rows of two features each, with one onset per row.
+    extracted = ExtractorResult([[2, 4], [1, 7], [6, 6], [8, 2]], None, None,
+                                features=['a', 'b'], onsets=[2, 4, 6, 8])
+    converted = ExtractorResultToSeriesConverter().transform(extracted)
+    assert len(converted) == 4
+    third = converted[2]
+    assert isinstance(third, SeriesStim)
+    assert third.data.shape == (2,)
+    assert list(third.data) == [6, 6]
+    assert third.onset == 6
+    assert third.duration is None
+    assert third.order == 2
diff --git a/pliers/tests/test_stims.py b/pliers/tests/test_stims.py
@@ -2,6 +2,7 @@
 import os
 import base64
 from os.path import join, exists
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -13,7 +14,8 @@
                             TranscribedAudioCompoundStim,
                             TextStim,
                             TweetStimFactory,
-                            TweetStim)
+                            TweetStim,
+                            SeriesStim)
 from pliers.stimuli.base import Stim, _get_stim_class
 from pliers.extractors import (BrightnessExtractor, LengthExtractor,
                                ComplexTextExtractor)
@@ -335,3 +337,26 @@ def test_twitter():
     res = ext.transform(ut_tweet)[0].to_df()
     brightness = res['brightness'][0]
     assert np.isclose(brightness, 0.54057, 1e-5)
+
+
+def test_series():
+    values = {'a': 4, 'b': 2, 'c': 8}
+    from_dict = SeriesStim(values, onset=4, duration=2)
+    expected = pd.Series([4, 2, 8], index=['a', 'b', 'c'])
+    pd.testing.assert_series_equal(from_dict.data, expected)
+    assert from_dict.onset == 4
+    assert from_dict.duration == 2
+    assert from_dict.order is None
+
+    # Multi-column file without a column argument must be rejected.
+    path = Path(get_test_data_path(), 'text', 'test_lexical_dictionary.txt')
+    with pytest.raises(ValueError):
+        SeriesStim(filename=path, sep='\t')
+
+    from_file = SeriesStim(filename=path, column='frequency', sep='\t')
+    assert from_file.data.shape == (7,)
+    assert from_file.data[3] == 15.417
+
+    # 2-d array should fail
+    with pytest.raises(Exception):
+        SeriesStim(np.random.normal(size=(10, 2)))