Skip to content

Commit

Permalink
Update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
zuoxingdong committed Sep 4, 2018
1 parent 63a68c7 commit 6b10ff4
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 68 deletions.
3 changes: 3 additions & 0 deletions lagom/runner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from .transition import Transition

from .base_history import BaseHistory
from .trajectory import Trajectory
from .segment import Segment

from .trajectory_runner import TrajectoryRunner
from .segment_runner import SegmentRunner
190 changes: 190 additions & 0 deletions lagom/runner/base_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
class BaseHistory(object):
    r"""Base class for all history of :class:`Transition`.

    It stores a list of successive transitions and a dictionary of additional
    useful information.

    Common use cases can be :class:`Trajectory` or :class:`Segment`.

    The subclass should implement at least the following:

    - :meth:`all_s`
    - :meth:`all_returns`
    - :meth:`all_discounted_returns`
    - :meth:`all_bootstrapped_returns`
    - :meth:`all_bootstrapped_discounted_returns`
    - :meth:`all_V`
    - :meth:`all_TD`
    - :meth:`all_GAE`
    """
    def __init__(self, gamma):
        r"""Initialize the history of transitions.

        Args:
            gamma (float): discount factor.
        """
        self.gamma = gamma

        # Successive transitions and user-supplied metadata about the history.
        self.transitions = []
        self.info = {}

    def add_transition(self, transition):
        r"""Append a new transition to the history.

        Args:
            transition (Transition): a transition.
        """
        self.transitions.append(transition)

    def add_info(self, name, value):
        r"""Add additional information about the history.

        Args:
            name (str): name of the information
            value (object): value of the information
        """
        self.info[name] = value

    @property
    def T(self):
        r"""Return the total number of stored transitions."""
        return len(self.transitions)

    @property
    def all_s(self):
        r"""Return a list of all states in the history including the terminal state.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_a(self):
        r"""Return a list of all actions in the history."""
        return [transition.a for transition in self.transitions]

    @property
    def all_r(self):
        r"""Return a list of all rewards in the history."""
        return [transition.r for transition in self.transitions]

    @property
    def all_done(self):
        r"""Return a list of all dones in the history."""
        return [transition.done for transition in self.transitions]

    @property
    def all_returns(self):
        r"""Return a list of accumulated returns (no discount, gamma=1.0) for all time steps.

        Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
        :math:`G_t = \sum_{i=t}^{T} r_i` for all :math:`t`.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_discounted_returns(self):
        r"""Return a list of discounted returns for all time steps.

        Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
        :math:`G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i` for all :math:`t`.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_bootstrapped_returns(self):
        r"""Return a list of accumulated returns (no discount, gamma=1.0) with bootstrapping
        for all time steps.

        Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
        :math:`Q_t = r_t + r_{t+1} + \dots + r_T + V(s_{T+1})`

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_bootstrapped_discounted_returns(self):
        r"""Return a list of discounted returns with bootstrapping for all time steps.

        Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
        :math:`Q_t = r_t + \gamma r_{t+1} + \dots + \gamma^{T - t} r_T + \gamma^{T - t + 1} V(s_{T+1})`

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_V(self):
        r"""Return a list of all state values in the history including the terminal states.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    @property
    def all_TD(self):
        r"""Return a list of all TD errors in the history including the terminal states.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.
        """
        raise NotImplementedError

    def all_GAE(self, gae_lambda):
        r"""Return a list of all `generalized advantage estimates`_ (GAE) in the history
        including the terminal states.

        Args:
            gae_lambda (float): GAE lambda parameter, trading off bias and variance.

        .. note::

            This behaves differently for :class:`Trajectory` and :class:`Segment`.

        .. _generalized advantage estimates:
            https://arxiv.org/abs/1506.02438
        """
        raise NotImplementedError

    def all_info(self, name):
        r"""Return the specified information from all transitions.

        Args:
            name (str): name of the information

        Returns:
            list: a list of the specified information from all transitions
        """
        return [transition.info[name] for transition in self.transitions]

    def __repr__(self):
        # Join at once instead of quadratic string concatenation in a loop.
        lines = [f'{self.__class__.__name__}: \n']
        lines.extend('\t' + repr(transition) + '\n' for transition in self.transitions)
        return ''.join(lines)
87 changes: 19 additions & 68 deletions lagom/runner/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,86 +4,37 @@

from lagom.core.transform import ExpFactorCumSum

from .base_history import BaseHistory

class Trajectory(object):
"""
Data for a trajectory, consisting of successive transitions, with additional useful information
e.g. other info includes length, success and so on.

class Trajectory(BaseHistory):
r"""Define a trajectory, consisting of successive transitions.
.. note::
It is not necessarily a complete episode (final state is terminal state). However, all transitions
must come from a single episode. For the history containing transitions from multiple episodes
(i.e. ``done=True`` in the middle), it is recommended to use :class:`Segment` instead.
Example::
Note that it is not necessarily a complete episode (ending in a terminal state), but it must be a contiguous part of a single episode. For a segment of transitions that can contain `done=True` in the middle (i.e. data from more than one episode), one can use Segment instead.
"""
def __init__(self, gamma):
self.gamma = gamma # discount factor

self.transitions = []
self.info = {}

def add_transition(self, transition):
"""
Add a new transition to append in the trajectory
Args:
transition (Transition): given transition
"""
self.transitions.append(transition)

def add_info(self, name, value):
"""
Add additional information for current trajectory
Args:
name (str): name of the information
value (object): value of the information
"""
self.info[name] = value

@property
def T(self):
"""
Return the current length of the trajectory (number of transitions).
"""
return len(self.transitions)
# Sanity check for trajectory
# Not allowed to add more transition if it already contains done=True
if len(self.transitions) > 0: # non-empty
assert self.transitions[-1].done == False, 'not allowed to add transition, because already contains done=True'
super().add_transition(transition)

@property
def all_s(self):
"""
Return a list of all states in the trajectory from initial state to last state.
r"""Return a list of all states in the trajectory, from first state to the last state (i.e. ``s_next`` in
last transition).
"""
return [transition.s for transition in self.transitions] + [self.transitions[-1].s_next]

@property
def all_a(self):
"""
Return a list of all actions in the trajectory.
"""
return [transition.a for transition in self.transitions]

@property
def all_r(self):
"""
Return a list of all rewards in the trajectory.
"""
return [transition.r for transition in self.transitions]

@property
def all_done(self):
"""
Return a list of all dones in the trajectory.
Note that the done for initial state is not included.
"""
return [transition.done for transition in self.transitions]

@property
def all_returns(self):
r"""
Return a list of returns (no discount, gamma=1.0) for all time steps.
Suppose we have all rewards [r_1, ..., r_T], it computes
G_t = \sum_{i=t}^{T} r_i
"""
return ExpFactorCumSum(1.0)(self.all_r)

@property
Expand Down

0 comments on commit 6b10ff4

Please sign in to comment.