Update docs

zuoxingdong · Sep 4, 2018 · 9b5f12c · 9b5f12c
1 parent 6b10ff4
commit 9b5f12c
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 48 deletions.
diff --git a/docs/source/runner.rst b/docs/source/runner.rst
@@ -8,15 +8,21 @@ Transitions
 
 .. autoclass:: Transition
     :members:
-
+
+History
+----------------------------------
+
+.. autoclass:: BaseHistory
+    :members:
+
 Trajectory
-----------
+~~~~~~~~~~
 
 .. autoclass:: Trajectory
     :members:
 
 Segments
---------
+~~~~~~~~
 
 .. autoclass:: Segment
     :members:

diff --git a/lagom/runner/base_history.py b/lagom/runner/base_history.py
@@ -81,7 +81,8 @@ def all_returns(self):
         r"""Return a list of accumulated returns (no discount, gamma=1.0) for all time steps. 
         
         Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
-        :math:`G_t = \sum_{i=t}^{T} r_i` for all :math:`t`. 
+
+        .. math:: G_t = \sum_{i=t}^{T} r_i \quad \text{for all } t
         
         .. note::
         
@@ -95,7 +96,8 @@ def all_discounted_returns(self):
         r"""Return a list of discounted returns for all time steps. 
         
         Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
-        :math:`G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i` for all :math:`t`
+
+        .. math:: G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i \quad \text{for all } t
         
         .. note::
         
@@ -110,7 +112,8 @@ def all_bootstrapped_returns(self):
         for all time steps. 
         
         Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
-        :math:`Q_t = r_t + r_{t+1} + \dots + r_T + V(s_{T+1})`
+
+        .. math:: Q_t = r_t + r_{t+1} + \dots + r_T + V(s_{T+1})
         
         .. note::
         
@@ -124,7 +127,8 @@ def all_bootstrapped_discounted_returns(self):
         r"""Return a list of discounted returns with bootstrapping for all time steps. 
         
         Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
-        :math:`Q_t = r_t + \gamma r_{t+1} + \dots + \gamma^{T - t} r_T + \gamma^{T - t + 1} V(s_{T+1})`
+
+        .. math:: Q_t = r_t + \gamma r_{t+1} + \dots + \gamma^{T - t} r_T + \gamma^{T - t + 1} V(s_{T+1})
         
         .. note::
         
@@ -148,6 +152,11 @@ def all_V(self):
     def all_TD(self):
         r"""Return a list of all TD errors in the history including the terminal states. 
         
+        Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)` and all state
+        values :math:`(V(s_1), \dots, V(s_T), V(s_{T+1}))`, it computes
+
+        .. math:: \delta_t = r_t + \gamma V(s_{t+1}) - V(s_t)
+        
         .. note::
         
             This behaves differently for :class:`Trajectory` and :class:`Segment`. 

diff --git a/lagom/runner/trajectory.py b/lagom/runner/trajectory.py
@@ -39,24 +39,10 @@ def all_returns(self):
 
     @property
     def all_discounted_returns(self):
-        """
-        Return a list of discounted returns for all time steps. 
-        
-        Suppose we have all rewards [r_1, ..., r_T], it computes
-        G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i
-        """
         return ExpFactorCumSum(self.gamma)(self.all_r)
 
     @property
     def all_V(self):
-        """
-        Return a list of all state values, from first to last state. 
-        
-        It takes information with the key 'V_s' for all transitions
-        and augment it with 'V_s_next' of the last transition. 
-        
-        Note that we would like to keep Tensor dtype, used for backprop.
-        """
         return [transition.V_s for transition in self.transitions] + [self.transitions[-1].V_s_next]
 
     @property
@@ -94,31 +80,5 @@ def all_TD(self):
         return all_TD.astype(np.float32).tolist()
 
     @property
-    def all_gae(self, gae_lambda):
-        """
-        Return a list of GAE. 
-        https://arxiv.org/abs/1506.02438
-        
-        TODO: remaining work. 
-        """
+    def all_GAE(self, gae_lambda):
         raise NotImplementedError
-
-    def all_info(self, name):
-        """
-        Return specified information for all transitions
-        
-        Args:
-            name (str): name of the information
-            
-        Returns:
-            list of specified information for all transitions
-        """
-        info = [transition.info[name] for transition in self.transitions]
-
-        return info
-
-    def __repr__(self):
-        string = 'Trajectory: \n'
-        for transition in self.transitions:
-            string += '\t' + transition.__repr__() + '\n'
-        return string