Skip to content

Commit

Permalink
Update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
zuoxingdong committed Sep 4, 2018
1 parent 6b10ff4 commit 9b5f12c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 48 deletions.
12 changes: 9 additions & 3 deletions docs/source/runner.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,21 @@ Transitions

.. autoclass:: Transition
:members:


History
----------------------------------

.. autoclass:: BaseHistory
:members:

Trajectory
----------
~~~~~~~~~~

.. autoclass:: Trajectory
:members:

Segments
--------
~~~~~~~~

.. autoclass:: Segment
:members:
Expand Down
17 changes: 13 additions & 4 deletions lagom/runner/base_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def all_returns(self):
r"""Return a list of accumulated returns (no discount, gamma=1.0) for all time steps.
Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
:math:`G_t = \sum_{i=t}^{T} r_i` for all :math:`t`.
.. math::

    G_t = \sum_{i=t}^{T} r_i, \quad \forall t
.. note::
Expand All @@ -95,7 +96,8 @@ def all_discounted_returns(self):
r"""Return a list of discounted returns for all time steps.
Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
:math:`G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i` for all :math:`t`
.. math::

    G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i, \quad \forall t
.. note::
Expand All @@ -110,7 +112,8 @@ def all_bootstrapped_returns(self):
for all time steps.
Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
:math:`Q_t = r_t + r_{t+1} + \dots + r_T + V(s_{T+1})`
.. math::

    Q_t = r_t + r_{t+1} + \dots + r_T + V(s_{T+1})
.. note::
Expand All @@ -124,7 +127,8 @@ def all_bootstrapped_discounted_returns(self):
r"""Return a list of discounted returns with bootstrapping for all time steps.
Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)`, it computes
:math:`Q_t = r_t + \gamma r_{t+1} + \dots + \gamma^{T - t} r_T + \gamma^{T - t + 1} V(s_{T+1})`
.. math::

    Q_t = r_t + \gamma r_{t+1} + \dots + \gamma^{T - t} r_T + \gamma^{T - t + 1} V(s_{T+1})
.. note::
Expand All @@ -148,6 +152,11 @@ def all_V(self):
def all_TD(self):
r"""Return a list of all TD errors in the history including the terminal states.
Formally, suppose we have all rewards :math:`(r_1, \dots, r_T)` and all state
values :math:`(V(s_1), \dots, V(s_T), V(s_{T+1}))`, it computes
.. math::

    \delta_t = r_t + \gamma V(s_{t+1}) - V(s_t)
.. note::
This behaves differently for :class:`Trajectory` and :class:`Segment`.
Expand Down
42 changes: 1 addition & 41 deletions lagom/runner/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,10 @@ def all_returns(self):

@property
def all_discounted_returns(self):
"""
Return a list of discounted returns for all time steps.
Suppose we have all rewards [r_1, ..., r_T], it computes
G_t = \sum_{i=t}^{T} \gamma^{i - t} r_i
"""
return ExpFactorCumSum(self.gamma)(self.all_r)

@property
def all_V(self):
"""
Return a list of all state values, from first to last state.
It takes information with the key 'V_s' for all transitions
and augments it with 'V_s_next' of the last transition.
Note that we would like to keep Tensor dtype, used for backprop.
"""
return [transition.V_s for transition in self.transitions] + [self.transitions[-1].V_s_next]

@property
Expand Down Expand Up @@ -94,31 +80,5 @@ def all_TD(self):
return all_TD.astype(np.float32).tolist()

@property
def all_gae(self, gae_lambda):
"""
Return a list of GAE.
https://arxiv.org/abs/1506.02438
TODO: remaining work.
"""
def all_GAE(self, gae_lambda):
raise NotImplementedError

def all_info(self, name):
"""
Return specified information for all transitions
Args:
name (str): name of the information
Returns:
list of specified information for all transitions
"""
info = [transition.info[name] for transition in self.transitions]

return info

def __repr__(self):
string = 'Trajectory: \n'
for transition in self.transitions:
string += '\t' + transition.__repr__() + '\n'
return string

0 comments on commit 9b5f12c

Please sign in to comment.