Skip to content

Commit

Permalink
Update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
zuoxingdong committed Sep 4, 2018
1 parent b556ea8 commit 63a68c7
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
14 changes: 11 additions & 3 deletions lagom/runner/transition.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ class Transition(object):
Formally, it stores the transition tuple :math:`(s, a, r, s_{\text{next}}, \text{done})`.
It also stores additional useful information, e.g. log-probability of action, state value etc.
Example::
>>> transition = Transition(s=0.2, a=1.3, r=-1.0, s_next=0.8, done=True)
>>> transition
Transition: (s=0.2, a=1.3, r=-1.0, s_next=0.8, done=True)
"""
def __init__(self, s, a, r, s_next, done):
r"""Initialize the transition
Expand Down Expand Up @@ -55,10 +62,11 @@ def V_s_next(self):
.. note::
Often it is returned as a Tensor, which can be useful for backprop to train
value function. However, be cautious of handling the raw value e.g. zero
value should be replaced when the next state is terminal state.
value function. However, be cautious when handling the raw value: e.g. when calculating
bootstrapped returns, the value should be replaced with zero when the next state
is a terminal state.
"""
return self.info['V_s_next']

def __repr__(self):
return f'Transition: ({self.s}, {self.a}, {self.r}, {self.s_next}, {self.done})'
return f'Transition: (s={self.s}, a={self.a}, r={self.r}, s_next={self.s_next}, done={self.done})'
2 changes: 1 addition & 1 deletion test/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_transition(self):
assert transition.a == 2.0
assert transition.r == -1.0
assert transition.s_next == 1.5
assert transition.done
assert transition.done == True

assert len(transition.info) == 0

Expand Down

0 comments on commit 63a68c7

Please sign in to comment.