Skip to content

Commit

Permalink
The line that fixes all
Browse files Browse the repository at this point in the history
  • Loading branch information
xeviknal authored and ziritrion committed Mar 14, 2021
1 parent 39b3ad3 commit 907af7a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def forward(self, x):
x= self.pipeline(x)
# actor: chooses the action to take from state s_t
# by returning the probability of each action
action_prob = F.log_softmax(self.actor_head(x), dim=-1)
action_prob = F.softmax(self.actor_head(x), dim=-1)

# critic: evaluates being in the state s_t
state_values = self.critic_head(x)
Expand Down
2 changes: 1 addition & 1 deletion trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def episode_train(self, iteration):
policy_loss.append(-log_prob * advantage)

# calculate the critic (value) loss using the smooth L1 loss
value_losses.append(F.smooth_l1_loss(baseline, torch.tensor([G]).to(self.device)))
value_losses.append(F.smooth_l1_loss(baseline.squeeze(), G))

# Update policy:
self.optimizer.zero_grad()
Expand Down

0 comments on commit 907af7a

Please sign in to comment.