In [10]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.distributions.normal import Normal

In [2]:
class CriticNetwork(nn.Module):
    def __init__(
        self,
        n_observations, n_actions,
        fc1_dim=256, fc2_dim=256
    ):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(n_observations[0] + n_actions, fc1_dim),
            nn.ReLU(),
            nn.Linear(fc1_dim, fc2_dim),
            nn.ReLU(),
            nn.Linear(fc2_dim, 1),
        )
    
    def forward(self, state, action):
        input = torch.cat([state, action], dim=1)
        return self.layers(input)

In [3]:
class ValueNetwork(nn.Module):
    def __init__(self, n_observations, fc1_dim, fc2_dim):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(n_observations, fc1_dim),
            nn.ReLU(),
            nn.Linear(fc1_dim, fc2_dim),
            nn.ReLU(),
            nn.Linear(fc2_dim, 1)
        )
    
    def forward(self, state):
        return self.layers(state)

In [11]:
class ActorNetwork(nn.Module):
    def __init__(self, n_observations, n_actions, max_action, fc1_dim=256, fc2_dim=256):
        super().__init__()
        self.reparam_noise = 1e-6
        self.max_action = max_action
        
        self.layers = nn.Sequential(
            nn.Linear(n_observations, fc1_dim),
            nn.ReLU(),
            nn.Linear(fc1_dim, fc2_dim),
            nn.ReLU(),
        )
        
        self.mu = nn.Linear(fc2_dim, n_actions)
        self.sigma = nn.Linear(fc2_dim, n_actions)
    
    def forward(self, state):
        prob = self.layers(state)
        
        mu = self.mu(state)
        sigma = self.sigma(state)
        sigma = torch.clamp(sigma, min=self.reparam_noise, max=1)

        return mu, sigma

    def sample_norm(self, state, reparametrize=True):
        mu, sigma = self.forward(state)
        probs = Normal()
        
        if reparametrize:
            actions = probs.rsample()
        else:
            actions = probs.sample()
        
        action = torch.tanh(actions) * self.max_action
        log_probs = probs.log_prob(actions)

In [6]:
CRITIC_LEARNING_RATE = 1e-3
VALUE_LEARNING_RATE = 1e-3
ACTOR_LEARNING_RATE = 1e-3

In [7]:
critic_network = CriticNetwork()
value_network = ValueNetwork()
actor_network = ActorNetwork()

TypeError: __init__() missing 2 required positional arguments: 'n_observations' and 'n_actions'

In [18]:
critic_optimizier = optim.Adam(critic_network.parameters(), lr=CRITIC_LEARNING_RATE)
value_optimizier = optim.Adam(value_network.parameters(), lr=VALUE_LEARNING_RATE)
actor_optimizier = optim.Adam(actor_network.parameters(), lr=ACTOR_LEARNING_RATE)

NameError: name 'critic_network' is not defined

In [21]:
#https://youtu.be/ioidsRlf79o?t=1230