In [1]:
import gym
import tensorflow as tf
import numpy as np
import collections

# Report the TensorFlow and Gym versions, one per line.
print(tf.__version__, gym.__version__, sep="\n")

from tensorflow.keras import layers
from typing import Tuple, List
from datetime import datetime
# Single seed value applied to the environment, TensorFlow and NumPy.
seed = 42

# Gym environment instance shared by the cells below.
env = gym.make('CartPole-v0')

tf.random.set_seed(seed)
env.seed(seed)
np.random.seed(seed)



2.8.0
0.19.0


In [6]:
class ActorCritic(tf.keras.Model):
    """Keras model with one shared hidden layer feeding two output heads."""

    def __init__(self, num_actions: int, num_hidden_units: int):
        """Create the shared layer and the two heads.

        Args:
            num_actions: Output width of the actor head.
            num_hidden_units: Width of the shared ReLU layer.
        """
        super().__init__()

        # Shared ReLU layer applied to the inputs before either head.
        self.common = layers.Dense(units=num_hidden_units, activation="relu")
        # No activation is specified for either head.
        self.actor = layers.Dense(units=num_actions)
        self.critic = layers.Dense(units=1)

    def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
        """Return the actor-head and critic-head outputs for `inputs`."""
        shared_features = self.common(inputs)
        actor_out = self.actor(shared_features)
        critic_out = self.critic(shared_features)
        return actor_out, critic_out


In [7]:
# Number of actions reported by the environment's action space (2 here).
num_actions = env.action_space.n
# Width of the model's shared hidden layer.
num_hidden_units = 128

model = ActorCritic(num_actions=num_actions, num_hidden_units=num_hidden_units)

In [2]:
# Wrap OpenAI Gym's `env.step` call as an operation in a TensorFlow function.
# This would allow it to be included in a callable TensorFlow graph.

def env_step(action: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
  """Advance the module-level `env` by one step.

  Args:
    action: Action forwarded to `env.step`.

  Returns:
    A `(state, reward, done)` tuple of NumPy arrays: the state cast to
    float32, and the reward and done flag each converted to an int32 array.
  """
  # The fourth value returned by `env.step` is discarded.
  observation, step_reward, episode_over, _ = env.step(action)

  state = observation.astype(np.float32)
  reward = np.array(step_reward, dtype=np.int32)
  done = np.array(episode_over, dtype=np.int32)
  return state, reward, done


def tf_env_step(action: tf.Tensor) -> List[tf.Tensor]:
  """Run `env_step` on `action` through `tf.numpy_function`.

  Args:
    action: Tensor holding the action forwarded to `env_step`.

  Returns:
    The outputs of `env_step` as float32, int32 and int32 tensors.
  """
  # Declared output dtypes, in the order `env_step` returns its values.
  output_dtypes = [tf.float32, tf.int32, tf.int32]
  return tf.numpy_function(func=env_step, inp=[action], Tout=output_dtypes)
