In [1]:
import gym
import os
import numpy as np
from pyvirtualdisplay import Display

from cartpole.config import get_cfg_defaults
from cartpole.utils import ReplayMemory



# Load the project's default configuration node.
cfg = get_cfg_defaults()

# Restrict CUDA to the devices listed in the config. The ordering variable is
# set to PCI bus order so the indices in cfg.SYSTEM.DEVICES are interpreted
# against a stable device numbering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
devices = ",".join(map(str, cfg.SYSTEM.DEVICES))
os.environ["CUDA_VISIBLE_DEVICES"] = devices

In [2]:
"""
If you want to access the behind-the-scenes dynamics of a specific environment,
then you use the unwrapped property.
"""
# Start a virtual display (visible=0) whose size comes from
# cfg.SYSTEM.VIRTUAL_SCREEN. Presumably this is what lets env.render() work on
# a machine without a physical screen -- TODO confirm for the target setup.
display = Display(visible=0, size=cfg.SYSTEM.VIRTUAL_SCREEN)
display.start()
# Keep the unwrapped environment rather than the object gym.make returns, so
# the environment's internals are directly accessible (see the note above).
env = gym.make("CartPole-v0").unwrapped



In [39]:
import tensorflow as tf
from tensorflow.python.keras import layers, models

class DQN(tf.keras.Model):
    """Convolutional Q-network: three conv -> batch-norm -> ReLU stages and a linear head.

    The model is built on the public ``tf.keras`` API, the same namespace this
    notebook uses for ``tf.keras.backend.clear_session()``, instead of the
    private ``tensorflow.python.keras`` package.

    Args:
        outputs: Number of units of the final dense layer (presumably one
            Q-value per available action -- confirm against the caller).
    """

    def __init__(self, outputs):
        super(DQN, self).__init__()
        keras_layers = tf.keras.layers

        # Three identical-shaped convolutions; with stride 2 and "same"
        # padding each one halves the spatial resolution (rounded up).
        self.conv1 = keras_layers.Conv2D(filters=16, kernel_size=5, strides=2, padding="same")
        self.conv2 = keras_layers.Conv2D(filters=32, kernel_size=5, strides=2, padding="same")
        self.conv3 = keras_layers.Conv2D(filters=32, kernel_size=5, strides=2, padding="same")

        # axis=-1: normalize over the last axis of the conv outputs.
        self.bn1 = keras_layers.BatchNormalization(axis=-1)
        self.bn2 = keras_layers.BatchNormalization(axis=-1)
        self.bn3 = keras_layers.BatchNormalization(axis=-1)
        self.flat = keras_layers.Flatten()

        self.head = keras_layers.Dense(units=outputs)

    def call(self, x, training=None):
        """Run the forward pass.

        Args:
            x: Input batch; it is converted to a float32 tensor before use.
            training: Optional train/inference switch forwarded to the
                batch-normalization layers. ``None`` (the default) leaves the
                decision to Keras, which matches the previous behaviour.

        Returns:
            The un-activated output of the dense head.
        """
        x = tf.convert_to_tensor(x, dtype=tf.float32)
        # Pass `training` explicitly so batch norm uses batch statistics while
        # training and its moving averages at inference time.
        x = tf.nn.relu(self.bn1(self.conv1(x), training=training))
        x = tf.nn.relu(self.bn2(self.conv2(x), training=training))
        x = tf.nn.relu(self.bn3(self.conv3(x), training=training))
        x = self.flat(x)

        # use L1-loss, no need other activation for outputs
        return self.head(x)
    
# Reset Keras' global state so re-running this cell does not pile up layers
# and graph state from earlier instantiations.
tf.keras.backend.clear_session()
# Size the output layer from the environment's action space instead of a
# hard-coded 2, so the model stays in sync with `env` created above.
model = DQN(env.action_space.n)

In [31]:
# Reset the environment, then render the current frame as an array
# and cast it to float32 in a single step.
env.reset()
im = env.render(mode="rgb_array").astype('float32')