In [1]:
import gym
from gym.spaces.box import Box
import torch
import numpy as np
import random

from baselines import bench
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.shmem_vec_env import ShmemVecEnv
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

from baselines.common.vec_env import VecEnvWrapper, VecExtractDictObs, VecMonitor, VecNormalize

In [2]:
# Seed every random number generator this notebook imports (PyTorch, Python's
# `random`, NumPy) with the same value so that repeated runs are comparable.
seed = 0
torch.manual_seed(seed)
random.seed(seed)
# Kept last on purpose: it evaluates to None, so the cell displays no output.
np.random.seed(seed)

In [3]:
class VecTransposeImage(VecEnvWrapper):
    """
    Based on: https://github.com/DLR-RM/stable-baselines3

    Vectorised-environment wrapper that moves the channel axis of image
    observations to the front, i.e. (H,W,C) becomes (C,H,W), which is the
    layout PyTorch convolution layers expect.
    """

    def __init__(self, venv):
        # The wrapped space is unpacked as (H, W, C); the advertised space is
        # the same image with the channel axis first.
        n_rows, n_cols, n_channels = venv.observation_space.shape
        channels_first_space = Box(
            low=0,
            high=255,
            shape=(n_channels, n_rows, n_cols),
            dtype=venv.observation_space.dtype,
        )
        super().__init__(venv, observation_space=channels_first_space)

    @staticmethod
    def transpose_image(image):
        """
        Move the channel axis of a single image or of a batch of images.

        A 3-dimensional input is treated as one (H,W,C) image; any other
        input is treated as a (N,H,W,C) batch.

        :param image: (np.ndarray)
        :return: (np.ndarray)
        """
        axes = (2, 0, 1) if image.ndim == 3 else (0, 3, 1, 2)
        return np.transpose(image, axes)

    def step_wait(self):
        """
        Collect the step results from the wrapped env and transpose only the
        observations; rewards, dones and infos are passed through untouched.
        """
        obs, rews, dones, infos = self.venv.step_wait()
        obs = self.transpose_image(obs)
        return obs, rews, dones, infos

    def reset(self):
        """
        Reset all environments and return the transposed observations.
        """
        first_obs = self.venv.reset()
        return self.transpose_image(first_obs)

    def close(self):
        """
        Close the wrapped vectorised environment.
        """
        self.venv.close()

In [4]:
class VecPyTorch(VecEnvWrapper):
    """
    Taken from: https://github.com/harry-uglow/Curriculum-Reinforcement-Learning

    Bridge between a numpy-based vectorised environment and PyTorch code:
    observations and rewards coming out of the env are converted to
    torch.tensors, and torch.tensor actions going in are converted to
    numpy arrays.
    """

    def __init__(self, venv, device):
        """
        :param venv: the vectorised environment to wrap
        :param device: torch device the observation tensors are moved to
        """
        super(VecPyTorch, self).__init__(venv)
        self.device = device

    def reset(self):
        """
        Reset the wrapped env and return the observations as a float
        torch.tensor on `self.device`.
        """
        obs = self.venv.reset()
        # convert obs to torch tensor
        obs = torch.from_numpy(obs).float().to(self.device)
        return obs

    def step_async(self, actions):
        """
        Convert torch.tensor actions into numpy.array for envs.

        :param actions: (torch.Tensor) one action per sub-environment. A
            trailing dimension of size 1 (e.g. shape (N, 1)) is squeezed
            away; tensors without a second dimension are passed on as-is.
        """
        # Only squeeze when dimension 1 actually exists. The previous check
        # also fired for 0-D/1-D CPU LongTensors, where squeeze(1) raises
        # "IndexError: Dimension out of range".
        if actions.dim() > 1:
            # Squeeze the dimension for discrete actions (no-op when the
            # size of dimension 1 is not 1)
            actions = actions.squeeze(1)
        # detach() so that actions still attached to the autograd graph do
        # not make .numpy() raise
        actions = actions.detach().cpu().numpy()
        self.venv.step_async(actions)

    def step_wait(self):
        """
        Convert numpy.array observations into torch.tensor for policy network.
        Convert numpy.array rewards into torch.tensor for policy network.

        :return: (obs, reward, done, info) where obs is a float tensor on
            `self.device`, reward is a float tensor with an extra trailing
            dimension of size 1, and done / info are passed through
            unchanged from the wrapped env.
        """
        obs, reward, done, info = self.venv.step_wait()
        # convert obs to torch tensor
        obs = torch.from_numpy(obs).float().to(self.device)
        # convert reward to torch tensor
        # NOTE: unlike obs, the reward tensor is not moved to self.device;
        # it stays on the CPU.
        reward = torch.from_numpy(reward).unsqueeze(dim=1).float()
        return obs, reward, done, info

In [7]:
def make_env(env_id, rank):
    """
    Build a zero-argument factory ("thunk") that creates one environment.

    The environment is only constructed when the returned callable is
    invoked, which lets a vectorised env build it wherever it needs to.

    :param env_id: (str) gym environment id; only ids starting with
        'procgen' are supported
    :param rank: index of the sub-environment; accepted but currently unused
    :return: callable taking no arguments and returning the gym environment
    :raises NotImplementedError: when the returned callable is invoked for a
        non-procgen `env_id`
    """

    def _thunk():
        # Guard clause: anything that is not a procgen id is unsupported.
        if not env_id.startswith('procgen'):
            raise NotImplementedError

        # Fixed procgen configuration: 100 levels starting at level 0, in
        # the 'easy' distribution mode.
        procgen_kwargs = dict(start_level=0,
                              num_levels=100,
                              distribution_mode='easy')
        return gym.make(env_id, **procgen_kwargs)

    return _thunk

def make_vec_envs(env_name, num_processes, torch_device=None):
    """
    Create a vectorised environment that returns torch.tensor observations.

    The sub-environments are wrapped, in this order, with:
    VecTransposeImage -> VecMonitor -> VecNormalize -> VecPyTorch.

    :param env_name: (str) environment id, forwarded to `make_env`
    :param num_processes: (int) number of sub-environments; must be >= 1.
        More than one uses SubprocVecEnv, exactly one uses DummyVecEnv.
    :param torch_device: (optional) torch device for the observation
        tensors. When omitted, the notebook-level variable `device` is used,
        so `device` must already be defined at call time.
    :return: the wrapped vectorised environment
    :raises ValueError: if `num_processes` is smaller than 1
    """
    if num_processes < 1:
        raise ValueError(
            "num_processes must be >= 1, got {}".format(num_processes))

    # Resolve the device before any environment is created, so that a missing
    # notebook-level `device` fails here instead of after worker processes
    # have already been started.
    target_device = device if torch_device is None else torch_device

    env_fns = [make_env(env_id=env_name, rank=i) for i in range(num_processes)]

    if len(env_fns) > 1:
        envs = SubprocVecEnv(env_fns)
    else:
        envs = DummyVecEnv(env_fns)

    # re-order channels, (H,W,C) => (C,H,W).
    # required for PyTorch convolution layers.
    envs = VecTransposeImage(envs)

    envs = VecMonitor(venv=envs,
                      filename=None,
                      keep_buf=100)

    # VecNormalize is applied unconditionally: this function has no separate
    # evaluation mode, so the same wrapper is used for every caller.
    # we don't normalise the obs (ob=False) as the network does this /255.
    envs = VecNormalize(envs, ob=False)

    # wrapper to convert observation arrays to torch.tensors
    envs = VecPyTorch(envs, target_device)

    return envs

In [8]:
# Use the first CUDA device when one is available, otherwise the CPU.
# `make_vec_envs` reads this notebook-level `device`, so it has to be defined
# before the call below.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Environment id and number of parallel sub-environments.
env_name = "procgen:procgen-coinrun-v0"
num_processes = 2

envs = make_vec_envs(env_name, num_processes)

In [9]:
# Reset every sub-environment and keep the initial batch of observations.
frame = envs.reset()

In [10]:
# Inspect the layout of the observation batch returned by reset().
frame.shape

torch.Size([2, 3, 64, 64])

In [12]:
# Signed sum of the element-wise difference between the first two observations
# in the batch. A non-zero value means the two frames are not identical.
# (A zero value would not prove they are equal, because positive and negative
# differences can cancel out.)
(frame[0] - frame[1]).sum()

tensor(-76908., device='cuda:0')