In [1]:
import gym
import numpy as np
import matplotlib.pyplot as plt
%matplotlib tk # Jupyter notebook 에서 pop up window

In [5]:
class GridEnv(gym.Env):
    """Small grid world with obstacle cells and a single goal cell.

    Positions are two-element lists ``[i, j]`` that index ``self.world[i, j]``.

    Actions (``Discrete(4)``):
        0 -> ``agent_pos[0] - 1``  (labelled "Left")
        1 -> ``agent_pos[0] + 1``  (labelled "Right")
        2 -> ``agent_pos[1] - 1``  (labelled "Up")
        3 -> ``agent_pos[1] + 1``  (labelled "Down")

    Observations (``Discrete(3)``):
        0 -> the agent is standing on an obstacle
        1 -> the agent is standing on the goal
        2 -> anything else

    Cell codes: ``self.world`` holds 0 for empty cells and 2 for obstacles;
    the frame drawn by ``render`` additionally uses -1 for the agent and 3
    for the goal.
    """

    def __init__(self):
        self.map_size = (3,3)
        # Single source of truth for the start cell; reset() copies it.
        self.start_pos = [0,2]
        self.agent_pos = list(self.start_pos)
        self.obstacle = [[1,1],[1,2]]
        self.goal = [2,2]
        self.action_space = gym.spaces.Discrete(4)
        self.obs_space = gym.spaces.Discrete(3)
        # Conventional gym attribute name; `obs_space` is kept for existing users.
        self.observation_space = self.obs_space
        # Build the map right away so render() also works before the first reset().
        self.world = self._build_world()

    def _build_world(self):
        """Return a fresh ``map_size`` array with every obstacle cell set to 2."""
        world = np.zeros(self.map_size)
        for row, col in self.obstacle:
            world[row, col] = 2
        return world

    def _in_bounds(self, pos):
        """Return True when ``pos`` lies inside the ``map_size`` grid."""
        rows, cols = self.map_size
        return 0 <= pos[0] < rows and 0 <= pos[1] < cols

    def step(self, action):
        """Move the agent one cell and report the outcome.

        The move is applied without any bounds or obstacle check; leaving the
        grid is reported through the ``done`` flag instead.

        Args:
            action: 0, 1, 2 or 3 (see the class docstring).

        Returns:
            A 3-tuple ``(observation, reward, done)``.

        Raises:
            ValueError: if ``action`` is not one of the four known actions.
        """
        # NOTE: agent_pos[0] is the first index into self.world in render(),
        # so "Left"/"Right" move along the array's first axis.
        if action == 0: #Left
            self.agent_pos[0] -= 1
        elif action == 1: #Right
            self.agent_pos[0] += 1
        elif action == 2: #Up
            self.agent_pos[1] -= 1
        elif action == 3: #Down
            self.agent_pos[1] += 1
        else:
            raise ValueError("Action is not defined")

        return self.obs(), self.get_reward(), self._is_done()

    def obs(self):
        """Return the observation code for the agent's current cell (0, 1 or 2)."""
        if self.agent_pos in self.obstacle:
            return 0
        if self.agent_pos == self.goal:
            return 1
        return 2

    def _is_done(self):
        """Return True when the agent has left the grid or reached the goal."""
        return (not self._in_bounds(self.agent_pos)) or self.agent_pos == self.goal

    def reset(self):
        """Rebuild the map, put the agent back on the start cell, return the observation."""
        self.world = self._build_world()
        # Copy so that the in-place moves in step() never alter start_pos.
        self.agent_pos = list(self.start_pos)

        return self.obs()

    def render(self,episode,step):
        """Draw the current grid in a matplotlib window.

        Args:
            episode: episode counter shown in the figure text.
            step: step counter shown in the figure text.
        """
        # Code for visualization
        plt.ion()
        plt.title("Grid World")
        plt.figtext(0.9,0.9, f"Episode = {episode}")
        plt.figtext(0.9,0.8, "Step : {}".format(step))

        # Paint agent and goal on a copy so self.world needs no repair afterwards.
        frame = self.world.copy()
        if self._in_bounds(self.agent_pos):
            # An out-of-grid agent is simply not drawn (a negative index would
            # otherwise wrap around to the opposite edge).
            frame[self.agent_pos[0], self.agent_pos[1]] = -1
        # The goal is painted last, so it wins if the agent is on the goal cell.
        frame[self.goal[0], self.goal[1]] = 3
        plt.matshow(frame,fignum=0)
        plt.draw()
        plt.pause(0.1)
        plt.clf()

    def render_text(self,obs,reward):
        """Add the reward and a status label for ``obs`` to the current figure.

        Args:
            obs: observation code as returned by ``obs()``.
            reward: value shown after "Reward : ".
        """
        plt.figtext(0.9,0.6, f"Reward : {reward}")
        if obs == 2:
            plt.figtext(0.9,0.7, "HI")
        elif obs == 1:
            plt.figtext(0.9,0.7, "GOAL IN")
        elif obs == 0:
            plt.figtext(0.9,0.7, "Obstacle")

    def get_reward(self):
        """Return -1 on an obstacle cell, +1 on the goal cell, 0 otherwise."""
        if self.agent_pos in self.obstacle:
            return -1
        if self.agent_pos == self.goal:
            return +1
        return 0

    def close(self):
        """Release environment resources (currently a no-op)."""
        # Clear env
        pass

In [6]:
# Create the grid-world environment instance used by the simulation loop.
env = GridEnv()

In [7]:
# Run 100 episodes of at most 100 steps each with a uniformly random policy.
for episode in range(100):
    obs = env.reset()
    for step in range(100):
        # Draw the state the agent is in before it acts.
        env.render(episode, step)
        # randint's upper bound is exclusive: sample over the whole action
        # space so that every action, including 3 ("Down"), can be chosen.
        action = np.random.randint(env.action_space.n)
        obs, reward, is_done = env.step(action)
        env.render_text(obs,reward)

        # The episode ends when the agent leaves the grid or reaches the goal.
        if is_done:
            break
env.close()
    