<a href="https://colab.research.google.com/github/wjdqlsdlsp/AI_using_pytorch-reinforce-learning/blob/main/%E1%84%80%E1%85%AA%E1%84%8C%E1%85%A65_Cliff_Walking_with_Deep_Sarsa_%E1%84%87%E1%85%A1%E1%86%A8%E1%84%8C%E1%85%A5%E1%86%BC%E1%84%87%E1%85%B5%E1%86%AB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **[인공지능] 과제5 Cliff Walking 예제 Deep Sarsa 구현**


In [None]:
import numpy as np
import random
from tqdm import tqdm
from collections import defaultdict, namedtuple, deque
from gym.envs.toy_text.cliffwalking import CliffWalkingEnv # Cliff Walking 환경

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
class QtoPolicy:
    """Render the greedy policy of a 48-state Q-table as a 4x12 arrow grid."""

    def __init__(self):
        # Glyph for action index 0..3, followed by 'X'. 'X' is used for the
        # last cell of the grid and is also what index -1 (a state missing
        # from Q) resolves to.
        # NOTE(review): the arrow order presumably matches the environment's
        # action numbering (0=up, 1=right, 2=down, 3=left) - confirm.
        self.action = ['↑', '→', '↓', '←', 'X']

    def printPolicy(self, Q):
        """Print the greedy action of every state as a 4x12 grid of glyphs.

        Args:
            Q: Mapping from state index (0..47) to that state's action
                values. Any array-like accepted by ``np.argmax`` works; the
                values are flattened before the arg-max is taken. States
                absent from ``Q`` are drawn as 'X'.
        """
        policy = np.array([np.argmax(Q[key]) if key in Q else -1
                           for key in np.arange(48)])
        # The bottom row is not taken from Q: it is always drawn as
        # up, ten lefts, and 'X'.
        # NOTE(review): presumably start / cliff cells / goal - confirm.
        policy[36:] = np.array([0] + [3] * 10 + [4])

        glyphs = np.array(self.action)
        print(glyphs[np.reshape(policy, (4, 12))])
        print('')

In [None]:
# Record type and bounded container used for experience replay.
Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state'))


class ReplayBuffer:
    """Fixed-capacity FIFO store of ``Transition`` records.

    Once ``capacity`` records are held, appending a new one silently drops
    the oldest.
    """

    def __init__(self, capacity):
        # A deque with maxlen evicts from the left when it is full.
        self.buffer = deque(maxlen=capacity)

    def push(self, *args):
        """Wrap ``args`` (state, action, reward, next_state) and store them."""
        record = Transition(*args)
        self.buffer.append(record)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored records chosen at random."""
        chosen = random.sample(self.buffer, batch_size)
        return chosen

    def __len__(self):
        """Number of records currently stored."""
        stored = self.buffer
        return len(stored)

In [None]:
class DNN(nn.Module):
    """Fully connected network: four 100-unit ReLU layers and a linear head."""

    def __init__(self, inputs, outputs):
        """Build the layers.

        Args:
            inputs: Size of one input vector.
            outputs: Number of output units.
        """
        super().__init__()
        self.x_dim, self.y_dim = inputs, outputs
        self.fc_variable_no = 100
        width = self.fc_variable_no

        # Layers are created in input-to-output order.
        self.fc_in = nn.Linear(self.x_dim, width)
        self.fc_hidden1 = nn.Linear(width, width)
        self.fc_hidden2 = nn.Linear(width, width)
        self.fc_hidden3 = nn.Linear(width, width)
        self.fc_out = nn.Linear(width, self.y_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Forward pass; ``x`` is first reshaped to ``(-1, x_dim)``."""
        hidden = x.reshape(-1, self.x_dim)
        relu_layers = (self.fc_in, self.fc_hidden1,
                       self.fc_hidden2, self.fc_hidden3)
        for layer in relu_layers:
            hidden = self.relu(layer(hidden))
        # The output layer has no activation.
        return self.fc_out(hidden)

In [None]:
# One replayed SARSA experience: (s, a, r, s', a') plus a terminal flag.
# The next action is stored with every transition because the SARSA target
# needs Q(s', a') for the action that was actually taken after *that*
# transition, not for whichever action happens to be current at training time.
_SarsaTransition = namedtuple(
    '_SarsaTransition',
    ('state', 'action', 'reward', 'next_state', 'next_action', 'done'))


class DeepSarsa:
    """Deep SARSA agent: epsilon-greedy policy over a Q-network.

    The network is trained every ``training_interval`` steps on a random
    mini-batch of stored (s, a, r, s', a') transitions, regressing
    Q(s, a) towards ``r + gamma * Q(s', a')``.
    """

    def __init__(self):
        self.state_no = 48  # number of states (length of the one-hot input)
        self.action_no = 4  # number of actions (network outputs)
        self.alpha = 0.001  # optimizer learning rate
        self.gamma = 0.99  # discount factor
        self.epsilon = 0.5  # probability of taking a random action

        self.batch_size = 32  # mini-batch size drawn from the replay buffer
        self.training_interval = 10  # train once every this many steps

        self.main_net = DNN(self.state_no, self.action_no)  # Q-network

        self.optimizer = optim.Adam(self.main_net.parameters(), lr=self.alpha)
        # Bounded replay memory; the oldest transition is dropped when full.
        self.buffer = deque(maxlen=500)

    def one_hot_state(self, state):
        """Return ``state`` as a one-hot float array of shape (1, state_no).

        State indices carry no ordinal meaning, so they are one-hot encoded
        before being fed to the network.
        """
        one_hot_encoded = np.zeros((1, self.state_no))
        one_hot_encoded[0, state] = 1

        return one_hot_encoded

    def get_q_values(self):
        """Evaluate the Q-network on every state.

        Returns:
            A defaultdict mapping each state index to the network output as
            a nested list of shape (1, action_no). Missing keys default to a
            flat list of ``action_no`` zeros.
        """
        q_values = defaultdict(lambda: [0.0] * self.action_no)
        # Pure evaluation, so no autograd graph is needed.
        with torch.no_grad():
            for i in range(self.state_no):
                state = torch.tensor(self.one_hot_state(i)).float()
                q_values[i] = self.main_net(state).tolist()
        return q_values

    def optimize_model(self, next_action=None):
        """Run one gradient step on a random mini-batch from the buffer.

        Does nothing until the buffer holds at least ``batch_size``
        transitions.

        Args:
            next_action: Ignored. Kept so existing callers that pass the
                current step's next action keep working; the next action of
                each sampled transition is read from the buffer instead.
        """
        if len(self.buffer) < self.batch_size:
            return

        # Transpose a list of transitions into one transition of tuples.
        transitions = random.sample(self.buffer, self.batch_size)
        batch = _SarsaTransition(*zip(*transitions))

        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        next_state_batch = torch.cat(batch.next_state)
        next_action_batch = torch.cat(batch.next_action)
        done_batch = torch.cat(batch.done)

        # Q(s, a) for the action taken in each sampled transition.
        state_action_values = self.main_net(state_batch).gather(1, action_batch)

        # SARSA target: bootstrap from Q(s', a') of the action actually
        # chosen in s', selected per sample along the action dimension.
        with torch.no_grad():
            next_state_values = self.main_net(next_state_batch).gather(
                1, next_action_batch)
        # Terminal transitions (done == 1) get no bootstrap term.
        target_state_action_values = (
            reward_batch + self.gamma * next_state_values * (1.0 - done_batch))

        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, target_state_action_values)

        self.optimizer.zero_grad()
        loss.backward()
        # Clamp every gradient element to [-10, 10] before the step.
        nn.utils.clip_grad_value_(self.main_net.parameters(), 10)
        self.optimizer.step()

    def update(self, state, action, reward, next_state, time_step,
               next_action, done=False):
        """Store one transition and train every ``training_interval`` steps.

        Args:
            state: One-hot numpy array for s, as built by ``one_hot_state``.
            action: Index of the action taken in s.
            reward: Reward received for the transition.
            next_state: One-hot numpy array for s'.
            time_step: Global step counter; training runs when
                ``(time_step + 1)`` is a multiple of ``training_interval``.
            next_action: Index of the action chosen in s'.
            done: True if s' is terminal, so its value is not bootstrapped.
                Defaults to False.
        """
        self.buffer.append(_SarsaTransition(
            torch.from_numpy(state).float(),
            torch.tensor([[int(action)]], dtype=torch.long),
            torch.tensor([[float(reward)]], dtype=torch.float32),
            torch.from_numpy(next_state).float(),
            torch.tensor([[int(next_action)]], dtype=torch.long),
            torch.tensor([[float(done)]], dtype=torch.float32),
        ))
        if (time_step + 1) % self.training_interval == 0:
            self.optimize_model(next_action)

    def act(self, state):
        """Choose an action for ``state`` with the epsilon-greedy policy.

        Args:
            state: One-hot numpy array, as built by ``one_hot_state``.

        Returns:
            A uniformly random action index with probability ``epsilon``,
            otherwise the arg-max of the network's Q-values.
        """
        if np.random.rand() < self.epsilon:
            action = np.random.choice(self.action_no)
        else:
            with torch.no_grad():
                state = torch.from_numpy(state).float()
                q_values = self.main_net(state)
                action = torch.argmax(q_values, 1).item()
        return action


In [None]:
# Create the Cliff Walking environment and the helper that prints a policy
# as a grid of arrows.
env = CliffWalkingEnv()
policy = QtoPolicy()

In [None]:
# Train a Deep SARSA agent for 5000 episodes.
agent_DeepSarsa = DeepSarsa()
time_step = 0  # step counter shared across episodes; drives the training interval
for ep in tqdm(range(5000)):
    done = False
    state = agent_DeepSarsa.one_hot_state(env.reset())
    action = agent_DeepSarsa.act(state)
    ep_reward, ep_steps = 0, 0
    # Every 50 episodes, print the greedy policy of the current network.
    if not ep % 50:
        print("\n")
        policy.printPolicy(agent_DeepSarsa.get_q_values())
    while not done:
        next_state, reward, done, info = env.step(action)
        next_state = agent_DeepSarsa.one_hot_state(next_state)
        # SARSA picks the next action before the update, then actually
        # takes that same action on the following step.
        next_action = agent_DeepSarsa.act(next_state)
        agent_DeepSarsa.update(state, action, reward, next_state, time_step, next_action)

        time_step += 1
        ep_reward += reward
        ep_steps += 1
        state, action = next_state, next_action

  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)




[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  1%|          | 51/5000 [00:28<09:31,  8.66it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '↓' '→' '↓' '→' '→' '→' '→' '→' '↓' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  2%|▏         | 99/5000 [00:36<12:28,  6.55it/s]



[['↑' '↑' '→' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↑' '↑']
 ['↑' '↑' '↑' '→' '→' '→' '↑' '↑' '→' '→' '→' '→']
 ['↑' '→' '→' '→' '↑' '↑' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  3%|▎         | 152/5000 [00:47<09:56,  8.13it/s]



[['↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↑' '↑']
 ['↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↑' '↑' '↑']
 ['↑' '→' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '↑']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  4%|▍         | 201/5000 [00:55<13:27,  5.94it/s]



[['↑' '↑' '→' '↑' '→' '→' '→' '→' '↑' '→' '↑' '↑']
 ['→' '↑' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '→' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  5%|▌         | 250/5000 [01:04<15:42,  5.04it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓']
 ['↓' '→' '↓' '↓' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  6%|▌         | 301/5000 [01:14<20:08,  3.89it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '→' '↓' '→' '↓']
 ['→' '↓' '↓' '↓' '↓' '↓' '→' '↓' '→' '→' '↓' '↓']
 ['↓' '↓' '↓' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  7%|▋         | 351/5000 [01:23<11:36,  6.68it/s]



[['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↑' '→']
 ['↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '→' '↑' '↑' '↑']
 ['↑' '→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  8%|▊         | 400/5000 [01:33<20:39,  3.71it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↑']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



  9%|▉         | 450/5000 [01:47<12:38,  6.00it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '←' '←' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 10%|█         | 500/5000 [01:56<17:36,  4.26it/s]



[['↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑']
 ['↑' '↑' '↑' '↑' '→' '↑' '↑' '↑' '↑' '↓' '↑' '↑']
 ['↑' '→' '→' '→' '→' '↑' '↑' '→' '↑' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 11%|█         | 550/5000 [02:03<10:12,  7.26it/s]



[['→' '↓' '→' '↓' '↓' '→' '↓' '→' '→' '↓' '↓' '→']
 ['↓' '→' '↓' '↓' '→' '→' '↓' '↓' '↓' '↓' '→' '↓']
 ['↓' '→' '→' '→' '→' '↑' '→' '→' '↑' '↑' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 12%|█▏        | 600/5000 [02:12<14:14,  5.15it/s]



[['↓' '↑' '↑' '↑' '↓' '↑' '↓' '↓' '↓' '↓' '↑' '↑']
 ['↑' '↑' '↑' '↓' '→' '↑' '↓' '↑' '↓' '↑' '↑' '↑']
 ['↑' '←' '←' '←' '←' '→' '←' '←' '←' '←' '←' '↑']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 13%|█▎        | 650/5000 [02:22<09:36,  7.54it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '↓' '↑' '↑' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 14%|█▍        | 702/5000 [02:32<09:23,  7.62it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '→']
 ['↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '→' '↑' '↑' '→' '↑' '→' '↑' '→' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 15%|█▍        | 749/5000 [02:40<17:07,  4.14it/s]



[['↓' '↓' '↓' '→' '↓' '→' '↓' '→' '↑' '→' '↓' '↓']
 ['↓' '→' '↑' '↑' '↑' '↑' '↓' '→' '→' '↓' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 16%|█▌        | 800/5000 [02:48<09:27,  7.41it/s]



[['↓' '↓' '↓' '↓' '↓' '←' '↓' '↓' '←' '↓' '↓' '↓']
 ['↓' '↓' '↓' '←' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 17%|█▋        | 850/5000 [03:01<22:19,  3.10it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↑' '↓']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 18%|█▊        | 902/5000 [03:11<10:53,  6.27it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '↓' '→' '→']
 ['→' '→' '→' '→' '→' '→' '↓' '→' '→' '→' '→' '→']
 ['→' '←' '→' '→' '←' '↑' '←' '→' '←' '↑' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 19%|█▉        | 950/5000 [03:19<12:52,  5.24it/s]



[['↑' '↑' '↑' '→' '↑' '↑' '→' '→' '↓' '↑' '↑' '↑']
 ['↑' '↑' '↑' '↓' '↑' '↑' '↑' '→' '↑' '↑' '↓' '↑']
 ['↑' '↑' '→' '→' '→' '↑' '↑' '→' '↑' '↑' '→' '↑']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 20%|██        | 1000/5000 [03:31<10:57,  6.08it/s]



[['↓' '→' '→' '→' '→' '→' '→' '↓' '↓' '↓' '↓' '→']
 ['→' '→' '→' '→' '→' '↓' '→' '↓' '↓' '↓' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '←' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 21%|██        | 1051/5000 [03:41<13:26,  4.90it/s]



[['↓' '↓' '→' '→' '↓' '→' '↓' '↑' '→' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '→' '→' '→' '↓' '↓' '↓' '↓']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 22%|██▏       | 1102/5000 [03:50<09:03,  7.17it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓' '→' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 23%|██▎       | 1151/5000 [03:59<09:26,  6.80it/s]



[['↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '→' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 24%|██▍       | 1201/5000 [04:07<07:33,  8.38it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '←' '←' '↓' '←']
 ['↓' '↓' '↓' '←' '↓' '↓' '↓' '←' '↓' '↓' '←' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 25%|██▌       | 1250/5000 [04:15<10:42,  5.83it/s]



[['→' '→' '↓' '↓' '→' '↓' '↓' '→' '→' '→' '↓' '→']
 ['↓' '↓' '→' '↓' '→' '→' '↓' '↓' '→' '→' '↓' '↓']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 26%|██▌       | 1300/5000 [04:25<09:01,  6.83it/s]



[['→' '→' '↓' '↓' '↓' '→' '→' '→' '→' '↓' '↓' '→']
 ['→' '↓' '→' '↓' '→' '→' '→' '→' '↓' '→' '→' '↓']
 ['→' '↑' '↑' '→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 27%|██▋       | 1352/5000 [04:34<08:06,  7.49it/s]



[['→' '→' '→' '→' '↓' '→' '→' '→' '→' '↓' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 28%|██▊       | 1401/5000 [04:44<10:51,  5.52it/s]



[['→' '→' '↓' '↓' '→' '↓' '→' '↓' '→' '↓' '→' '↓']
 ['→' '→' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓']
 ['↓' '→' '↑' '→' '→' '→' '→' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 29%|██▉       | 1450/5000 [04:53<11:58,  4.94it/s]



[['→' '→' '→' '→' '↓' '→' '→' '→' '↓' '→' '→' '→']
 ['→' '→' '→' '↓' '→' '→' '→' '↓' '↓' '→' '↓' '→']
 ['→' '→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 30%|███       | 1500/5000 [05:05<15:11,  3.84it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↑' '↑' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↑' '↓' '↑' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 31%|███       | 1550/5000 [05:15<23:30,  2.45it/s]



[['←' '←' '←' '←' '↓' '←' '↓' '↓' '←' '↓' '↓' '→']
 ['←' '↓' '←' '↓' '↓' '→' '→' '←' '↓' '→' '↓' '↓']
 ['←' '←' '←' '→' '←' '→' '←' '←' '→' '←' '→' '←']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 32%|███▏      | 1600/5000 [05:27<16:22,  3.46it/s]



[['↓' '↓' '↓' '→' '→' '↓' '→' '→' '→' '→' '↓' '→']
 ['↓' '→' '↓' '↓' '↓' '↓' '→' '→' '↓' '↓' '→' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 33%|███▎      | 1651/5000 [05:40<12:20,  4.52it/s]



[['↓' '→' '→' '↓' '→' '↓' '↓' '→' '↓' '→' '→' '↓']
 ['↓' '↓' '→' '↓' '↓' '→' '↓' '→' '→' '→' '→' '↓']
 ['↓' '→' '→' '→' '→' '↑' '→' '→' '→' '↑' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 34%|███▍      | 1700/5000 [05:48<08:08,  6.75it/s]



[['→' '↑' '→' '→' '→' '↑' '↑' '↑' '↑' '→' '↑' '↑']
 ['→' '↑' '→' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↑']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 35%|███▍      | 1749/5000 [05:58<10:39,  5.08it/s]



[['→' '↓' '↓' '→' '→' '↓' '↓' '↓' '↓' '→' '↓' '→']
 ['↓' '→' '→' '↓' '↓' '↓' '→' '→' '→' '→' '↓' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 36%|███▌      | 1801/5000 [06:09<08:32,  6.25it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '←' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '←' '←' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '←']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 37%|███▋      | 1850/5000 [06:18<10:15,  5.12it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↑']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 38%|███▊      | 1899/5000 [06:28<10:41,  4.83it/s]



[['→' '↓' '→' '→' '↓' '↓' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '↓' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 39%|███▉      | 1951/5000 [06:39<07:47,  6.52it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 40%|████      | 2000/5000 [06:49<11:08,  4.49it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '↓' '→' '→' '→' '↓' '↓' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '↑' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 41%|████      | 2051/5000 [07:02<07:54,  6.22it/s]



[['→' '→' '→' '↓' '→' '→' '→' '→' '→' '↓' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '↓' '↓' '↓' '→' '↓']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 42%|████▏     | 2101/5000 [07:13<08:53,  5.44it/s]



[['↓' '↓' '→' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 43%|████▎     | 2150/5000 [07:25<19:25,  2.45it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↑' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 44%|████▍     | 2201/5000 [07:35<08:11,  5.69it/s]



[['→' '→' '→' '→' '↓' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '↓' '→' '↓' '→' '→' '→']
 ['→' '→' '→' '↑' '↑' '→' '↑' '↑' '↑' '↑' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 45%|████▍     | 2249/5000 [07:47<11:12,  4.09it/s]



[['→' '→' '→' '↓' '→' '↓' '→' '→' '↓' '↓' '→' '↓']
 ['↓' '→' '↓' '↓' '↓' '↓' '↓' '→' '↓' '→' '→' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 46%|████▌     | 2300/5000 [07:56<05:45,  7.81it/s]



[['→' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '→' '↑']
 ['→' '→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 47%|████▋     | 2351/5000 [08:05<09:31,  4.64it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '→' '←' '←' '←' '→' '→' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 48%|████▊     | 2401/5000 [08:15<08:09,  5.30it/s]



[['→' '→' '→' '→' '→' '↓' '→' '→' '→' '↓' '→' '→']
 ['→' '→' '→' '↓' '↓' '→' '↓' '→' '→' '→' '→' '→']
 ['→' '→' '↑' '→' '→' '→' '→' '→' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 49%|████▉     | 2450/5000 [08:26<07:38,  5.56it/s]



[['→' '↓' '→' '→' '→' '↓' '→' '↓' '→' '↓' '↓' '→']
 ['↓' '→' '→' '↓' '↓' '→' '↓' '→' '→' '→' '→' '↓']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 50%|█████     | 2500/5000 [08:40<10:38,  3.91it/s]



[['↓' '↓' '↓' '↓' '↓' '→' '↓' '→' '→' '↓' '↓' '↓']
 ['↓' '↓' '→' '→' '→' '↓' '→' '↓' '→' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 51%|█████     | 2551/5000 [08:54<08:08,  5.02it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '↓' '↓' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 52%|█████▏    | 2601/5000 [09:05<08:49,  4.53it/s]



[['→' '→' '↓' '→' '→' '→' '→' '↓' '↓' '→' '→' '→']
 ['↓' '→' '→' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '→']
 ['↓' '↑' '→' '→' '→' '↑' '→' '↑' '↑' '↑' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 53%|█████▎    | 2650/5000 [09:18<10:34,  3.70it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '→' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 54%|█████▍    | 2700/5000 [09:28<08:29,  4.52it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '↑' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 55%|█████▌    | 2750/5000 [09:37<07:07,  5.26it/s]



[['←' '↓' '→' '←' '→' '→' '←' '←' '→' '→' '←' '←']
 ['←' '←' '→' '←' '↓' '←' '↓' '←' '←' '→' '←' '←']
 ['←' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 56%|█████▌    | 2802/5000 [09:50<05:10,  7.07it/s]



[['→' '↓' '→' '→' '↓' '→' '→' '→' '→' '↓' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '↓' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 57%|█████▋    | 2850/5000 [10:02<06:35,  5.43it/s]



[['↓' '→' '→' '↓' '→' '→' '↓' '→' '→' '→' '↓' '↓']
 ['↓' '↓' '→' '→' '→' '→' '↓' '→' '→' '→' '→' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 58%|█████▊    | 2900/5000 [10:12<06:36,  5.29it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 59%|█████▉    | 2950/5000 [10:25<07:51,  4.35it/s]



[['↓' '↓' '→' '→' '→' '→' '→' '↓' '↓' '→' '→' '↓']
 ['↓' '↓' '→' '→' '↓' '↓' '↓' '↓' '→' '→' '→' '↓']
 ['→' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 60%|██████    | 3000/5000 [10:37<08:39,  3.85it/s]



[['↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓' '↓' '→' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 61%|██████    | 3051/5000 [10:47<03:23,  9.58it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['→' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 62%|██████▏   | 3100/5000 [10:59<05:59,  5.29it/s]



[['→' '→' '↓' '↓' '↓' '↓' '↓' '↓' '→' '→' '→' '↓']
 ['→' '↓' '↓' '↓' '↓' '→' '↓' '→' '→' '→' '→' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 63%|██████▎   | 3150/5000 [11:10<10:36,  2.91it/s]



[['→' '→' '↓' '↓' '→' '↓' '↓' '→' '↓' '↓' '↓' '→']
 ['→' '→' '→' '→' '↓' '→' '→' '→' '→' '↓' '→' '↓']
 ['→' '←' '←' '→' '←' '←' '→' '←' '←' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 64%|██████▍   | 3201/5000 [11:21<06:22,  4.70it/s]



[['↓' '→' '↓' '→' '→' '→' '↓' '→' '→' '→' '→' '→']
 ['→' '→' '→' '↓' '→' '↓' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 65%|██████▌   | 3250/5000 [11:34<07:53,  3.70it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '→' '↑' '↑' '→' '→' '↑' '↑' '→' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 66%|██████▌   | 3301/5000 [11:44<03:46,  7.49it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 67%|██████▋   | 3351/5000 [11:55<05:44,  4.78it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['→' '→' '→' '↓' '↓' '→' '→' '↓' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 68%|██████▊   | 3401/5000 [12:04<04:10,  6.38it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '↑' '↑' '→' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 69%|██████▉   | 3450/5000 [12:16<04:41,  5.51it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '→' '↑' '→' '↑' '↑' '→' '↑' '↑' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 70%|██████▉   | 3499/5000 [12:27<03:21,  7.44it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '↓' '→' '↓' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 71%|███████   | 3549/5000 [12:38<05:49,  4.16it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '←' '←' '→' '←' '←' '←' '←' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 72%|███████▏  | 3601/5000 [12:47<04:14,  5.51it/s]



[['→' '↑' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↑' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 73%|███████▎  | 3653/5000 [12:59<02:31,  8.92it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↑' '↑' '↑' '↑' '↑' '→' '→' '↑' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 74%|███████▍  | 3702/5000 [13:09<03:15,  6.63it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '↓' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 75%|███████▌  | 3751/5000 [13:17<03:18,  6.29it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '←' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 76%|███████▌  | 3799/5000 [13:29<05:05,  3.93it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '→' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 77%|███████▋  | 3850/5000 [13:40<05:03,  3.79it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓']
 ['→' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 78%|███████▊  | 3900/5000 [13:50<02:56,  6.24it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 79%|███████▉  | 3950/5000 [13:59<01:42, 10.25it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 80%|████████  | 4000/5000 [14:10<03:49,  4.35it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 81%|████████  | 4050/5000 [14:19<03:23,  4.67it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 82%|████████▏ | 4100/5000 [14:30<03:06,  4.84it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 83%|████████▎ | 4150/5000 [14:40<01:46,  7.97it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '→' '↑' '↑' '↑' '↑' '→' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 84%|████████▍ | 4201/5000 [14:50<02:43,  4.87it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 85%|████████▌ | 4250/5000 [15:00<02:01,  6.15it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '↑' '←' '←' '↑' '↑' '←' '↑' '→' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 86%|████████▌ | 4300/5000 [15:09<01:38,  7.11it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '←' '←' '←' '←' '←' '←' '↑' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 87%|████████▋ | 4351/5000 [15:19<01:10,  9.24it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 88%|████████▊ | 4402/5000 [15:28<00:57, 10.48it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 89%|████████▉ | 4450/5000 [15:38<02:02,  4.50it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 90%|█████████ | 4501/5000 [15:47<00:51,  9.65it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '←' '←' '←' '←' '←' '←' '←' '↑' '←' '←' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 91%|█████████ | 4550/5000 [16:00<02:03,  3.65it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 92%|█████████▏| 4599/5000 [16:10<00:56,  7.07it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '→' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '←' '→' '→' '→' '←' '→' '→' '←' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 93%|█████████▎| 4651/5000 [16:19<00:59,  5.83it/s]



[['→' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '→']
 ['↓' '→' '↓' '↓' '↓' '↓' '→' '↓' '↓' '→' '↓' '↓']
 ['↓' '↑' '→' '↑' '→' '↑' '↑' '→' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 94%|█████████▍| 4701/5000 [16:30<00:56,  5.26it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 95%|█████████▌| 4750/5000 [16:40<00:40,  6.16it/s]



[['→' '←' '←' '←' '←' '→' '←' '←' '←' '←' '←' '→']
 ['→' '←' '←' '←' '←' '←' '←' '→' '←' '←' '→' '←']
 ['←' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '←']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 96%|█████████▌| 4800/5000 [16:49<00:44,  4.48it/s]



[['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓' '↓']
 ['↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '↓']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 97%|█████████▋| 4852/5000 [17:00<00:25,  5.91it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '→' '→' '↑' '→' '→' '↑' '↑' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 98%|█████████▊| 4900/5000 [17:07<00:16,  6.06it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '↑' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



 99%|█████████▉| 4951/5000 [17:17<00:07,  6.78it/s]



[['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '←' '←' '→' '→' '←' '←' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



100%|██████████| 5000/5000 [17:26<00:00,  4.78it/s]


학습된 Q-value를 이용하여 학습된 정책을 출력합니다.

In [None]:
# Print the greedy policy derived from the trained network's Q-values.
print('Learned policy by DeepSarsa')
policy.printPolicy(agent_DeepSarsa.get_q_values())

Learned policy by DQN
[['↓' '↓' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→' '→']
 ['→' '→' '→' '↑' '→' '→' '→' '→' '→' '→' '→' '→']
 ['↑' '←' '←' '←' '←' '←' '←' '←' '←' '←' '←' 'X']]



In [None]:
# Close the environment now that training and evaluation are finished.
env.close()