In [None]:
# Google Colab only: mount the user's Google Drive at /content/drive so the
# project files stored there can be accessed like a local directory.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# NOTE: this is an IPython shell-style `cd` line (not plain Python); it switches
# the working directory to the project folder on the mounted drive.
cd /content/drive/MyDrive/project

/content/drive/MyDrive/project


In [None]:
from string import ascii_uppercase
#from draw_utils import *
#from pyglet.gl import *
import numpy as np
import pandas as pd
import os
import copy


# Reward table read by Simulator.get_reward.
move_reward, obs_reward = -0.1, -0.2     # ordinary move / blocked move (obstacle or off-grid)
goal_reward, finish_reward = 10, 20      # target item reached / back at the start as final target
print('reward:', move_reward, obs_reward, goal_reward)

# dirname() of the literal string '__file__' is '', so this resolves to the
# current working directory (a notebook has no real __file__).
local_path = os.path.abspath(os.path.dirname('__file__'))


class Simulator:
    """Grid-world environment in which an agent collects ordered items.

    Grid cell values written by this class:
        1    free cell
        0    blocked cell (box slot without a pending item, or obstacle)
        200  current target (ordered item, or the start point once every
             item has been collected)
        100  agent position

    Coordinates are ``[row, col]``; the ``x`` names used below are row
    indices and the ``y`` names are column indices.
    """

    # Start / end point of every episode.
    _START = (9, 4)

    def __init__(self):
        """Load the order list and set the fixed grid dimensions.

        Attributes:
            files: DataFrame read from ``./data/train1.csv`` (one order per row).
            height: number of grid rows.
            width: number of grid columns.
            inds: item labels 'A' .. 'Q'.
        """
        # Load train data
        self.files = pd.read_csv(os.path.join(local_path, "./data/train1.csv"))
        self.height = 10
        self.width = 9
        self.inds = list(ascii_uppercase)[:17]
        self._layout_cache = {}

    def _read_layout(self, filename):
        """Return ``./data/<filename>`` as a DataFrame, reading it from disk only once.

        ``reset`` runs every episode; caching avoids re-parsing the same
        layout CSVs each time.
        """
        # setdefault on __dict__ keeps this working for instances created
        # without running __init__.
        cache = self.__dict__.setdefault("_layout_cache", {})
        if filename not in cache:
            cache[filename] = pd.read_csv(os.path.join(local_path, "./data/" + filename))
        return cache[filename]

    def set_box(self):
        """Mark box slots on the grid and register this episode's targets.

        Every slot listed in ``box.csv`` is first set to 0. Slots whose item
        is part of the current order (``self.items``) are then set to 200 and
        their ``[row, col]`` is appended to ``self.local_target``.
        """
        box_data = self._read_layout("box.csv")

        # Every slot that may hold an item
        for box in box_data.itertuples(index=True, name='Pandas'):
            self.grid[box.row][box.col] = 0

        # Slots that actually hold an ordered item
        order_item = list(set(self.inds) & set(self.items))
        order_csv = box_data[box_data['item'].isin(order_item)]

        for order_box in order_csv.itertuples(index=True, name='Pandas'):
            self.grid[order_box.row][order_box.col] = 200
            # Coordinates the agent has to visit
            self.local_target.append([order_box.row, order_box.col])

    def set_obstacle(self):
        """Set every coordinate listed in ``obstacles.csv`` to 0 on the grid."""
        obstacles_data = self._read_layout("obstacles.csv")
        for obstacle in obstacles_data.itertuples(index=True, name='Pandas'):
            self.grid[obstacle.row][obstacle.col] = 0

    def reset(self, epi):
        """Start a new episode with the agent on the start point.

        :param epi: row index into ``self.files`` selecting this episode's order
        :return: the freshly initialised grid
        :rtype: numpy.ndarray

        Episode state set here:
            items: first column of the selected order row
            terminal_location: targets valid for the current step (set in ``step``)
            local_target: coordinates still to be visited
            actions: positions visited so far (kept for visualisation)
            curloc: current agent position
            item_loc: True while the agent stands on a cell it reached as a target
        """
        # initial episode parameter setting
        self.epi = epi
        self.items = list(self.files.iloc[self.epi])[0]
        self.cumulative_reward = 0
        self.terminal_location = None
        self.local_target = []
        self.actions = []
        self.item_loc = False

        # initial grid setting
        self.grid = np.ones((self.height, self.width), dtype="float16")

        # set information about the gridworld
        self.set_box()
        self.set_obstacle()

        # mark the start point on the grid
        self.curloc = list(self._START)
        self.grid[int(self.curloc[0])][int(self.curloc[1])] = 100

        self.done = False

        return self.grid

    def apply_action(self, action, cur_x, cur_y):
        """Return the coordinates reached by taking ``action`` from ``(cur_x, cur_y)``.

        :param action: 0 = up, 1 = down, 2 = left, anything else = right
        :param cur_x: current row index
        :param cur_y: current column index
        :return: new row index, new column index (not bounds-checked)
        :rtype: int, int
        """
        new_x = cur_x
        new_y = cur_y
        if action == 0:      # up
            new_x = cur_x - 1
        elif action == 1:    # down
            new_x = cur_x + 1
        elif action == 2:    # left
            new_y = cur_y - 1
        else:                # right
            new_y = cur_y + 1

        return int(new_x), int(new_y)

    def get_reward(self, new_x, new_y, out_of_boundary):
        """Return the reward for attempting to move to ``(new_x, new_y)``.

        ``step`` calls this after it has updated the grid, so a cell the
        agent actually moved onto holds 100 here, while a blocked cell
        still holds 0.

        :param new_x: attempted row index
        :param new_y: attempted column index
        :param out_of_boundary: four booleans, any True means off-grid
        :return: reward for the attempted move
        :rtype: float
        """
        # Leaving the grid or bumping into a blocked cell
        if any(out_of_boundary):
            return obs_reward
        if self.grid[new_x][new_y] == 0:
            return obs_reward

        # Reaching one of the current targets
        if [new_x, new_y] in self.terminal_location:
            if [new_x, new_y] == list(self._START):
                return finish_reward
            return goal_reward

        # Ordinary move
        return move_reward

    def step(self, action):
        """Advance the environment by one agent action.

        A move that would leave the grid or enter a cell holding 0 leaves
        the agent in place (the episode is not terminated). Reaching a
        target removes it from ``local_target``; once the last item is
        collected the start point becomes the final target, and reaching
        it sets ``done``.

        :param action: agent action (see ``apply_action``)
        :return:
            grid, the updated grid
            reward, reward of this step
            cumulative_reward, sum of rewards in this episode
            done, True once the agent is back at the start point as target
            goal_ob_reward, True when this step reached a target
        :rtype: numpy.ndarray, float, float, bool, bool

        (Hint: from the start point (9, 4) every action except "up" is
        blocked, so fixing the action there is advisable.)
        """
        self.terminal_location = copy.deepcopy(self.local_target)
        cur_x, cur_y = self.curloc
        self.actions.append((cur_x, cur_y))

        goal_ob_reward = False

        new_x, new_y = self.apply_action(action, cur_x, cur_y)
        out_of_boundary = [new_x < 0, new_x >= self.height, new_y < 0, new_y >= self.width]

        # Short-circuit keeps us from indexing the grid with off-grid coordinates.
        blocked = any(out_of_boundary) or self.grid[new_x][new_y] == 0

        if not blocked:
            reached_target = [new_x, new_y] in self.terminal_location

            if reached_target:
                self.local_target.remove([new_x, new_y])
                if [new_x, new_y] == list(self._START):
                    # Back at the end point: episode finished
                    self.done = True
                elif not self.local_target:
                    # Last item collected: the start point becomes the final target
                    self.local_target.append(list(self._START))
                    self.grid[self._START[0]][self._START[1]] = 200
                goal_ob_reward = True

            # A cell reached as a target reverts to a blocked slot (0) when
            # the agent leaves it; any other cell reverts to free (1).
            self.grid[cur_x][cur_y] = 0 if self.item_loc else 1
            self.grid[new_x][new_y] = 100
            self.item_loc = reached_target
            self.curloc = [new_x, new_y]

        reward = self.get_reward(new_x, new_y, out_of_boundary)
        self.cumulative_reward += reward

        return self.grid, reward, self.cumulative_reward, self.done, goal_ob_reward

reward: -0.1 -0.2 10


In [None]:
import collections
import pdb
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

#from Sim import *



In [None]:

## Replay buffer

class ReplayBuffer():
    """Fixed-capacity FIFO store of transitions for experience replay."""

    def __init__(self, maxlen=None):
        """Create an empty buffer.

        :param maxlen: capacity; when omitted, the module-level
            ``buffer_limit`` is used (the original behaviour).
        """
        capacity = buffer_limit if maxlen is None else maxlen
        self.buffer = collections.deque(maxlen=capacity)

    def put(self, transition):
        """Append one ``(s, a, r, s_prime, done_mask)`` tuple; the oldest entry is dropped when full."""
        self.buffer.append(transition)

    def sample(self, n):
        """Draw ``n`` distinct transitions at random and batch them as tensors.

        :return: ``(s, a, r, s_prime, done_mask)``; the two state tensors are
            float32 with a leading batch dimension, the other three have
            shape ``(n, 1)`` with the dtype inferred from the stored values.
        """
        mini_batch = random.sample(self.buffer, n)
        s_lst, a_lst, r_lst, s_prime_lst, done_mask_lst = [], [], [], [], []

        for s, a, r, s_prime, done_mask in mini_batch:
            s_lst.append(s)
            # Scalars are wrapped in a list so each becomes a column vector entry.
            a_lst.append([a])
            r_lst.append([r])
            s_prime_lst.append(s_prime)
            done_mask_lst.append([done_mask])

        # Stack the per-sample arrays into one ndarray first: building a tensor
        # directly from a Python list of ndarrays goes through a slow
        # element-by-element path.
        states = torch.tensor(np.asarray(s_lst), dtype=torch.float)
        next_states = torch.tensor(np.asarray(s_prime_lst), dtype=torch.float)

        return (states, torch.tensor(a_lst), torch.tensor(r_lst),
                next_states, torch.tensor(done_mask_lst))

    def size(self):
        """Return the number of stored transitions."""
        return len(self.buffer)




In [None]:
class QnetCNN(nn.Module):
    """Small CNN mapping a 7x7 local observation to 4 action values.

    Three unpadded 3x3 convolutions shrink the 7x7 input to 1x1 with 32
    channels, which is why ``fc1`` takes 32 input features.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1),
            nn.ReLU())

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1),
            nn.ReLU())

        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU())

        # Flatten
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 4)

    def forward(self, x):
        """Return action values of shape ``(batch, 4)``.

        :param x: array-like or tensor whose element count is a multiple of
            49; it is reshaped to ``(-1, 1, 7, 7)``.
        """
        # as_tensor accepts ndarrays, lists and tensors alike and does not
        # need a round trip through NumPy for tensor input.
        x = torch.as_tensor(x, dtype=torch.float32)
        x = torch.reshape(x, (-1, 1, 7, 7))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)  # one value per action
        return x

    def sample_action(self, obs, epsilon):
        """Pick an action epsilon-greedily.

        :param obs: a single observation accepted by ``forward``
        :param epsilon: probability of choosing a uniformly random action
        :return: action index in 0..3
        :rtype: int
        """
        coin = random.random()
        if coin < epsilon:
            return random.randint(0, 3)

        # The network is evaluated only when its output is used, and without
        # building an autograd graph since only the argmax is needed.
        with torch.no_grad():
            out = self.forward(obs)
        return out.argmax().item()


In [None]:
def train(q, q_target, memory, optimizer, n_updates=10, batch=None, discount=None):
    """Run several DQN gradient updates on ``q`` using replayed transitions.

    :param q: online network being trained
    :param q_target: target network used to bootstrap the TD target (not updated here)
    :param memory: object whose ``sample(n)`` returns ``(s, a, r, s_prime, done_mask)`` tensors
    :param optimizer: optimizer over ``q``'s parameters
    :param n_updates: number of mini-batch updates to perform
    :param batch: mini-batch size; defaults to the module-level ``batch_size``
    :param discount: discount factor; defaults to the module-level ``gamma``
    """
    batch = batch_size if batch is None else batch
    discount = gamma if discount is None else discount

    for _ in range(n_updates):
        s, a, r, s_prime, done_mask = memory.sample(batch)

        # Q(s, a) for the actions that were actually taken
        q_a = q(s).gather(1, a)

        # The TD target is a constant w.r.t. the online network, so it is
        # computed without autograd; this also keeps gradients from being
        # accumulated on the target network's parameters.
        with torch.no_grad():
            max_q_prime = q_target(s_prime).max(1)[0].unsqueeze(1)
            # done_mask is 0 for terminal transitions, removing the bootstrap term
            target = r + discount * max_q_prime * done_mask

        loss = F.smooth_l1_loss(q_a, target)  # Huber-style loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:
def model_save(model_dict, epi, filename='all_0602_item1.tar'):
    """Write a training checkpoint under ``./weights/``.

    :param model_dict: state dict of the network to store
    :param epi: episode index stored alongside the weights
    :param filename: checkpoint file name inside ``./weights/``
    """
    PATH = './weights/'
    # torch.save does not create missing directories.
    os.makedirs(PATH, exist_ok=True)
    torch.save({
            'model': model_dict,
            'epi': epi,
            }, PATH + filename)

In [None]:
import time
from tqdm import tqdm

# DQN hyper-parameters (module-level globals)
learning_rate = 1e-4       # Adam step size
gamma = 0.99               # discount factor in the TD target
buffer_limit = 100_000     # replay buffer capacity
batch_size = 128           # transitions per gradient update

# Report whether CUDA is available and pick the matching device.
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)
if USE_CUDA:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

def _local_view(grid, radius=3, agent_value=100):
    """Return the square window of ``grid`` centred on the agent, zero-padded past the borders.

    :param grid: 2-D array containing a cell equal to ``agent_value``
    :param radius: cells visible in each direction (window side is ``2 * radius + 1``)
    :param agent_value: cell value marking the agent
    """
    rows, cols = np.where(grid == agent_value)
    row, col = rows[0], cols[0]

    # Part of the window that lies inside the grid
    top, left = max(0, row - radius), max(0, col - radius)
    patch = grid[top:row + radius + 1, left:col + radius + 1]

    side = 2 * radius + 1
    view = np.zeros((side, side))
    # Offset the patch so the agent always lands on the centre cell.
    r0, c0 = radius - (row - top), radius - (col - left)
    view[r0:r0 + patch.shape[0], c0:c0 + patch.shape[1]] = patch
    return view


def main():
    """Train the DQN agent on the simulator.

    Each episode replays one order (cycling through order indices 0..16),
    stores every transition of the 7x7 agent-centred view in the replay
    buffer, and runs a training round once the buffer holds more than 2000
    transitions. A checkpoint is written every 1000 episodes; progress is
    printed and the target network is synchronised every ``print_interval``
    episodes.
    """
    env = Simulator()

    q = QnetCNN()
    q_target = QnetCNN()

    # PATH = './weights/'
    # checkpoint = torch.load(PATH+'all_0602_item1.tar')
    # q.load_state_dict(checkpoint['model'])
    # epi_num = checkpoint['epi']
    # q.eval()
    # Start the target network from the online network's weights
    q_target.load_state_dict(q.state_dict())

    memory = ReplayBuffer()

    print_interval = 100
    score = 0.0

    # Only the online network is optimised
    optimizer = optim.Adam(q.parameters(), lr=learning_rate)

    cnt = 0  # number of episodes that ended back at the start point

    for epi in tqdm(range(300000)):
        if epi % 1000 == 0:
            model_save(q.state_dict(), epi)

        # Exploration rate decays by 0.0005 every 100 episodes, floored at 0.01
        epsilon = max(0.01, 0.3 - 0.0005 * (epi // 100))

        # presumably the order file has 17 rows -- TODO confirm
        ep = epi % 17

        obs = np.asarray(env.reset(ep), dtype=np.float32)
        b = _local_view(obs)

        a_step = 0
        done = False
        first = True
        while not done:
            if first:
                # From the start point only "up" is a legal move
                a = 0
                first = False
            # NOTE(review): the simulator marks the agent with 100 and never
            # writes 50, so this branch looks unreachable -- confirm whether
            # 100 was intended (force "up" whenever the agent is on the start).
            elif obs[9][4] == 50:
                a = 0
            else:
                a = q.sample_action(torch.from_numpy(b).float(), epsilon)

            s_prime, r, cumul, done, goal_reward = env.step(a)
            s_prime = np.asarray(s_prime, dtype=np.float32)
            b_prime = _local_view(s_prime)

            if done:
                cnt += 1
                print(f'지금 다 먹고 {cnt}번째 도착완료 \n  그리고 총 보상은 : {cumul} ')

            # Cut the episode off once the step counter reaches 200
            if a_step == 200:
                done = True
                goal_reward = True

            done_mask = 0.0 if done else 1.0

            # Rewards are scaled down by 100 before being stored
            memory.put((b, a, r/100, b_prime, done_mask))
            obs = s_prime
            b = b_prime
            score += r

            if done:
                break

            a_step += 1  # number of actions taken so far

        if memory.size() > 2000:
            train(q, q_target, memory, optimizer)

        if epi % print_interval == 0 and epi != 0:
            action = ['↑', '↓', '←', '→'][a]
            print(f'epi = {ep}\n 마지막 상태 :\n{obs}\n 이 때 한 행동 : {action}\n 마지막 보상 : {r}\n 총 받은 보상 :{cumul}\n end : {done}\n clear : {goal_reward}')

            # Synchronise the target network with the online network
            q_target.load_state_dict(q.state_dict())
            print(f"episode :{ep}, score = {score/print_interval}, n_buffer :{memory.size()} , eps : {epsilon*100}")
            score = 0.0

# Start training only when executed as a script or notebook cell, so importing
# this file from elsewhere does not launch the full training run.
if __name__ == "__main__":
    main()

True


  0%|          | 102/300000 [00:12<10:18:28,  8.08it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-34.09999999999999
 end : True
 clear : True
episode :15, score = -31.45599999999051, n_buffer :20301 , eps : 29.95


  0%|          | 153/300000 [00:18<10:27:44,  7.96it/s]

지금 다 먹고 1번째 도착완료 
  그리고 총 보상은 : 8.40000000000002 


  0%|          | 202/300000 [00:24<10:29:46,  7.93it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-27.899999999999984
 end : True
 clear : True
episode :13, score = -28.960999999992367, n_buffer :40347 , eps : 29.9


  0%|          | 208/300000 [00:25<10:06:06,  8.24it/s]

지금 다 먹고 2번째 도착완료 
  그리고 총 보상은 : 6.200000000000017 


  0%|          | 302/300000 [00:36<10:11:27,  8.17it/s]

epi = 11
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0. 100.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-21.299999999999965
 end : True
 clear : True
episode :11, score = -28.269999999992418, n_buffer :60411 , eps : 29.849999999999998


  0%|          | 402/300000 [00:49<10:25:38,  7.98it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-24.899999999999974
 end : True
 clear : True
episode :9, score = -29.40699999999122, n_buffer :80511 , eps : 29.799999999999997


  0%|          | 444/300000 [00:54<9:58:35,  8.34it/s]

지금 다 먹고 3번째 도착완료 
  그리고 총 보상은 : 11.000000000000002 


  0%|          | 502/300000 [01:02<10:34:01,  7.87it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [100.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-33.29999999999998
 end : True
 clear : True
episode :7, score = -28.334999999992487, n_buffer :100000 , eps : 29.75


  0%|          | 602/300000 [01:14<10:21:39,  8.03it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [200.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-30.30000000000005
 end : True
 clear : True
episode :5, score = -28.895999999992387, n_buffer :100000 , eps : 29.7


  0%|          | 616/300000 [01:16<9:51:20,  8.44it/s] 

지금 다 먹고 4번째 도착완료 
  그리고 총 보상은 : 22.399999999999995 


  0%|          | 702/300000 [01:27<10:50:27,  7.67it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-35.49999999999999
 end : True
 clear : True
episode :3, score = -28.825999999992433, n_buffer :100000 , eps : 29.65


  0%|          | 786/300000 [01:38<10:15:32,  8.10it/s]

지금 다 먹고 5번째 도착완료 
  그리고 총 보상은 : 18.6 


  0%|          | 802/300000 [01:40<10:37:15,  7.83it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-23.29999999999996
 end : True
 clear : True
episode :1, score = -29.323999999991873, n_buffer :100000 , eps : 29.599999999999998


  0%|          | 902/300000 [01:53<10:35:13,  7.85it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1. 100.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-33.59999999999998
 end : True
 clear : True
episode :16, score = -30.076999999991898, n_buffer :100000 , eps : 29.549999999999997


  0%|          | 939/300000 [01:57<10:18:40,  8.06it/s]

지금 다 먹고 6번째 도착완료 
  그리고 총 보상은 : 2.400000000000013 


  0%|          | 990/300000 [02:04<9:47:58,  8.48it/s] 

지금 다 먹고 7번째 도착완료 
  그리고 총 보상은 : 16.799999999999997 


  0%|          | 1002/300000 [02:05<10:32:28,  7.88it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-36.4
 end : True
 clear : True
episode :14, score = -29.614999999991852, n_buffer :100000 , eps : 29.5


  0%|          | 1102/300000 [02:18<10:14:54,  8.10it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-32.799999999999976
 end : True
 clear : True
episode :12, score = -30.57999999999151, n_buffer :100000 , eps : 29.45


  0%|          | 1202/300000 [02:30<10:41:31,  7.76it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1. 100.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-31.39999999999998
 end : True
 clear : True
episode :10, score = -30.644999999990834, n_buffer :100000 , eps : 29.4


  0%|          | 1302/300000 [02:43<10:30:13,  7.90it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-31.699999999999978
 end : True
 clear : True
episode :8, score = -29.690999999992044, n_buffer :100000 , eps : 29.349999999999998


  0%|          | 1402/300000 [02:56<10:07:34,  8.19it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-25.300000000000086
 end : True
 clear : True
episode :6, score = -28.318999999992446, n_buffer :100000 , eps : 29.299999999999997


  1%|          | 1502/300000 [03:08<10:34:49,  7.84it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-35.19999999999999
 end : True
 clear : True
episode :4, score = -25.588999999993934, n_buffer :100000 , eps : 29.25


  1%|          | 1602/300000 [03:21<10:43:55,  7.72it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1. 100.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-22.000000000000018
 end : True
 clear : True
episode :2, score = -23.294999999995067, n_buffer :100000 , eps : 29.2


  1%|          | 1702/300000 [03:34<10:46:57,  7.68it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-16.999999999999957
 end : True
 clear : True
episode :0, score = -21.283999999995103, n_buffer :100000 , eps : 29.15


  1%|          | 1802/300000 [03:47<10:43:08,  7.73it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-30.200000000000003
 end : True
 clear : True
episode :15, score = -23.044999999995024, n_buffer :100000 , eps : 29.099999999999998


  1%|          | 1902/300000 [03:59<10:33:10,  7.85it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-35.699999999999996
 end : True
 clear : True
episode :13, score = -23.601999999994728, n_buffer :100000 , eps : 29.049999999999997


  1%|          | 2002/300000 [04:12<10:53:09,  7.60it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1. 100.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-32.39999999999999
 end : True
 clear : True
episode :11, score = -23.88099999999471, n_buffer :100000 , eps : 28.999999999999996


  1%|          | 2080/300000 [04:22<10:29:32,  7.89it/s]

지금 다 먹고 8번째 도착완료 
  그리고 총 보상은 : 4.400000000000041 


  1%|          | 2102/300000 [04:25<10:26:20,  7.93it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-24.39999999999998
 end : True
 clear : True
episode :9, score = -25.28899999999409, n_buffer :100000 , eps : 28.95


  1%|          | 2171/300000 [04:34<10:26:34,  7.92it/s]

지금 다 먹고 9번째 도착완료 
  그리고 총 보상은 : 10.200000000000017 


  1%|          | 2200/300000 [04:37<10:38:34,  7.77it/s]

지금 다 먹고 10번째 도착완료 
  그리고 총 보상은 : 12.800000000000004 


  1%|          | 2202/300000 [04:38<10:41:23,  7.74it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.300000000000033
 end : True
 clear : True
episode :7, score = -25.12499999999349, n_buffer :100000 , eps : 28.9


  1%|          | 2268/300000 [04:46<10:12:19,  8.10it/s]

지금 다 먹고 11번째 도착완료 
  그리고 총 보상은 : 18.2 


  1%|          | 2302/300000 [04:50<10:46:23,  7.68it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [200.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0. 100.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-23.70000000000002
 end : True
 clear : True
episode :5, score = -25.750999999993425, n_buffer :100000 , eps : 28.849999999999998


  1%|          | 2343/300000 [04:56<10:03:58,  8.21it/s]

지금 다 먹고 12번째 도착완료 
  그리고 총 보상은 : 11.200000000000006 


  1%|          | 2402/300000 [05:03<10:11:37,  8.11it/s]

지금 다 먹고 13번째 도착완료 
  그리고 총 보상은 : 2.1999999999999993 
epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 100.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : 20
 총 받은 보상 :2.1999999999999993
 end : True
 clear : True
episode :3, score = -25.88299999999228, n_buffer :100000 , eps : 28.799999999999997


  1%|          | 2408/300000 [05:04<9:49:05,  8.42it/s] 

지금 다 먹고 14번째 도착완료 
  그리고 총 보상은 : 21.199999999999996 


  1%|          | 2425/300000 [05:06<10:07:06,  8.17it/s]

지금 다 먹고 15번째 도착완료 
  그리고 총 보상은 : 11.800000000000022 


  1%|          | 2442/300000 [05:08<9:52:19,  8.37it/s] 

지금 다 먹고 16번째 도착완료 
  그리고 총 보상은 : 24.0 


  1%|          | 2450/300000 [05:09<10:06:00,  8.18it/s]

지금 다 먹고 17번째 도착완료 
  그리고 총 보상은 : 9.400000000000029 


  1%|          | 2487/300000 [05:14<10:02:48,  8.23it/s]

지금 다 먹고 18번째 도착완료 
  그리고 총 보상은 : 16.199999999999996 


  1%|          | 2492/300000 [05:14<9:58:20,  8.29it/s]

지금 다 먹고 19번째 도착완료 
  그리고 총 보상은 : 6.600000000000012 


  1%|          | 2502/300000 [05:16<10:39:45,  7.75it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1. 100.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.800000000000022
 end : True
 clear : True
episode :1, score = -22.64299999999545, n_buffer :100000 , eps : 28.749999999999996


  1%|          | 2544/300000 [05:21<10:45:12,  7.68it/s]

지금 다 먹고 20번째 도착완료 
  그리고 총 보상은 : 0.8000000000000078 


  1%|          | 2602/300000 [05:29<10:25:36,  7.92it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-21.900000000000038
 end : True
 clear : True
episode :16, score = -25.171999999993318, n_buffer :100000 , eps : 28.7


  1%|          | 2642/300000 [05:34<10:28:01,  7.89it/s]

지금 다 먹고 21번째 도착완료 
  그리고 총 보상은 : 3.4000000000000163 


  1%|          | 2674/300000 [05:38<9:44:25,  8.48it/s] 

지금 다 먹고 22번째 도착완료 
  그리고 총 보상은 : 19.400000000000002 


  1%|          | 2702/300000 [05:41<10:50:38,  7.62it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-32.59999999999996
 end : True
 clear : True
episode :14, score = -25.226999999993073, n_buffer :100000 , eps : 28.65


  1%|          | 2802/300000 [05:54<10:28:56,  7.88it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0. 100.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-21.400000000000023
 end : True
 clear : True
episode :12, score = -24.243999999992912, n_buffer :100000 , eps : 28.599999999999998


  1%|          | 2825/300000 [05:57<10:10:05,  8.12it/s]

지금 다 먹고 23번째 도착완료 
  그리고 총 보상은 : 4.200000000000026 


  1%|          | 2902/300000 [06:07<10:36:16,  7.78it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-24.59999999999996
 end : True
 clear : True
episode :10, score = -22.523999999993915, n_buffer :100000 , eps : 28.549999999999997


  1%|          | 3002/300000 [06:20<10:21:44,  7.96it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-21.400000000000027
 end : True
 clear : True
episode :8, score = -23.280999999993277, n_buffer :100000 , eps : 28.499999999999996


  1%|          | 3102/300000 [06:32<10:29:36,  7.86it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-30.59999999999997
 end : True
 clear : True
episode :6, score = -25.525999999992447, n_buffer :100000 , eps : 28.449999999999996


  1%|          | 3202/300000 [06:45<11:06:39,  7.42it/s]

epi = 4
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-17.999999999999975
 end : True
 clear : True
episode :4, score = -25.62599999999362, n_buffer :100000 , eps : 28.4


  1%|          | 3302/300000 [06:58<10:33:13,  7.81it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-26.899999999999938
 end : True
 clear : True
episode :2, score = -25.050999999993493, n_buffer :100000 , eps : 28.349999999999998


  1%|          | 3402/300000 [07:11<10:25:19,  7.91it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-32.799999999999955
 end : True
 clear : True
episode :0, score = -26.198999999992253, n_buffer :100000 , eps : 28.299999999999997


  1%|          | 3502/300000 [07:23<10:19:02,  7.98it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0. 100.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-22.50000000000003
 end : True
 clear : True
episode :15, score = -26.10999999999282, n_buffer :100000 , eps : 28.249999999999996


  1%|          | 3602/300000 [07:36<10:42:35,  7.69it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-35.59999999999998
 end : True
 clear : True
episode :13, score = -25.96099999999278, n_buffer :100000 , eps : 28.199999999999996


  1%|          | 3702/300000 [07:49<10:34:21,  7.78it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.30000000000002
 end : True
 clear : True
episode :11, score = -27.39399999999159, n_buffer :100000 , eps : 28.15


  1%|          | 3725/300000 [07:52<9:28:18,  8.69it/s] 

지금 다 먹고 24번째 도착완료 
  그리고 총 보상은 : 24.400000000000006 


  1%|▏         | 3802/300000 [08:01<10:38:17,  7.73it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-15.79999999999996
 end : True
 clear : True
episode :9, score = -27.374999999992806, n_buffer :100000 , eps : 28.099999999999998


  1%|▏         | 3902/300000 [08:14<10:38:45,  7.73it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-16.600000000000023
 end : True
 clear : True
episode :7, score = -25.79599999999333, n_buffer :100000 , eps : 28.049999999999997


  1%|▏         | 4002/300000 [08:27<10:32:26,  7.80it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.99999999999996
 end : True
 clear : True
episode :5, score = -24.852999999994022, n_buffer :100000 , eps : 27.999999999999996


  1%|▏         | 4102/300000 [08:40<10:30:25,  7.82it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-27.99999999999999
 end : True
 clear : True
episode :3, score = -25.62399999999404, n_buffer :100000 , eps : 27.949999999999996


  1%|▏         | 4202/300000 [08:52<10:34:41,  7.77it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-21.800000000000022
 end : True
 clear : True
episode :1, score = -25.823999999993056, n_buffer :100000 , eps : 27.9


  1%|▏         | 4302/300000 [09:05<10:17:57,  7.98it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-22.300000000000036
 end : True
 clear : True
episode :16, score = -27.148999999992316, n_buffer :100000 , eps : 27.849999999999998


  1%|▏         | 4365/300000 [09:13<10:08:22,  8.10it/s]

지금 다 먹고 25번째 도착완료 
  그리고 총 보상은 : 9.00000000000004 


  1%|▏         | 4402/300000 [09:18<10:31:26,  7.80it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-36.8
 end : True
 clear : True
episode :14, score = -27.428999999992346, n_buffer :100000 , eps : 27.799999999999997


  2%|▏         | 4502/300000 [09:31<11:06:41,  7.39it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.00000000000003
 end : True
 clear : True
episode :12, score = -27.748999999991828, n_buffer :100000 , eps : 27.749999999999996


  2%|▏         | 4602/300000 [09:44<10:56:16,  7.50it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-12.699999999999964
 end : True
 clear : True
episode :10, score = -28.243999999991978, n_buffer :100000 , eps : 27.699999999999996


  2%|▏         | 4702/300000 [09:57<10:42:03,  7.67it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-28.100000000000072
 end : True
 clear : True
episode :8, score = -27.83599999999178, n_buffer :100000 , eps : 27.65


  2%|▏         | 4802/300000 [10:10<10:28:45,  7.82it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1. 100.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-25.30000000000004
 end : True
 clear : True
episode :6, score = -25.21399999999387, n_buffer :100000 , eps : 27.599999999999998


  2%|▏         | 4902/300000 [10:23<10:24:38,  7.87it/s]

epi = 4
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-15.199999999999957
 end : True
 clear : True
episode :4, score = -24.85099999999354, n_buffer :100000 , eps : 27.549999999999997


  2%|▏         | 5002/300000 [10:36<10:56:43,  7.49it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-33.399999999999984
 end : True
 clear : True
episode :2, score = -22.97399999999495, n_buffer :100000 , eps : 27.499999999999996


  2%|▏         | 5102/300000 [10:49<10:26:34,  7.84it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-29.899999999999945
 end : True
 clear : True
episode :0, score = -24.329999999994293, n_buffer :100000 , eps : 27.449999999999996


  2%|▏         | 5202/300000 [11:01<10:23:54,  7.87it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-12.799999999999955
 end : True
 clear : True
episode :15, score = -24.4769999999944, n_buffer :100000 , eps : 27.399999999999995


  2%|▏         | 5302/300000 [11:14<10:32:49,  7.76it/s]

epi = 13
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-12.500000000000009
 end : True
 clear : True
episode :13, score = -25.447999999994344, n_buffer :100000 , eps : 27.349999999999998


  2%|▏         | 5402/300000 [11:27<10:15:10,  7.98it/s]

epi = 11
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1. 100.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-15.19999999999995
 end : True
 clear : True
episode :11, score = -25.016999999993423, n_buffer :100000 , eps : 27.299999999999997


  2%|▏         | 5502/300000 [11:40<10:14:45,  7.98it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.800000000000033
 end : True
 clear : True
episode :9, score = -25.65099999999398, n_buffer :100000 , eps : 27.249999999999996


  2%|▏         | 5602/300000 [11:52<10:24:46,  7.85it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0. 100.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-34.399999999999984
 end : True
 clear : True
episode :7, score = -26.091999999993245, n_buffer :100000 , eps : 27.199999999999996


  2%|▏         | 5702/300000 [12:05<10:34:14,  7.73it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [200.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-25.79999999999998
 end : True
 clear : True
episode :5, score = -26.355999999992854, n_buffer :100000 , eps : 27.149999999999995


  2%|▏         | 5802/300000 [12:18<10:15:35,  7.97it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-35.90000000000001
 end : True
 clear : True
episode :3, score = -27.403999999992333, n_buffer :100000 , eps : 27.099999999999998


  2%|▏         | 5902/300000 [12:31<10:47:26,  7.57it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-35.19999999999999
 end : True
 clear : True
episode :1, score = -27.867999999992463, n_buffer :100000 , eps : 27.049999999999997


  2%|▏         | 6002/300000 [12:44<10:32:57,  7.74it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-31.499999999999982
 end : True
 clear : True
episode :16, score = -28.034999999992216, n_buffer :100000 , eps : 27.0


  2%|▏         | 6102/300000 [12:57<10:27:21,  7.81it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0. 100.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-30.00000000000001
 end : True
 clear : True
episode :14, score = -27.92699999999185, n_buffer :100000 , eps : 26.949999999999996


  2%|▏         | 6202/300000 [13:10<10:39:59,  7.65it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-25.300000000000086
 end : True
 clear : True
episode :12, score = -27.190999999992684, n_buffer :100000 , eps : 26.900000000000002


  2%|▏         | 6302/300000 [13:22<10:25:55,  7.82it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-36.399999999999984
 end : True
 clear : True
episode :10, score = -29.44099999999126, n_buffer :100000 , eps : 26.849999999999994


  2%|▏         | 6402/300000 [13:36<10:22:49,  7.86it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-31.09999999999995
 end : True
 clear : True
episode :8, score = -27.681999999991525, n_buffer :100000 , eps : 26.8


  2%|▏         | 6502/300000 [13:49<10:09:52,  8.02it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-23.200000000000035
 end : True
 clear : True
episode :6, score = -25.611999999992694, n_buffer :100000 , eps : 26.749999999999996


  2%|▏         | 6564/300000 [13:57<10:15:56,  7.94it/s]

지금 다 먹고 26번째 도착완료 
  그리고 총 보상은 : 3.2000000000000313 


  2%|▏         | 6602/300000 [14:01<10:32:55,  7.73it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-22.200000000000035
 end : True
 clear : True
episode :4, score = -26.17499999999372, n_buffer :100000 , eps : 26.700000000000003


  2%|▏         | 6632/300000 [14:05<10:20:59,  7.87it/s]

지금 다 먹고 27번째 도착완료 
  그리고 총 보상은 : 5.200000000000054 


  2%|▏         | 6702/300000 [14:14<10:15:35,  7.94it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-17.89999999999997
 end : True
 clear : True
episode :2, score = -26.55799999999265, n_buffer :100000 , eps : 26.649999999999995


  2%|▏         | 6802/300000 [14:27<10:27:40,  7.79it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-35.599999999999994
 end : True
 clear : True
episode :0, score = -26.058999999993326, n_buffer :100000 , eps : 26.6


  2%|▏         | 6902/300000 [14:40<10:35:50,  7.68it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-14.099999999999973
 end : True
 clear : True
episode :15, score = -24.38299999999313, n_buffer :100000 , eps : 26.549999999999997


  2%|▏         | 6991/300000 [14:51<9:40:52,  8.41it/s] 

지금 다 먹고 28번째 도착완료 
  그리고 총 보상은 : 14.400000000000006 


  2%|▏         | 7002/300000 [14:53<10:32:53,  7.72it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-34.49999999999997
 end : True
 clear : True
episode :13, score = -25.472999999993124, n_buffer :100000 , eps : 26.5


  2%|▏         | 7102/300000 [15:06<10:26:45,  7.79it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.000000000000043
 end : True
 clear : True
episode :11, score = -25.056999999993568, n_buffer :100000 , eps : 26.449999999999996


  2%|▏         | 7202/300000 [15:18<10:20:26,  7.87it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-34.399999999999984
 end : True
 clear : True
episode :9, score = -25.134999999993408, n_buffer :100000 , eps : 26.400000000000002


  2%|▏         | 7302/300000 [15:31<10:11:32,  7.98it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1. 100.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.400000000000034
 end : True
 clear : True
episode :7, score = -25.44099999999375, n_buffer :100000 , eps : 26.35


  2%|▏         | 7402/300000 [15:44<10:33:08,  7.70it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [200.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-22.20000000000005
 end : True
 clear : True
episode :5, score = -27.520999999992505, n_buffer :100000 , eps : 26.3


  3%|▎         | 7502/300000 [15:57<10:16:33,  7.91it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1. 100.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-31.199999999999957
 end : True
 clear : True
episode :3, score = -23.305999999994384, n_buffer :100000 , eps : 26.25


  3%|▎         | 7602/300000 [16:10<10:23:49,  7.81it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-17.699999999999946
 end : True
 clear : True
episode :1, score = -24.044999999994985, n_buffer :100000 , eps : 26.200000000000003


  3%|▎         | 7702/300000 [16:22<10:17:46,  7.89it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-33.29999999999999
 end : True
 clear : True
episode :16, score = -25.592999999993616, n_buffer :100000 , eps : 26.150000000000002


  3%|▎         | 7802/300000 [16:35<10:10:33,  7.98it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1. 100.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-32.30000000000001
 end : True
 clear : True
episode :14, score = -25.719999999992933, n_buffer :100000 , eps : 26.1


  3%|▎         | 7902/300000 [16:48<10:15:39,  7.91it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-34.39999999999999
 end : True
 clear : True
episode :12, score = -26.44099999999292, n_buffer :100000 , eps : 26.05


  3%|▎         | 8002/300000 [17:01<10:33:21,  7.68it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-21.90000000000004
 end : True
 clear : True
episode :10, score = -26.231999999992674, n_buffer :100000 , eps : 26.0


  3%|▎         | 8102/300000 [17:14<10:19:21,  7.85it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-26.500000000000078
 end : True
 clear : True
episode :8, score = -26.159999999992902, n_buffer :100000 , eps : 25.95


  3%|▎         | 8202/300000 [17:26<10:25:25,  7.78it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-30.199999999999946
 end : True
 clear : True
episode :6, score = -24.810999999992934, n_buffer :100000 , eps : 25.900000000000002


  3%|▎         | 8302/300000 [17:40<10:23:35,  7.80it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.70000000000003
 end : True
 clear : True
episode :4, score = -25.03599999999415, n_buffer :100000 , eps : 25.85


  3%|▎         | 8402/300000 [17:53<10:23:30,  7.79it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-30.700000000000042
 end : True
 clear : True
episode :2, score = -25.67599999999329, n_buffer :100000 , eps : 25.8


  3%|▎         | 8502/300000 [18:06<10:37:33,  7.62it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-26.200000000000014
 end : True
 clear : True
episode :0, score = -23.990999999993964, n_buffer :100000 , eps : 25.75


  3%|▎         | 8602/300000 [18:19<10:48:43,  7.49it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1. 100.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-36.299999999999976
 end : True
 clear : True
episode :15, score = -24.611999999993806, n_buffer :100000 , eps : 25.7


  3%|▎         | 8702/300000 [18:32<10:46:39,  7.51it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-35.599999999999994
 end : True
 clear : True
episode :13, score = -25.600999999993043, n_buffer :100000 , eps : 25.650000000000002


  3%|▎         | 8802/300000 [18:45<10:33:21,  7.66it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-27.300000000000075
 end : True
 clear : True
episode :11, score = -27.879999999992137, n_buffer :100000 , eps : 25.6


  3%|▎         | 8902/300000 [18:58<10:32:25,  7.67it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-34.3
 end : True
 clear : True
episode :9, score = -28.250999999991723, n_buffer :100000 , eps : 25.55


  3%|▎         | 9002/300000 [19:11<10:20:09,  7.82it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-33.89999999999998
 end : True
 clear : True
episode :7, score = -26.24799999999347, n_buffer :100000 , eps : 25.5


  3%|▎         | 9102/300000 [19:24<10:19:32,  7.83it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-12.89999999999997
 end : True
 clear : True
episode :5, score = -25.842999999993054, n_buffer :100000 , eps : 25.45


  3%|▎         | 9202/300000 [19:37<10:43:29,  7.53it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-13.499999999999963
 end : True
 clear : True
episode :3, score = -24.94599999999365, n_buffer :100000 , eps : 25.4


  3%|▎         | 9302/300000 [19:51<10:41:19,  7.55it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.900000000000027
 end : True
 clear : True
episode :1, score = -26.17399999999282, n_buffer :100000 , eps : 25.35


  3%|▎         | 9402/300000 [20:04<10:19:49,  7.81it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.50000000000003
 end : True
 clear : True
episode :16, score = -24.66299999999326, n_buffer :100000 , eps : 25.3


  3%|▎         | 9423/300000 [20:06<10:19:34,  7.82it/s]

지금 다 먹고 29번째 도착완료 
  그리고 총 보상은 : 5.400000000000036 


  3%|▎         | 9502/300000 [20:17<10:14:18,  7.88it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0. 100.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-21.40000000000003
 end : True
 clear : True
episode :14, score = -26.212999999992658, n_buffer :100000 , eps : 25.25


  3%|▎         | 9602/300000 [20:30<10:20:30,  7.80it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 100.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-22.200000000000003
 end : True
 clear : True
episode :12, score = -28.09499999999177, n_buffer :100000 , eps : 25.2


  3%|▎         | 9702/300000 [20:43<10:47:49,  7.47it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0. 100.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-19.299999999999976
 end : True
 clear : True
episode :10, score = -25.891999999993068, n_buffer :100000 , eps : 25.15


  3%|▎         | 9802/300000 [20:56<10:20:16,  7.80it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-27.60000000000003
 end : True
 clear : True
episode :8, score = -25.491999999993126, n_buffer :100000 , eps : 25.1


  3%|▎         | 9902/300000 [21:09<10:23:18,  7.76it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1. 100.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-33.09999999999998
 end : True
 clear : True
episode :6, score = -25.79599999999357, n_buffer :100000 , eps : 25.05


  3%|▎         | 10002/300000 [21:22<10:17:50,  7.82it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-24.500000000000025
 end : True
 clear : True
episode :4, score = -26.38499999999315, n_buffer :100000 , eps : 25.0


  3%|▎         | 10102/300000 [21:35<10:29:24,  7.68it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-35.09999999999999
 end : True
 clear : True
episode :2, score = -25.63799999999303, n_buffer :100000 , eps : 24.95


  3%|▎         | 10177/300000 [21:45<10:17:43,  7.82it/s]

지금 다 먹고 30번째 도착완료 
  그리고 총 보상은 : -0.3999999999999986 


  3%|▎         | 10202/300000 [21:48<10:23:09,  7.75it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-25.30000000000007
 end : True
 clear : True
episode :0, score = -24.508999999992685, n_buffer :100000 , eps : 24.9


  3%|▎         | 10302/300000 [22:01<10:32:17,  7.64it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-21.10000000000002
 end : True
 clear : True
episode :15, score = -23.76599999999381, n_buffer :100000 , eps : 24.85


  3%|▎         | 10370/300000 [22:10<10:13:37,  7.87it/s]

지금 다 먹고 31번째 도착완료 
  그리고 총 보상은 : 14.800000000000011 


  3%|▎         | 10402/300000 [22:14<10:20:09,  7.78it/s]

epi = 13
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-23.599999999999977
 end : True
 clear : True
episode :13, score = -25.26399999999356, n_buffer :100000 , eps : 24.8


  3%|▎         | 10412/300000 [22:15<10:00:39,  8.04it/s]

지금 다 먹고 32번째 도착완료 
  그리고 총 보상은 : 7.800000000000043 


  4%|▎         | 10502/300000 [22:27<10:45:07,  7.48it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0. 100.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-23.799999999999986
 end : True
 clear : True
episode :11, score = -25.80799999999369, n_buffer :100000 , eps : 24.75


  4%|▎         | 10508/300000 [22:28<10:29:44,  7.66it/s]

지금 다 먹고 33번째 도착완료 
  그리고 총 보상은 : 4.800000000000061 


  4%|▎         | 10602/300000 [22:40<10:33:05,  7.62it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-34.00000000000001
 end : True
 clear : True
episode :9, score = -25.354999999993098, n_buffer :100000 , eps : 24.7


  4%|▎         | 10702/300000 [22:53<10:30:12,  7.65it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-34.099999999999994
 end : True
 clear : True
episode :7, score = -25.84199999999333, n_buffer :100000 , eps : 24.65


  4%|▎         | 10802/300000 [23:06<10:33:05,  7.61it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1. 100.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-12.099999999999993
 end : True
 clear : True
episode :5, score = -27.702999999991626, n_buffer :100000 , eps : 24.6


  4%|▎         | 10902/300000 [23:19<10:22:41,  7.74it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-28.899999999999956
 end : True
 clear : True
episode :3, score = -25.931999999992637, n_buffer :100000 , eps : 24.55


  4%|▎         | 11002/300000 [23:32<10:29:34,  7.65it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-30.800000000000036
 end : True
 clear : True
episode :1, score = -26.556999999993145, n_buffer :100000 , eps : 24.5


  4%|▎         | 11102/300000 [23:45<10:36:51,  7.56it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-21.700000000000017
 end : True
 clear : True
episode :16, score = -27.37399999999226, n_buffer :100000 , eps : 24.45


  4%|▎         | 11202/300000 [23:58<10:39:00,  7.53it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-14.5
 end : True
 clear : True
episode :14, score = -26.040999999993154, n_buffer :100000 , eps : 24.4


  4%|▍         | 11302/300000 [24:11<10:14:41,  7.83it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-22.200000000000017
 end : True
 clear : True
episode :12, score = -25.38499999999263, n_buffer :100000 , eps : 24.349999999999998


  4%|▍         | 11402/300000 [24:24<10:06:35,  7.93it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-36.900000000000006
 end : True
 clear : True
episode :10, score = -26.72999999999273, n_buffer :100000 , eps : 24.3


  4%|▍         | 11502/300000 [24:37<10:36:15,  7.56it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-21.50000000000002
 end : True
 clear : True
episode :8, score = -27.36499999999225, n_buffer :100000 , eps : 24.25


  4%|▍         | 11602/300000 [24:50<10:39:15,  7.52it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-37.40000000000002
 end : True
 clear : True
episode :6, score = -26.517999999993155, n_buffer :100000 , eps : 24.2


  4%|▍         | 11702/300000 [25:03<10:04:17,  7.95it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [100.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-32.19999999999994
 end : True
 clear : True
episode :4, score = -25.54399999999333, n_buffer :100000 , eps : 24.15


  4%|▍         | 11802/300000 [25:16<10:18:45,  7.76it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-22.400000000000027
 end : True
 clear : True
episode :2, score = -22.991999999995013, n_buffer :100000 , eps : 24.099999999999998


  4%|▍         | 11902/300000 [25:29<10:12:00,  7.85it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.900000000000038
 end : True
 clear : True
episode :0, score = -23.06299999999439, n_buffer :100000 , eps : 24.05


  4%|▍         | 11971/300000 [25:38<9:53:59,  8.08it/s]

지금 다 먹고 34번째 도착완료 
  그리고 총 보상은 : 13.800000000000034 


  4%|▍         | 12002/300000 [25:42<11:16:48,  7.09it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-13.199999999999976
 end : True
 clear : True
episode :15, score = -24.62499999999373, n_buffer :100000 , eps : 24.0


  4%|▍         | 12102/300000 [25:56<10:28:41,  7.63it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1. 100.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-21.300000000000036
 end : True
 clear : True
episode :13, score = -25.183999999993194, n_buffer :100000 , eps : 23.95


  4%|▍         | 12202/300000 [26:09<10:17:44,  7.76it/s]

epi = 11
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-21.099999999999937
 end : True
 clear : True
episode :11, score = -24.59199999999352, n_buffer :100000 , eps : 23.9


  4%|▍         | 12257/300000 [26:16<9:58:43,  8.01it/s]

지금 다 먹고 35번째 도착완료 
  그리고 총 보상은 : 23.200000000000003 


  4%|▍         | 12302/300000 [26:21<10:07:50,  7.89it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-33.500000000000014
 end : True
 clear : True
episode :9, score = -26.64999999999254, n_buffer :100000 , eps : 23.849999999999998


  4%|▍         | 12402/300000 [26:34<10:23:04,  7.69it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0. 200.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-26.800000000000026
 end : True
 clear : True
episode :7, score = -27.494999999992398, n_buffer :100000 , eps : 23.799999999999997


  4%|▍         | 12502/300000 [26:47<10:02:25,  7.95it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-23.399999999999977
 end : True
 clear : True
episode :5, score = -28.085999999992055, n_buffer :100000 , eps : 23.75


  4%|▍         | 12602/300000 [27:00<10:30:10,  7.60it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-36.300000000000004
 end : True
 clear : True
episode :3, score = -25.766999999992944, n_buffer :100000 , eps : 23.7


  4%|▍         | 12702/300000 [27:13<10:30:03,  7.60it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1. 100.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-35.0
 end : True
 clear : True
episode :1, score = -27.7879999999923, n_buffer :100000 , eps : 23.65


  4%|▍         | 12802/300000 [27:26<10:06:07,  7.90it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-34.400000000000006
 end : True
 clear : True
episode :16, score = -26.57199999999316, n_buffer :100000 , eps : 23.599999999999998


  4%|▍         | 12902/300000 [27:39<10:14:37,  7.79it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-34.7
 end : True
 clear : True
episode :14, score = -27.761999999992526, n_buffer :100000 , eps : 23.549999999999997


  4%|▍         | 13002/300000 [27:52<10:34:10,  7.54it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0. 100.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-29.20000000000003
 end : True
 clear : True
episode :12, score = -26.5239999999925, n_buffer :100000 , eps : 23.5


  4%|▍         | 13102/300000 [28:05<10:26:41,  7.63it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0. 100.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-13.199999999999962
 end : True
 clear : True
episode :10, score = -25.46599999999369, n_buffer :100000 , eps : 23.45


  4%|▍         | 13202/300000 [28:18<10:21:39,  7.69it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1. 100.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-31.799999999999994
 end : True
 clear : True
episode :8, score = -24.399999999993817, n_buffer :100000 , eps : 23.4


  4%|▍         | 13302/300000 [28:31<10:46:24,  7.39it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-32.099999999999945
 end : True
 clear : True
episode :6, score = -24.391999999994226, n_buffer :100000 , eps : 23.349999999999998


  4%|▍         | 13402/300000 [28:44<10:39:59,  7.46it/s]

epi = 4
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-14.200000000000015
 end : True
 clear : True
episode :4, score = -25.236999999994588, n_buffer :100000 , eps : 23.299999999999997


  5%|▍         | 13502/300000 [28:57<10:18:47,  7.72it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0. 100.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-16.69999999999995
 end : True
 clear : True
episode :2, score = -22.410999999994434, n_buffer :100000 , eps : 23.25


  5%|▍         | 13602/300000 [29:10<10:27:01,  7.61it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-35.300000000000004
 end : True
 clear : True
episode :0, score = -24.853999999992983, n_buffer :100000 , eps : 23.2


  5%|▍         | 13702/300000 [29:23<10:34:59,  7.51it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-16.29999999999995
 end : True
 clear : True
episode :15, score = -24.225999999993604, n_buffer :100000 , eps : 23.15


  5%|▍         | 13709/300000 [29:24<9:54:50,  8.02it/s] 

지금 다 먹고 36번째 도착완료 
  그리고 총 보상은 : 20.2 


  5%|▍         | 13802/300000 [29:36<10:13:29,  7.78it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1. 100.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-31.499999999999957
 end : True
 clear : True
episode :13, score = -23.822999999993467, n_buffer :100000 , eps : 23.099999999999998


  5%|▍         | 13902/300000 [29:49<10:12:55,  7.78it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-20.90000000000003
 end : True
 clear : True
episode :11, score = -25.574999999992787, n_buffer :100000 , eps : 23.049999999999997


  5%|▍         | 13921/300000 [29:51<10:08:50,  7.83it/s]

지금 다 먹고 37번째 도착완료 
  그리고 총 보상은 : -1.4000000000000128 


  5%|▍         | 14002/300000 [30:02<10:29:57,  7.57it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-25.100000000000055
 end : True
 clear : True
episode :9, score = -26.635999999992436, n_buffer :100000 , eps : 23.0


  5%|▍         | 14102/300000 [30:15<10:22:12,  7.66it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0. 100.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-18.199999999999978
 end : True
 clear : True
episode :7, score = -25.501999999992695, n_buffer :100000 , eps : 22.95


  5%|▍         | 14202/300000 [30:28<10:34:13,  7.51it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [200.   1. 100.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-27.70000000000006
 end : True
 clear : True
episode :5, score = -25.132999999993864, n_buffer :100000 , eps : 22.9


  5%|▍         | 14302/300000 [30:41<10:49:02,  7.34it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-34.69999999999999
 end : True
 clear : True
episode :3, score = -25.371999999992862, n_buffer :100000 , eps : 22.849999999999998


  5%|▍         | 14402/300000 [30:54<10:25:31,  7.61it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-21.000000000000025
 end : True
 clear : True
episode :1, score = -24.340999999993084, n_buffer :100000 , eps : 22.799999999999997


  5%|▍         | 14502/300000 [31:07<10:07:41,  7.83it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-22.10000000000004
 end : True
 clear : True
episode :16, score = -26.134999999992306, n_buffer :100000 , eps : 22.749999999999996


  5%|▍         | 14602/300000 [31:20<10:15:56,  7.72it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-30.80000000000001
 end : True
 clear : True
episode :14, score = -27.011999999992398, n_buffer :100000 , eps : 22.7


  5%|▍         | 14702/300000 [31:33<10:36:04,  7.48it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-11.79999999999997
 end : True
 clear : True
episode :12, score = -26.321999999992446, n_buffer :100000 , eps : 22.65


  5%|▍         | 14802/300000 [31:46<10:18:31,  7.68it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-25.80000000000003
 end : True
 clear : True
episode :10, score = -25.885999999992663, n_buffer :100000 , eps : 22.599999999999998


  5%|▍         | 14902/300000 [32:00<10:00:56,  7.91it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1. 100.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-13.899999999999977
 end : True
 clear : True
episode :8, score = -24.292999999993174, n_buffer :100000 , eps : 22.549999999999997


  5%|▌         | 15002/300000 [32:13<10:45:07,  7.36it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-20.900000000000023
 end : True
 clear : True
episode :6, score = -23.514999999994135, n_buffer :100000 , eps : 22.499999999999996


  5%|▌         | 15102/300000 [32:26<10:24:55,  7.60it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1. 100.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-21.300000000000033
 end : True
 clear : True
episode :4, score = -23.962999999994185, n_buffer :100000 , eps : 22.45


  5%|▌         | 15202/300000 [32:39<10:10:22,  7.78it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0. 100.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-20.299999999999955
 end : True
 clear : True
episode :2, score = -24.259999999994008, n_buffer :100000 , eps : 22.4


  5%|▌         | 15302/300000 [32:52<10:12:00,  7.75it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1. 100.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-15.999999999999954
 end : True
 clear : True
episode :0, score = -24.474999999994836, n_buffer :100000 , eps : 22.349999999999998


  5%|▌         | 15402/300000 [33:05<10:55:59,  7.23it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1. 100.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-28.999999999999975
 end : True
 clear : True
episode :15, score = -23.796999999995045, n_buffer :100000 , eps : 22.299999999999997


  5%|▌         | 15502/300000 [33:18<10:18:58,  7.66it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1. 100.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-31.200000000000003
 end : True
 clear : True
episode :13, score = -26.05699999999368, n_buffer :100000 , eps : 22.249999999999996


  5%|▌         | 15602/300000 [33:31<10:20:15,  7.64it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1. 100.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-27.000000000000064
 end : True
 clear : True
episode :11, score = -24.272999999995154, n_buffer :100000 , eps : 22.199999999999996


  5%|▌         | 15702/300000 [33:44<10:16:12,  7.69it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-34.29999999999998
 end : True
 clear : True
episode :9, score = -23.733999999992992, n_buffer :100000 , eps : 22.15


  5%|▌         | 15802/300000 [33:57<10:03:53,  7.84it/s]

epi = 7
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1. 100.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-19.49999999999999
 end : True
 clear : True
episode :7, score = -24.34199999999386, n_buffer :100000 , eps : 22.099999999999998


  5%|▌         | 15902/300000 [34:10<10:33:54,  7.47it/s]

epi = 5
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [200.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-24.99999999999997
 end : True
 clear : True
episode :5, score = -25.12999999999376, n_buffer :100000 , eps : 22.049999999999997


  5%|▌         | 16002/300000 [34:23<10:21:57,  7.61it/s]

epi = 3
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1. 100.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-20.700000000000028
 end : True
 clear : True
episode :3, score = -26.229999999992504, n_buffer :100000 , eps : 21.999999999999996


  5%|▌         | 16102/300000 [34:36<10:22:05,  7.61it/s]

epi = 1
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-33.49999999999997
 end : True
 clear : True
episode :1, score = -22.86099999999418, n_buffer :100000 , eps : 21.949999999999996


  5%|▌         | 16135/300000 [34:40<9:59:18,  7.89it/s]

지금 다 먹고 38번째 도착완료 
  그리고 총 보상은 : 12.80000000000002 


  5%|▌         | 16202/300000 [34:49<10:31:23,  7.49it/s]

epi = 16
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0. 200.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0. 100.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-35.599999999999994
 end : True
 clear : True
episode :16, score = -23.959999999994, n_buffer :100000 , eps : 21.9


  5%|▌         | 16231/300000 [34:53<9:49:21,  8.02it/s]

지금 다 먹고 39번째 도착완료 
  그리고 총 보상은 : 8.20000000000001 


  5%|▌         | 16302/300000 [35:02<10:04:55,  7.82it/s]

epi = 14
 마지막 상태 :
[[  0.   0.   0. 200.   0.   0.   0.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-27.100000000000044
 end : True
 clear : True
episode :14, score = -24.32999999999343, n_buffer :100000 , eps : 21.849999999999998


  5%|▌         | 16402/300000 [35:15<10:09:17,  7.76it/s]

epi = 12
 마지막 상태 :
[[  0.   0.   0.   0. 200.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1. 100.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-25.10000000000001
 end : True
 clear : True
episode :12, score = -26.261999999993815, n_buffer :100000 , eps : 21.799999999999997


  6%|▌         | 16502/300000 [35:29<10:24:43,  7.56it/s]

epi = 10
 마지막 상태 :
[[  0.   0.   0.   0.   0. 200.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1. 100.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.1
 총 받은 보상 :-24.900000000000066
 end : True
 clear : True
episode :10, score = -26.89499999999259, n_buffer :100000 , eps : 21.749999999999996


  6%|▌         | 16602/300000 [35:42<10:58:03,  7.18it/s]

epi = 8
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0. 200.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1. 100.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.2
 총 받은 보상 :-29.50000000000005
 end : True
 clear : True
episode :8, score = -25.454999999993962, n_buffer :100000 , eps : 21.699999999999996


  6%|▌         | 16702/300000 [35:55<10:04:04,  7.82it/s]

epi = 6
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1. 100.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-35.6
 end : True
 clear : True
episode :6, score = -23.640999999994403, n_buffer :100000 , eps : 21.65


  6%|▌         | 16802/300000 [36:08<10:11:17,  7.72it/s]

epi = 4
 마지막 상태 :
[[  0. 200.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0. 100.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-22.30000000000003
 end : True
 clear : True
episode :4, score = -24.152999999993373, n_buffer :100000 , eps : 21.599999999999998


  6%|▌         | 16902/300000 [36:21<10:13:37,  7.69it/s]

epi = 2
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1. 100.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0. 200.   0.   0.   0.   0.]]
 이 때 한 행동 : ↓
 마지막 보상 : -0.1
 총 받은 보상 :-13.899999999999938
 end : True
 clear : True
episode :2, score = -24.865999999993438, n_buffer :100000 , eps : 21.549999999999997


  6%|▌         | 17002/300000 [36:34<10:18:32,  7.63it/s]

epi = 0
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : →
 마지막 보상 : -0.2
 총 받은 보상 :-37.40000000000001
 end : True
 clear : True
episode :0, score = -27.316999999992444, n_buffer :100000 , eps : 21.499999999999996


  6%|▌         | 17102/300000 [36:48<10:21:30,  7.59it/s]

epi = 15
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1. 100.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [200.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.2
 총 받은 보상 :-21.100000000000012
 end : True
 clear : True
episode :15, score = -25.94499999999336, n_buffer :100000 , eps : 21.449999999999996


  6%|▌         | 17202/300000 [37:01<10:16:12,  7.65it/s]

epi = 13
 마지막 상태 :
[[  0.   0. 200.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0. 100.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ↑
 마지막 보상 : -0.1
 총 받은 보상 :-20.800000000000022
 end : True
 clear : True
episode :13, score = -25.24899999999336, n_buffer :100000 , eps : 21.4


  6%|▌         | 17302/300000 [37:14<9:56:25,  7.90it/s]

epi = 11
 마지막 상태 :
[[200.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1. 100.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0.   1.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.1
 총 받은 보상 :-22.700000000000053
 end : True
 clear : True
episode :11, score = -24.756999999993358, n_buffer :100000 , eps : 21.349999999999998


  6%|▌         | 17402/300000 [37:27<10:17:15,  7.63it/s]

epi = 9
 마지막 상태 :
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   1.   1.   1.   1.   1.   1.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  0.   1.   0.   1.   0.   1.   0.   1. 200.]
 [  0.   1.   0.   1.   0.   1.   0.   1.   0.]
 [  1.   1.   0.   1.   0. 100.   0.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  1.   1.   1.   1.   1.   1.   1.   1.   1.]
 [  0.   0.   0.   0.   1.   0.   0.   0.   0.]]
 이 때 한 행동 : ←
 마지막 보상 : -0.2
 총 받은 보상 :-27.799999999999937
 end : True
 clear : True
episode :9, score = -25.58399999999303, n_buffer :100000 , eps : 21.299999999999997


  6%|▌         | 17440/300000 [37:32<10:08:09,  7.74it/s]


KeyboardInterrupt: ignored

NameError: ignored