# Configurações básicas do programa

In [1]:
# Logs >>>
import logging
import os
from colorlog import ColoredFormatter

# Lookup table with every value a room's floor can take.
FLOOR_STATE = {
    "clean": 0,
    "dirty": 1,
}

# Lookup table with every action value the agent can return
# ("NoOp" maps to None, i.e. do nothing this step).
AGENT_STATE = {
    "right": 1,
    "left": -1,
    "clean": 0,
    "NoOp": None,
}


def init_log():
    """Set up root logging at DEBUG level with a colour-formatted stream handler.

    The handler is a default ``logging.StreamHandler`` whose formatter is a
    ``ColoredFormatter`` built from the record layout prefixed with
    ``%(log_color)s``. The plain layout is also handed to ``basicConfig``;
    per the ``logging`` documentation it only applies to handlers that have
    no formatter of their own, and ``basicConfig`` does nothing when the root
    logger already has handlers.
    """
    record_layout = "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s"

    # Terminal output handler carrying the coloured variant of the layout.
    console = logging.StreamHandler()
    console.setFormatter(ColoredFormatter("%(log_color)s" + record_layout))

    logging.basicConfig(
        handlers=[console],
        format=record_layout,
        level=logging.DEBUG,
    )



# Classe do Agente

Classe responsável por armazenar o desempenho do agente e decidir como ele age de acordo com o estado do ambiente.

In [2]:
import random

class Agent:
    """Simple reflex vacuum agent: picks one action per step from the current room."""

    def __init__(self):
        # Running score, starts at zero.
        self.performance = 0

        # Stored heading (1 matches AGENT_STATE["right"]). Nothing in this
        # class reads it; kept so code relying on the attribute still works.
        self.direction = 1

    def act(self, env):
        """Choose the next action for the room the agent is standing in.

        Args:
            env: Object exposing ``rooms`` (indexable sequence of FLOOR_STATE
                values) and ``agent_position`` (index into ``rooms``).

        Returns:
            An AGENT_STATE value: "clean" when the current room is dirty;
            otherwise "right" at the left edge, "left" at the right edge, a
            random left/right choice in between, or "NoOp" when there is only
            one room and therefore nowhere to move.
        """
        if env.rooms[env.agent_position] == FLOOR_STATE["dirty"]:
            return AGENT_STATE["clean"]

        last_index = len(env.rooms) - 1

        # A single clean room leaves no legal move; stepping "right" here
        # would push the agent off the grid.
        if last_index <= 0:
            return AGENT_STATE["NoOp"]

        if env.agent_position == 0:
            return AGENT_STATE["right"]
        if env.agent_position == last_index:
            return AGENT_STATE["left"]

        # Interior room: pick a direction with equal probability.
        return AGENT_STATE["right"] if random.randint(1, 2) == 1 else AGENT_STATE["left"]

    # Config
    def __iadd__(self, increase):
        """Support ``agent += n`` by adding ``n`` to ``performance``.

        Returns:
            The agent itself, as required by the in-place operator protocol.
        """
        # The result must be stored; a bare ``self.performance + increase``
        # computes the sum and throws it away.
        self.performance += increase
        return self


# Ambiente
O ambiente será representado por um vetor 1D (pois o agente possui apenas as ações de ir para a direita, ir para a esquerda, limpar e não fazer nada). Esse vetor pode aumentar e diminuir, não importando o tamanho inicial.

In [3]:
import random

class Environment:
    """One-dimensional vacuum world: a row of rooms plus the agent that cleans them."""

    def __init__(self, size, dirty_probability):
        """Build a world with randomly dirty rooms and a randomly placed agent.

        Args:
            size: Number of rooms. The original author's note says it should
                always be >= 2; this is not enforced here.
            dirty_probability: Chance, in percent (0-100), that
                ``artificial_env_changes`` dirties a room on each call.
        """
        # Room count requested at construction.
        self.size = size

        # Every room starts in a uniformly random floor state
        # (0 = clean, 1 = dirty).
        floor_values = list(FLOOR_STATE.values())
        self.rooms = [random.choice(floor_values) for _ in range(size)]

        # Agent vars
        self.agent_position = random.randrange(0, size)
        self.agent = Agent()

        # Artificial Env Controller
        self.dirty_probability = dirty_probability

    def artificial_env_changes(self):
        """Randomly dirty one room, with probability ``dirty_probability`` percent."""
        # randrange(100) yields 0..99, so the test succeeds in exactly
        # dirty_probability out of 100 outcomes (0 -> never, 100 -> always).
        # randint(0, 100) would include 100 and skew the odds.
        if random.randrange(100) < self.dirty_probability:
            self.rooms[random.randrange(len(self.rooms))] = FLOOR_STATE["dirty"]

    def update(self):
        """Ask the agent for an action and apply it to the world.

        Cleaning sets the current room to clean and adds one point to the
        agent's performance. Moves that would leave the row of rooms are
        ignored with a warning instead of corrupting ``agent_position``
        (a negative index would silently wrap around). "NoOp" does nothing;
        any other unknown action is logged and ignored.
        """
        action = self.agent.act(self)

        if action == AGENT_STATE["clean"]:
            logging.debug("Limpando a sala " + str(self.agent_position))
            self.rooms[self.agent_position] = FLOOR_STATE["clean"]
            self.agent.performance += 1

        elif action == AGENT_STATE["right"] or action == AGENT_STATE["left"]:
            step = 1 if action == AGENT_STATE["right"] else -1
            target = self.agent_position + step

            if 0 <= target < len(self.rooms):
                logging.debug("Movendo de " + str(self.agent_position) + " para " + str(target))
                self.agent_position = target
            else:
                logging.warning("Movimento ignorado: a sala " + str(target) + " não existe")

        elif action != AGENT_STATE["NoOp"]:
            logging.warning("Ação desconhecida ignorada: " + repr(action))

        return

    def show_env(self):
        """Log one DEBUG line showing each room (C = clean, D = dirty) and the agent position."""
        cells = "".join(
            " | " + ("C" if floor == FLOOR_STATE["clean"] else "D")
            for floor in self.rooms
        )
        logging.debug(cells + " | -> agent position: " + str(self.agent_position))



    



In [4]:
def main(size=2, dirty_probability=50, steps=50, seed=None):
    """Run the vacuum-world simulation and log the agent's total score.

    Args:
        size: Number of rooms passed to ``Environment`` (default 2).
        dirty_probability: Dirt-generation setting passed to ``Environment``
            (default 50).
        steps: Number of simulation steps to run (default 50).
        seed: Optional seed for the ``random`` module. When given, the run
            is reproducible; when ``None`` the generator is left untouched.

    Returns:
        None.
    """
    init_log()

    # Seed before the environment is built, since construction already
    # draws random room states and a random agent position.
    if seed is not None:
        random.seed(seed)

    env = Environment(size, dirty_probability)

    for _ in range(steps):
        # Play
        env.show_env()
        env.update()

        # Generate dirty
        env.artificial_env_changes()

    logging.info("Pontuação total: " + str(env.agent.performance))

    return None


# Entry point: run the simulation when this cell is executed.
main()

[37m2021-10-04 16:49:22,000 [MainThread  ] [DEBUG]   | D | D | -> agent position: 0[0m
[37m2021-10-04 16:49:22,004 [MainThread  ] [DEBUG]  Limpando a sala 0[0m
[37m2021-10-04 16:49:22,005 [MainThread  ] [DEBUG]   | C | D | -> agent position: 0[0m
[37m2021-10-04 16:49:22,005 [MainThread  ] [DEBUG]  Movendo de 0 para 1[0m
[37m2021-10-04 16:49:22,007 [MainThread  ] [DEBUG]   | C | D | -> agent position: 1[0m
[37m2021-10-04 16:49:22,008 [MainThread  ] [DEBUG]  Limpando a sala 1[0m
[37m2021-10-04 16:49:22,009 [MainThread  ] [DEBUG]   | C | C | -> agent position: 1[0m
[37m2021-10-04 16:49:22,010 [MainThread  ] [DEBUG]  Movendo de 1 para 0[0m
[37m2021-10-04 16:49:22,011 [MainThread  ] [DEBUG]   | C | C | -> agent position: 0[0m
[37m2021-10-04 16:49:22,012 [MainThread  ] [DEBUG]  Movendo de 0 para 1[0m
[37m2021-10-04 16:49:22,014 [MainThread  ] [DEBUG]   | C | C | -> agent position: 1[0m
[37m2021-10-04 16:49:22,015 [MainThread  ] [DEBUG]  Movendo de 1 para 0[0m
[37m202

[32m2021-10-04 16:49:22,111 [MainThread  ] [INFO ]  Pontuação total: 25[0m
