In [1]:
import os
import json
import random
import time
import copy


class Thing:
    def __init__(self, name):
        self.name = name

    def state(self):
        return json.dumps(self.__dict__)

    def __repr__(self):
        return self.state()

    def act(self):
        pass


class Room(Thing):
    def __init__(self, name, neat):
        super().__init__(name)
        self.neat = neat


class Agent(Thing):
    def __init__(self, loc):
        super().__init__(self.__class__.__name__)
        self.loc = loc
        self.action = 'NoOp'

    def act(self, world):
        self.action = random.choice(['Left', 'Right', 'Suck', 'NoOp'])
        return self.action


class World(Thing):
    def __init__(self):
        self.map = [
            Room('A', 'Clean'),
            Room('B', 'Dirty')
        ]
        self.agents = [Agent(0)]
        self.tick = 0

    def __repr__(self):
        return "\n".join([
            "tick:   ============{}===========".format(self.tick),
            "map:    {}".format(self.map),
            "agents: {}".format(self.agents),
        ])

    def act(self):
        self.tick += 1
        # 更新rooms
        for room in self.map:
            if random.randint(1, 4) == 1:
                room.neat = 'Dirty'
        # 更新Agent
        for agent in self.agents:
            action = agent.act(self)  # Left, Right, Suck, NoOp
            if action == 'Left':
                agent.loc = (agent.loc - 1) % len(self.map)
            elif action == 'Right':
                agent.loc = (agent.loc + 1) % len(self.map)
            elif action == 'Suck':
                self.map[agent.loc].neat = 'Clean'

    def living(self):
        while True:
            world.act()
            print(world)
            time.sleep(2)


world = World()
world.living()

map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "Agent", "loc": 0, "action": "Suck"}]


KeyboardInterrupt: 

In [2]:
# 基于Table的Agent
class TableAgent(Agent):
    def __init__(self, loc):
        super().__init__(loc)
        self.table = {
            (('A', 'Clean')): 'Right',
            (('A', 'Dirty')): 'Suck',
            (('B', 'Clean')): 'Left',
            (('B', 'Dirty')): 'Suck',
            (('A', 'Clean'), ('A', 'Clean')): 'Right',
            (('A', 'Dirty'), ('A', 'Dirty')): 'Suck',
        }

    def act(self, world):
        room = world.map[self.loc]
        self.action = self.table.get((room.name, room.neat))
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        my_dict.pop('table')
        return json.dumps(my_dict)


world = World()
world.agents = [TableAgent(0)]
world.living()

map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "TableAgent", "loc": 1, "action": "Right"}]
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Clean"}]
agents: [{"name": "TableAgent", "loc": 1, "action": "Suck"}]


KeyboardInterrupt: 

In [5]:
# 简单反射的Agent
class ReflexAgent(Agent):
    def __init__(self, loc):
        super().__init__(loc)

    def act(self, world):
        room = world.map[self.loc]
        self.action = 'NoOp'
        if room.neat == 'Dirty':
            self.action = 'Suck'
        elif room.name == 'A':
            self.action = 'Right'
        elif room.name == 'B':
            self.action = 'Left'
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        return json.dumps(my_dict)


class NoChangeWorld(World):
    def act(self):
        self.tick += 1
        # 更新Agent
        for agent in self.agents:
            action = agent.act(self)  # Left, Right, Suck, NoOp
            if action == 'Left':
                agent.loc = (agent.loc - 1) % len(self.map)
            elif action == 'Right':
                agent.loc = (agent.loc + 1) % len(self.map)
            elif action == 'Suck':
                self.map[agent.loc].neat = 'Clean'


world = NoChangeWorld()
world.agents = [ReflexAgent(0)]
world.living()

map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "ReflexAgent", "loc": 1, "action": "Right"}]


KeyboardInterrupt: 

In [3]:
# 基于模型的Agent
class Model(Thing):
    def __init__(self, world):
        self.world = world
        self.resume()

    def act(self, loc):
        room = self.map[loc]
        action = 'NoOp'
        if room.neat == 'Dirty':
            action = 'Suck'
        elif room.name == 'A':
            action = 'Right'
        elif room.name == 'B':
            action = 'Left'
        return action

    def resume(self):
        self.map = copy.deepcopy(world.map)


class ModelAgent(Agent):
    def __init__(self, loc, world):
        super().__init__(loc)
        self.model = Model(world)

    def act(self, world):
        self.model.resume()
        self.action = self.model.act(self.loc)
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        my_dict.pop('model')
        return json.dumps(my_dict)


world = World()
world.agents = [ModelAgent(0, world)]
world.living()

map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "ModelAgent", "loc": 1, "action": "Right"}]


KeyboardInterrupt: 

In [6]:
# 基于目标的Agent
class GoalModel(Model):
    def act(self, loc, action):
        if action == 'Left':
            loc = (loc - 1) % len(self.map)
        elif action == 'Right':
            loc = (loc + 1) % len(self.map)
        elif action == 'Suck':
            self.map[loc].neat = 'Clean'
        return self.check_goal()

    def check_goal(self):
        if self.map[0].neat == self.map[1].neat == 'Clean':
            return True
        else:
            return False


class GoalAgent(Agent):
    def __init__(self, loc, world):
        super().__init__(loc)
        self.model = GoalModel(world)

    def act(self, world):
        self.action = 'NoOp'
        for action in ['Left', 'Right', 'Suck', 'NoOp']:
            self.model.resume()
            if self.model.act(self.loc, action):
                self.action = action
                break
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        my_dict.pop('model')
        return json.dumps(my_dict)


world = NoChangeWorld()
world.map[0].neat='Dirty'
world.map[1].neat='Clean'
world.agents = [GoalAgent(0, world)]
world.living()

map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Clean"}]
agents: [{"name": "GoalAgent", "loc": 0, "action": "Suck"}]
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Clean"}]
agents: [{"name": "GoalAgent", "loc": 1, "action": "Left"}]


KeyboardInterrupt: 

In [None]:
# 基于效用的Agent
class UtilityModel(Model):
    def act(self, loc, action):
        if action == 'Left':
            loc = (loc - 1) % len(self.map)
        elif action == 'Right':
            loc = (loc + 1) % len(self.map)
        elif action == 'Suck':
            self.map[loc].neat = 'Clean'

    def cal_utility(self, action):
        score = 0
        if self.map[0].neat == 'Clean':
            score+=2
        if self.map[1].neat == 'Clean':
            score+=2
        if action in ['Right','Left']:
            score -=1
        if action in ['Suck']:
            score -=0.5
        return score

class UtilityAgent(Agent):
    def __init__(self, loc, world):
        super().__init__(loc)
        self.model = UtilityModel(world)

    def act(self, world):
        self.action = 'NoOp'
        best_score = -10
        for action in ['Left', 'Right', 'Suck', 'NoOp']:
            self.model.resume()
            before_action_score = self.model.cal_utility('NoOp')
            self.model.act(self.loc, action)
            after_action_score = self.model.cal_utility(action)
            print(action, before_action_score, after_action_score)
            if after_action_score - before_action_score > best_score:
                best_score = after_action_score - before_action_score
                self.action = action
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        my_dict.pop('model')
        return json.dumps(my_dict)


world = World()
world.agents = [UtilityAgent(0, world)]
world.living()

In [10]:
# 基于学习的Agent
class LearnModel(Model):
    def act(self, loc, action):
        if action == 'Left':
            loc = (loc - 1) % len(self.map)
        elif action == 'Right':
            loc = (loc + 1) % len(self.map)
        elif action == 'Suck':
            self.map[loc].neat = 'Clean'

    def cal_utility(self, action):
        score = 0
        if self.map[0].neat == 'Clean':
            score+=2
        if self.map[1].neat == 'Clean':
            score+=2
        if action in ['Right','Left']:
            score -=1
        if action in ['Suck']:
            score -=0.5
        return score

class LearnAgent(Agent):
    def __init__(self, loc, world):
        super().__init__(loc)
        self.model = LearnModel(world)
        self.rules = dict()

    def act(self, world):
        # 如果当前情况在rules中，直接响应
        now_scene = "-".join([
            world.map[0].neat,
            world.map[1].neat,
            world.map[self.loc].name
        ])
        if now_scene in self.rules:
            self.action = self.rules[now_scene]
        else:
            self.action = 'NoOp'
            best_score = -10
            for action in ['Left', 'Right', 'Suck', 'NoOp']:
                self.model.resume()
                before_action_score = self.model.cal_utility('NoOp')
                self.model.act(self.loc, action)
                after_action_score = self.model.cal_utility(action)
                print(action, before_action_score, after_action_score)
                if after_action_score - before_action_score > best_score:
                    best_score = after_action_score - before_action_score
                    self.action = action
            self.rules[now_scene] = self.action
        return self.action

    def __repr__(self):
        my_dict = copy.copy(self.__dict__)
        my_dict.pop('model')
        return json.dumps(my_dict)


world = World()
world.agents = [LearnAgent(0, world)]
world.living()

Left 2 1
Right 2 1
Suck 2 1.5
NoOp 2 2
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "LearnAgent", "loc": 0, "action": "NoOp", "rules": {"Clean-Dirty-A": "NoOp"}}]
Left 0 -1
Right 0 -1
Suck 0 1.5
NoOp 0 0
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "LearnAgent", "loc": 0, "action": "Suck", "rules": {"Clean-Dirty-A": "NoOp", "Dirty-Dirty-A": "Suck"}}]
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "LearnAgent", "loc": 0, "action": "NoOp", "rules": {"Clean-Dirty-A": "NoOp", "Dirty-Dirty-A": "Suck"}}]
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "LearnAgent", "loc": 0, "action": "Suck", "rules": {"Clean-Dirty-A": "NoOp", "Dirty-Dirty-A": "Suck"}}]
map:    [{"name": "A", "neat": "Clean"}, {"name": "B", "neat": "Dirty"}]
agents: [{"name": "LearnAgent", "loc": 0, "action": "Suck", "rules": {"Clean-Dirty-A": "NoOp

KeyboardInterrupt: 