In [1]:
import torch
from scienceworld import ScienceWorldEnv

from sources.agent import BDIAgent
from sources.bdi_components.belief import State
from sources.bdi_components.inference import NLIModel
from sources.bdi_components.plans import PlanLibrary
from sources.scienceworld import parse_observation

In [2]:
# Hugging Face Hub id of the NLI checkpoint handed to NLIModel.
# An alternative checkpoint that was tried: "alisawuffles/roberta-large-wanli".
hg_model_hub_name = "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA.
# NOTE(review): assumes NLIModel accepts device='cpu' — confirm in sources/bdi_components/inference.py.
nli_device = 'cuda' if torch.cuda.is_available() else 'cpu'
nli_model = NLIModel(hg_model_hub_name, device=nli_device)

Some weights of the model checkpoint at ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model size: 355,362,819


In [3]:
# MAIN GOAL
# Top-level plan for the goal "freeze mercury". The steps "get metal pot",
# "get mercury" and "refrigerate mercury" have matching sub-plans defined below.
# NOTE(review): the variable is named melt_plan although the goal is to freeze;
# the name is left unchanged in case other cells reference it.
melt_plan = """
    IF your goal is to freeze mercury THEN
        move to kitchen,
        pick up thermometer,
        get metal pot,
        get mercury,
        focus on substance in metal pot,
        refrigerate mercury
"""

# Sub-plan for "get metal pot": applies when in the kitchen with a cupboard in view.
subplan_a = """
    IF your goal is to get metal pot CONSIDERING you are in the kitchen AND you see a cupboard THEN
        open cupboard,
        pick up metal pot
"""


# Sub-plan for "get mercury": applies when outside the workshop with the metal pot in the inventory.
subplan_b = """
    IF your goal is to get mercury CONSIDERING you are not in workshop AND you have metal pot in your inventory THEN
        move to workshop,
        pour cup into metal pot in inventory
"""

# Sub-plan for "refrigerate mercury": applies when the pot with mercury is in the inventory.
subplan_c = """
    IF your goal is to refrigerate mercury CONSIDERING you have a metal pot with mercury in your inventory THEN
        move to workshop,
        open freezer,
        move metal pot to freezer,
        wait,
        wait,
        use thermometer on metal pot,
        focus on substance in metal pot,
        pick up metal pot
"""

# Sub-plan for "heat mercury on blast furnace".
# NOTE(review): no step of the freeze plan above names this goal; it looks like a
# leftover from a heating task — confirm whether it should stay in the library.
subplan_d = """
    IF your goal is to heat mercury on blast furnace CONSIDERING you have metal pot with mercury in your inventory THEN
        move to foundry,
        open blast furnace,
        move metal pot to blast furnace,
        activate blast furnace,
        use thermometer on metal pot,
        focus on substance in metal pot,
        wait,
        use thermometer on metal pot
"""

# Every plan string defined in this cell, in the order it is handed to the library.
all_plans = [melt_plan, subplan_a, subplan_b, subplan_c, subplan_d]

pl = PlanLibrary()
pl.load_plans_from_strings(all_plans)  # load plans from strings above
pl.load_plans_from_file("plans_navigation.txt")  # load plans from file
# Print the keys of the loaded plan dictionary as a quick check of what was registered.
print(pl.plans.keys())

dict_keys(['freeze mercury', 'get metal pot', 'get mercury', 'refrigerate mercury', 'heat mercury on blast furnace', 'move to art studio', 'move to bedroom', 'move to greenhouse', 'move to kitchen', 'move to living room', 'move to workshop', 'move to outside', 'move to foundry', 'move to bathroom', 'move to hallway'])


In [5]:
# Create the ScienceWorld environment with a cap of 100 steps per episode.
env = ScienceWorldEnv("", "", envStepLimit=100)

task = 'freeze'
# The task is loaded once with variation 0 before the chosen variation is loaded.
# NOTE(review): the first load appears to be needed only when the variation is drawn
# with env.getRandomVariationTest() instead of being fixed — confirm before removing it.
env.load(task, 0)
randVariationIdx = 25
env.load(task, randVariationIdx)

# Goal phrase for the agent: the first sentence of the task description with the
# "Your task is to" lead-in stripped (e.g. "freeze mercury").
goal = env.getTaskDescription().split('.')[0].replace("Your task is to", "").strip()

# Report the task that was actually loaded (the label used to be hard-coded to 'boil').
print(f"Task Name: {task} variation {randVariationIdx}")
print("Task Description: " + str(env.getTaskDescription()))

# Reset the environment
observation, info = env.reset()

# Initial state: walk to the kitchen and look around, so the agent starts from there.
for setup_action in ('open door to hallway',
                     'go to hallway',
                     'open door to kitchen',
                     'go to kitchen',
                     'look around'):
    observation, reward, isCompleted, info = env.step(setup_action)

# Parse the result of the last setup step into the state handed to the agent.
current_state = parse_observation(observation=observation, inventory=info['inv'], look_around=info['look'],
                                  task=goal, valid_actions=info['valid'])
print(current_state.look)
goal

Task Name: boil variation 25
Task Description: Your task is to freeze mercury. First, focus on the substance. Then, take actions that will cause it to change its state of matter.
['This room is called the kitchen.', 'You see the agent', 'You see a substance called air', 'You see a chair. On the chair is: nothing.', 'You see a counter. On the counter is: a bowl (containing a red apple, a banana, an orange, a potato), a drawer.', 'You see a cupboard. The cupboard door is closed.', 'You see a freezer. The freezer door is closed.', 'You see a fridge. The fridge door is closed.', 'You see a glass jar (containing a substance called sodium chloride)', 'You see a lighter', 'You see a oven, which is turned off. The oven door is closed.', 'You see a painting', 'You see a sink, which is turned off. In the sink is: nothing.', 'You see a substance called soap', 'You see a stopwatch, which is deactivated.', 'You see a stove, which is turned off. On the stove is: nothing.', 'You see a table. On the t

'freeze mercury'

In [18]:
def step_function(action: str) -> State:
    """Execute one action in the notebook's environment and return the parsed result.

    Args:
        action: Action string forwarded unchanged to ``env.step``.

    Returns:
        The value produced by ``parse_observation`` from the step's observation
        and info fields, using the notebook-level ``goal`` as the task.
    """
    # The reward and completion flag returned by the step are not needed here.
    obs_text, _, _, step_info = env.step(action)
    current_score = step_info['score']

    next_state = parse_observation(
        observation=obs_text,
        inventory=step_info['inv'],
        look_around=step_info['look'],
        task=goal,
        valid_actions=step_info['valid'],
        score=current_score,
    )

    # Log every executed action together with its observation and the running score.
    print(f"Action: {action} -> Obs: {obs_text} -> score {current_score}")
    return next_state


# Build the BDI agent from the plan library and the NLI model, then let it act
# starting from the initial state; step_function is passed in as the step callback.
agent = BDIAgent(plan_library=pl, nli_model=nli_model)
last_state = agent.act(current_state, step_function=step_function)

# Report the environment's goal-progress string and the reward attribute of the returned state.
print(env.getGoalProgressStr())
print(last_state.reward)

Action: open door to hallway -> Obs: The door is now open. -> score 0
Action: go to hallway -> Obs: You move to the hallway. -> score 0
Action: open door to kitchen -> Obs: The door is now open. -> score 0
Action: go to kitchen -> Obs: You move to the kitchen. -> score 0
Action: pick up thermometer -> Obs: You move the thermometer to the inventory. -> score 0
Action: open cupboard -> Obs: The cupboard is now open. -> score 0
Action: pick up metal pot -> Obs: You move the metal pot to the inventory. -> score 0
Action: open door to hallway -> Obs: The door is already open. -> score 0
Action: go to hallway -> Obs: You move to the hallway. -> score 0
Action: open door to workshop -> Obs: The door is now open. -> score 0
Action: go to workshop -> Obs: You move to the workshop. -> score 10
Action: pour cup into metal pot in inventory -> Obs: You pour the contents of the paper cup into the metal pot. -> score 10
Action: focus on substance in metal pot -> Obs: You focus on the mercury. -> scor

In [6]:
# Display the `look` field of the state returned by agent.act.
last_state.look

['This room is called the foundry.',
 'You see the agent',
 'You see a substance called air',
 'You see a blast furnace, which is turned off. The blast furnace door is closed.',
 'You see a sink, which is turned off. In the sink is: nothing.',
 'You see a table. On the table is: nothing.',
 'A door to the outside (that is open)']