In [1]:
import argparse

import lightning
import pandas as pd
import torch
from datasets import Dataset, Features, Sequence, Value
from lightning import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from scienceworld import ScienceWorldEnv
from torch.utils.data import DataLoader

from sources.fallback_policy.encoder import HFEncoderModel
from sources.fallback_policy.model import ContrastiveQNetwork
from sources.scienceworld.utils import parse_beliefs, parse_goal

# Fix the global random seed (42) up front so repeated runs of this notebook are comparable.
lightning.seed_everything(42)

Seed set to 42


42

In [6]:
# Task selection for this exploratory cell. Keeping the values in one place means the
# log line below always reports what was actually loaded.
TASK_NAME = "boil"
VARIATION_IDX = 0
SIMPLIFICATIONS = "openDoors"

env = ScienceWorldEnv()
env.load(TASK_NAME, VARIATION_IDX, SIMPLIFICATIONS)
goal = f"Your task is to {parse_goal(env.getTaskDescription())}"
print(f"Scienceworld environment started. Goal: {goal} - Variation: {VARIATION_IDX}")

observation, info = env.reset()
# Beliefs parsed from the first observation, the room description and the inventory;
# the goal sentence is appended as the last belief.
belief_base = parse_beliefs(observation, info['look'], info['inv']) + [goal]
belief_base

Scienceworld environment started. Goal: Your task is to boil water - Variation: 0


['This room is called the hallway.',
 'You see the agent',
 'You see a substance called air',
 'You see a picture',
 'A door to the art studio (that is open)',
 'A door to the bedroom (that is open)',
 'A door to the greenhouse (that is open)',
 'A door to the kitchen (that is open)',
 'A door to the living room (that is open)',
 'A door to the workshop (that is open)',
 'In your inventory, you see: an orange',
 'Your task is to boil water']

In [74]:
model_name = 'princeton-nlp/sup-simcse-roberta-base'
# Fall back to CPU when no GPU is visible so the notebook can still be loaded and inspected.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
encoder_model = HFEncoderModel(model_name, device=device)

# Checkpoints from earlier experiments, kept for reference (uncomment exactly one to switch).

# 4 blocks with action corrected
#checkpoint_file = "../../checkpoints/sup/version_17/epoch=47-step=240-train_loss_epoch=0.821.ckpt"

# 2 blocks with correct actions
#checkpoint_file = "../../checkpoints/sup/version_18/epoch=37-step=190-train_loss_epoch=0.816.ckpt"

# 8 blocks with correct actions
#checkpoint_file = "../../checkpoints/sup/version_19/epoch=49-step=250-train_loss_epoch=0.834.ckpt"

# 2 blocks + correct actions + 3 last actions
#checkpoint_file = "../../checkpoints/sup/version_20/epoch=37-step=190-train_loss_epoch=0.806.ckpt"

# 2 blocks + original actions + 2 last actions (currently selected)
checkpoint_file = "../../checkpoints/sup/version_21/epoch=37-step=190-train_loss_epoch=0.803.ckpt"

# map_location restores the weights onto the same device the encoder was built for,
# instead of wherever the checkpoint happened to be saved from.
model = ContrastiveQNetwork.load_from_checkpoint(
    checkpoint_file,
    map_location=device,
    encoder_model=encoder_model,
)



In [75]:
# Greedy rollout of the loaded Q-network in a freshly loaded environment.
# `device` is the one chosen when the encoder/model were created, so the rollout also
# works on CPU-only machines instead of failing on a hard-coded 'cuda'.
model = model.to(device).eval()
env = ScienceWorldEnv()
env.load("boil", 0, "openDoors")  # TODO: parametrize task name
goal = parse_goal(env.getTaskDescription())
goal = f"Your task is to {goal}"
print(f"Evaluating model in Scienceworld environment. Goal {goal}")

max_steps = 36  # hard cap on the number of environment steps in this episode
top_k = 3  # number of best-scoring candidates printed per step

belief_base_tracker = []  # belief base handed to the model at every step
plan_tracker = []  # executed action and resulting observation per step
previous_action = []  # actions chosen so far, with the turn they were chosen at
acc_reward = 0
action = "look around"  # first action is fixed; afterwards the model chooses

with torch.no_grad():
    for step in range(max_steps):
        obs, reward, is_done, info = env.step(action)
        acc_reward += reward
        print(f" => Step {step} - reward: {reward:.3f} - is_done: {is_done} - action: {action}")
        print(f"\t Observation:{obs}")
        plan_tracker.append({'step': step, 'action': action, 'observation': obs})
        if is_done:
            break

        belief_base = parse_beliefs(observation=obs, look=info['look'], inventory=info['inv']) + [goal]

        # Only the two most recent actions are exposed to the model
        # (presumably matching the "2 last actions" setup of the selected checkpoint - confirm).
        for a in previous_action[-2:]:
            belief_base.append(f"You execute {a['action']} at turn {a['turn']}")

        belief_base_tracker.append(belief_base)

        candidate_actions = info['valid']
        if not candidate_actions:
            # Nothing to score: stop cleanly instead of failing on an empty argmax.
            print("\tNo valid candidate actions; stopping rollout.")
            break

        q_values = model.act(belief_base, candidate_actions=candidate_actions)
        selected_action = int(q_values.argmax(dim=-1)[0])  # greedy selection
        action = candidate_actions[selected_action]

        # Rank all candidates so the best-scoring alternatives can be inspected.
        values, idxs = torch.sort(q_values.squeeze(0), descending=True)
        print(f"\tSelected action: {action} - Action space - Top {top_k}:")
        for value, idx in zip(values[:top_k].tolist(), idxs[:top_k].tolist()):
            print(f"\t\tCandidate Action: {candidate_actions[idx]} - q_value: {value:.3f}")

        previous_action.append({
            'turn': step,
            'action': action
        })


Evaluating model in Scienceworld environment. Goal Your task is to boil water
 => Step 0 - reward: 0.000 - is_done: False - action: look around
	 Observation:This room is called the hallway. In it, you see: 
	the agent
	a substance called air
	a picture
You also see:
	A door to the art studio (that is open)
	A door to the bedroom (that is open)
	A door to the greenhouse (that is open)
	A door to the kitchen (that is open)
	A door to the living room (that is open)
	A door to the workshop (that is open)
	Selected action: go to kitchen - Action space - Top 3:
		Candidate Action: go to kitchen - q_value: 1.750
		Candidate Action: go to door to kitchen - q_value: 1.705
		Candidate Action: look at door to kitchen - q_value: 1.649
 => Step 1 - reward: 0.000 - is_done: False - action: go to kitchen
	 Observation:You move to the kitchen.
	Selected action: pick up thermometer - Action space - Top 3:
		Candidate Action: pick up thermometer - q_value: 1.552
		Candidate Action: pick up ceramic cup 

In [76]:
# Accumulated reward of the last rollout, followed by one "<step> - <action>" line per executed step.
print(f"Plan Summary: {acc_reward}")
for entry in plan_tracker:
    print(f"{entry['step']} - {entry['action']}")


Plan Summary: 73
0 - look around
1 - go to kitchen
2 - pick up thermometer
3 - open cupboard
4 - pick up metal pot
5 - move metal pot to sink
6 - activate sink
7 - deactivate sink
8 - pick up metal pot
9 - focus on substance in metal pot
10 - pour metal pot into metal pot
11 - focus on metal pot
12 - focus on metal pot
13 - focus on metal pot
14 - move metal pot to stove
15 - activate stove
16 - look at substance in metal pot
17 - use thermometer on inventory
18 - look at substance in metal pot
19 - use thermometer on inventory
20 - look at substance in metal pot
21 - use thermometer on inventory
22 - look at substance in metal pot
23 - use thermometer on inventory
24 - look at substance in metal pot
25 - use thermometer on inventory
26 - look at substance in metal pot
27 - use thermometer on inventory
28 - look at substance in metal pot
29 - use thermometer on inventory
30 - look at substance in metal pot
31 - use thermometer on inventory
32 - look at substance in metal pot
33 - use t

In [59]:
# Print the goal-progress report of `env`. `env` is rebound by every cell that creates a
# ScienceWorldEnv, so this reflects whichever of those cells was executed most recently.
print(env.getGoalProgressStr())

Completed keys: 
----------------------------------------------------------------------------------------------------
Sequential Subgoals:
----------------------------------------------------------------------------------------------------
0	true	                                GoalFind	focus on substance
1	true	                 GoalChangeStateOfMatter	substance is in a liquid state
2	false	                 GoalChangeStateOfMatter	substance is in a gaseous state (or combusting)
----------------------------------------------------------------------------------------------------
Unordered and Optional Subgoals:
----------------------------------------------------------------------------------------------------
0	true	                    GoalInRoomWithObject	be in same location as water
1	true	            GoalObjectsInSingleContainer	have substance alone in a single container
2	true	              GoalActivateDeviceWithName	activate heater (stove)
3	false	              GoalActivateDeviceWi

In [79]:
# Replay the environment's gold action sequence for the same task and record it in
# `gold_plan_tracker`. No model is involved here, so nothing is moved to a device and
# no autograd context is needed.
env = ScienceWorldEnv()
env.load("boil", 0, "openDoors", generateGoldPath=True)  # TODO: parametrize task name
goal = parse_goal(env.getTaskDescription())
goal = f"Your task is to {goal}"
print(f"Evaluating model in Scienceworld environment. Goal {goal}")

# Gold actions that are replayed under a different phrasing
# (presumably to match the wording of the actions the model selects - confirm).
gold_action_aliases = {
    "examine substance in metal pot": "look at substance in metal pot",
}

gold_plan_tracker = []  # executed gold action and resulting observation per step
acc_reward = 0  # NOTE: overwrites the reward accumulated by the model rollout cell

for step, gold_action in enumerate(env.getGoldActionSequence()):
    action = gold_action_aliases.get(gold_action, gold_action)
    obs, reward, is_done, info = env.step(action)
    acc_reward += reward
    print(f" => Step {step} - reward: {reward:.3f} - is_done: {is_done} - action: {action}")
    print(f"\t Observation:{obs}")
    gold_plan_tracker.append({'step': step, 'action': action, 'observation': obs})
    if is_done:
        print("finish")
        break


Evaluating model in Scienceworld environment. Goal Your task is to boil water
 => Step 0 - reward: 0.000 - is_done: False - action: open door to kitchen
	 Observation:The door is already open.
 => Step 1 - reward: 0.000 - is_done: False - action: go to kitchen
	 Observation:You move to the kitchen.
 => Step 2 - reward: 0.000 - is_done: False - action: look around
	 Observation:This room is called the kitchen. In it, you see: 
	the agent
	a substance called air
	a chair. On the chair is: nothing.
	a counter. On the counter is: a bowl (containing a red apple, a banana, an orange, a potato), a drawer.
	a cupboard. The cupboard door is closed. 
	a freezer. The freezer door is closed. 
	a fridge. The fridge door is closed. 
	a glass jar (containing a substance called sodium chloride)
	a lighter
	a oven, which is turned off. The oven door is closed. 
	a painting
	a sink, which is turned off. In the sink is: nothing.
	a substance called soap
	a stopwatch, which is deactivated. 
	a stove, whic

In [41]:
# Print the goal-progress report of `env`. `env` is rebound by every cell that creates a
# ScienceWorldEnv, so this reflects whichever of those cells was executed most recently.
print(env.getGoalProgressStr())

Completed keys: 
----------------------------------------------------------------------------------------------------
Sequential Subgoals:
----------------------------------------------------------------------------------------------------
0	true	                                GoalFind	focus on substance
1	true	                 GoalChangeStateOfMatter	substance is in a liquid state
2	true	                 GoalChangeStateOfMatter	substance is in a gaseous state (or combusting)
----------------------------------------------------------------------------------------------------
Unordered and Optional Subgoals:
----------------------------------------------------------------------------------------------------
0	true	                    GoalInRoomWithObject	be in same location as water
1	true	            GoalObjectsInSingleContainer	have substance alone in a single container
2	true	              GoalActivateDeviceWithName	activate heater (stove)
3	false	              GoalActivateDeviceWit

In [78]:
# Current accumulated reward, followed by the recorded gold actions, one per line.
print(f"Plan Summary: {acc_reward}")
for gold_entry in gold_plan_tracker:
    print(f"{gold_entry['action']}")


Plan Summary: 100
open door to kitchen
go to kitchen
look around
pick up thermometer
open cupboard
pick up metal pot
look around
move metal pot to sink
activate sink
deactivate sink
pick up metal pot
focus on substance in metal pot
pour metal pot into metal pot
pick up metal pot
move metal pot to stove
activate stove
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in metal pot
examine substance in metal pot
use thermometer in inventory on substance in met

In [47]:
# Pair the model's actions with the gold actions by position, for at most the first 35 steps
# and never beyond the end of the shorter plan.
n_compared = min(len(plan_tracker), len(gold_plan_tracker), 35)
for k in range(n_compared):
    print(f"Action: {plan_tracker[k]['action']} - Gold Action: {gold_plan_tracker[k]['action']}")

Action: look around - Gold Action: open door to kitchen
Action: go to kitchen - Gold Action: go to kitchen
Action: pick up thermometer - Gold Action: look around
Action: open cupboard - Gold Action: pick up thermometer
Action: pick up metal pot - Gold Action: open cupboard
Action: move metal pot to sink - Gold Action: pick up metal pot
Action: activate sink - Gold Action: look around
Action: deactivate sink - Gold Action: move metal pot to sink
Action: pick up metal pot - Gold Action: activate sink
Action: focus on substance in metal pot - Gold Action: deactivate sink
Action: pour metal pot into metal pot - Gold Action: pick up metal pot
Action: focus on metal pot - Gold Action: focus on substance in metal pot
Action: focus on metal pot - Gold Action: pour metal pot into metal pot
Action: move metal pot to stove - Gold Action: pick up metal pot
Action: activate stove - Gold Action: move metal pot to stove
Action: look at substance in metal pot - Gold Action: activate stove
Action: use 