In [2]:
import json

import lightning as L
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from datasets import Dataset, Features, Sequence, Value
from lightning import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger
from scienceworld import ScienceWorldEnv
from torch.utils.data import DataLoader

from sources.fallback_policy.encoder import HFEncoderModel, EncoderModel
from sources.fallback_policy.model import BeliefBaseEncoder, ContrastiveQNetwork
from sources.scienceworld.utils import parse_beliefs

In [3]:
# Sentence encoder shared by the rest of the notebook; it is handed to
# ContrastiveQNetwork when the model is built.
encoder_model = HFEncoderModel(
    "princeton-nlp/sup-simcse-roberta-base",
    device="cuda",
)



In [1]:
# NOTE(review): leftover scratch cell — it only prints the literal "a" and
# nothing else in the notebook depends on it; safe to delete.
print("a")

a


In [7]:
# Load the gold trajectories for the "boil" task, keep a single task
# variation, and order its steps chronologically by turn.
goldpath_df = (
    pd.read_csv("/opt/data/scienceworld-goldpaths/trajectories_csv/tabular_task-1-boil.csv")
    .loc[lambda frame: frame['variation_idx'] == 3]  # TODO: remove filter
    .sort_values("turn")
)

# Preview the columns that describe each step of the trajectory.
goldpath_df[['turn', 'action', 'observation']]

Unnamed: 0,turn,action,observation
113,1,open door to kitchen,The door is already open.
114,2,go to kitchen,You move to the kitchen.
115,3,pick up thermometer,You move the thermometer to the inventory.
116,4,open cupboard,The cupboard is now open.
117,5,pick up metal pot,You move the metal pot to the inventory.
118,6,move metal pot to sink,You move the metal pot to the sink.
119,7,activate sink,"The sink appears broken, and can't be activate..."
120,8,deactivate sink,The sink is already deactivated.
121,9,pick up metal pot,You move the metal pot to the inventory.
122,10,move metal pot to sink,You move the metal pot to the sink.


In [7]:
# NOTE(review): leftover scratch cell — it only prints the literal "a". The
# output saved with this cell does not match this code (stale output), so the
# cell was edited after it last ran; safe to delete.
print("a")

2

# Loading Trajectories

In [None]:
# Turn every gold-path step into one training sample:
#   belief_base       -> beliefs parsed from the previous observation, the
#                        current look/inventory text, the goal, and up to the
#                        five most recent actions
#   action            -> the gold action taken at this step
#   belief_base_sizes -> len(belief_base) + 1
all_trajectories = []
previous_actions = []
observation = ""  # no observation exists before the first step

for _, row in goldpath_df.iterrows():
    parsed_beliefs = parse_beliefs(
        observation=observation,
        look=row['look_around'],
        inventory=row['inventory'],
    )

    # Drop empty beliefs, then append the goal and the recent action history.
    belief_base = [belief for belief in parsed_beliefs if len(belief) != 0]
    belief_base.append(row['goal'])
    belief_base.extend(
        f"You executed the action {past['action']} at turn {past['turn']}"
        for past in previous_actions[-5:]
    )

    # NOTE(review): the +1 presumably reserves a slot for an extra token
    # (e.g. CLS) added by the model — confirm against ContrastiveQNetwork.
    belief_base_sizes = len(belief_base) + 1
    action = row['action']

    all_trajectories.append(dict(
        belief_base=belief_base,
        action=action,
        belief_base_sizes=belief_base_sizes,
    ))
    previous_actions.append(dict(turn=row['turn'], action=action))

    # The observation produced by this step feeds the next step's beliefs.
    observation = row['observation']


# Materialise the samples as a Hugging Face dataset with an explicit schema.
trajectory_features = Features({
    "belief_base": Sequence(Value(dtype="string")),
    "action": Value(dtype="string"),
    "belief_base_sizes": Value(dtype="int32"),
})
trajectories_pd = pd.DataFrame(all_trajectories)
dataset = Dataset.from_pandas(trajectories_pd, features=trajectory_features)

In [None]:
def collate_fn(data):
    """Regroup a list of dataset samples into a batch of parallel lists.

    Args:
        data: iterable of samples, each a mapping with the keys ``'action'``,
            ``'belief_base_sizes'`` and ``'belief_base'``.

    Returns:
        dict with the keys ``'actions'``, ``'belief_base_sizes'`` and
        ``'belief_base'``; each value is a list holding that field for every
        sample, in input order.
    """
    # The encoding should be done here, so that batches are delivered as
    # ready-made vectors.
    batch = {'actions': [], 'belief_base_sizes': [], 'belief_base': []}
    for sample in data:
        batch['actions'].append(sample['action'])
        batch['belief_base_sizes'].append(sample['belief_base_sizes'])
        batch['belief_base'].append(sample['belief_base'])
    return batch
# Serve the trajectory samples in shuffled mini-batches of 8, grouped by
# collate_fn.
dataloader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
EPOCHS = 40
SEED = 42

# Seed the random number generators before the model is built and the
# dataloader is iterated, so weight initialisation and batch shuffling are
# repeatable across "Restart & Run All".
L.seed_everything(SEED, workers=True)

# NOTE(review): 768 is presumably the encoder's embedding size — confirm
# against ContrastiveQNetwork's signature.
model = ContrastiveQNetwork(768, encoder_model=encoder_model)

# TensorBoard logs go to logs/cl_step/version_<n>; the logger picks <n>.
base_dir = "cl_step"
tb_logger = TensorBoardLogger(f"logs/{base_dir}")
tb_logger.log_hyperparams(model.hparams)
version = tb_logger.version

# Checkpoint name template. The "{...}" placeholders in the second part are
# left for ModelCheckpoint to fill in, so that part must stay a plain
# (non-f) string.
filename = f"{base_dir}/version_{version}/v{version}" + "-{epoch}-{step}-{train_loss_epoch:.3f}"
checkpoint_callback = ModelCheckpoint(dirpath='checkpoints',
                                      monitor='train_loss_epoch',
                                      save_top_k=2,
                                      filename=filename)

trainer = Trainer(max_epochs=EPOCHS,
                  accelerator='gpu',
                  logger=tb_logger,
                  callbacks=[checkpoint_callback]
                  )
trainer.fit(model, dataloader)

In [None]:
# Greedy rollout of the trained policy in ScienceWorld: at every step the
# current beliefs are rebuilt from the environment feedback and the valid
# action with the highest Q-value is executed next.
model = model.to('cuda')
model = model.eval()

# Read the task description from the gold-path frame itself rather than from
# a loop variable leaked by the trajectory-building cell. The last row of the
# frame is the row that loop finished on.
episode_row = goldpath_df.iloc[-1]
goal = episode_row['goal']
variation_idx = int(episode_row['variation_idx'])

env = ScienceWorldEnv()
env.load("boil", variation_idx, "openDoors")

max_steps = 30
top_k = 3
verbose = False  # set to True to print the top-k candidates and the final plan

plan = []
previous_action = []
action = "look around"  # first step only gathers an initial observation

with torch.no_grad():
    for step in range(max_steps):
        obs, reward, is_done, info = env.step(action)
        print(f" => Step {step} - reward: {reward:.3f} - is_done: {is_done} - action: {action}")

        # Stop as soon as the environment reports the episode is over instead
        # of continuing to act until max_steps.
        if is_done:
            break

        # Build the belief base the same way as the training samples: drop
        # empty beliefs, then append the goal.
        parsed_beliefs = parse_beliefs(observation=obs, look=info['look'], inventory=info['inv'])
        belief_base = [b for b in parsed_beliefs if len(b) > 0] + [goal]
        belief_base = [b.replace("greenhouse", "green house") for b in belief_base]

        # Expose the five most recent actions, as in the training samples.
        belief_base.extend(
            f"You executed the action {past['action']} at turn {past['turn']}"
            for past in previous_action[-5:]
        )

        candidate_actions = info['valid']
        q_values = model.act(belief_base, candidate_actions=candidate_actions)
        selected_action = int(q_values.argmax(dim=-1)[0])
        action = candidate_actions[selected_action]

        if verbose:
            values, idxs = torch.sort(q_values.squeeze(0), descending=True)
            print(f"\tAction space - Top {top_k}:")
            for rank, idx in enumerate(idxs[:top_k]):
                print(f"\t\tCandidate Action: {candidate_actions[idx]} - q_value: {values[rank]:.3f}")

        plan.append(action)

        # The action chosen here is executed on the next env.step call. The
        # gold-path frame numbers the first chosen action as turn 1, so the
        # turn is recorded as step + 1 to match the training history strings.
        # NOTE(review): confirm the gold paths never start at turn 0.
        previous_action.append({
                'turn': step + 1,
                'action': action
        })

if verbose:
    print("Plan Executed: ")
    for i, a in enumerate(plan):
        print(f"{i} -  {a}")