In [4]:
import json
import pandas as pd

import lightning as L
import torch
import torch.nn as nn
import torch.nn.functional as F

from datasets import Dataset, Features, Sequence, Value
from lightning import Trainer

from sources.fallback_policy.encoder import HFEncoderModel, EncoderModel
from sources.fallback_policy.model import BeliefBaseEncoder
from sources.scienceworld import parse_beliefs


from torch.utils.data import DataLoader


class ContrastiveQNetwork(L.LightningModule):

    def __init__(self,
                 belief_dim: int,
                 encoder_model: EncoderModel,
                 n_blocks: int = 1):
        super(ContrastiveQNetwork, self).__init__()
        self.belief_base_encoder = BeliefBaseEncoder(belief_dim, n_blocks)
        self.similarity_function = nn.CosineSimilarity(dim=1, eps=1e-6)
        self.encoder_model = encoder_model


    def _encode_batch(self, batch):
        belief_base_emb = self.encoder_model.encode_batch(batch['belief_base'], include_cls=False)
        action_emb = self.encoder_model.encode_batch(batch['action'], include_cls=False)
        return belief_base_emb, action_emb


    def forward(self, batch):
        belief_base_emb, action_tensor = self._encode_batch(batch)
        belief_base_sizes = batch['belief_base_sizes']
        encoded_belief_base, attention = self.belief_base_encoder(belief_base_emb, belief_base_sizes)
        similarity_matrix = self.contrastive_step(encoded_belief_base, action_tensor)
        return similarity_matrix

    def training_step(self, batch, batch_idx):
        similarity_matrix = self.forward(batch)
        batch_size = similarity_matrix.size(0)  # batch_size, similarity
        cl_label = torch.arange(batch_size, dtype=torch.long)
        loss = F.cross_entropy(similarity_matrix, cl_label)
        self.log("train_loss", loss, prog_bar=True, on_step=True, on_epoch=True)
        return loss

    def contrastive_step(self,
                         belief_base_emb: torch.Tensor,
                         action_emb: torch.Tensor,
                         goal_emb: torch.Tensor):
        # x1 representation (state+action)
        x1 = belief_base_emb + action_emb
        # x2 representation (goal?)
        x2 = goal_emb

        similarity_matrix = self.similarity_function(x1.unsqueeze(1), x2.unsqueeze(0))
        return similarity_matrix

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(params=self.parameters(), lr=1e-4)
        # scheduler = get_linear_schedule_with_warmup(optimizer, num_training_steps=self.hparams['training_epochs'], num_warmup_steps=0)
        # return {"optimizer": optimizer, "lr_scheduler": scheduler}

        return {"optimizer": optimizer}


def load_goldpaths():
    goldpath_file = "/opt/data/scienceworld-goldpaths/goldsequences-0.json"
    with open(goldpath_file) as f:
        data = json.load(f)
    json_data = data['0']

    gold_sequence = json_data['goldActionSequences'][0]['path']
    goal = json_data['goldActionSequences'][0]['taskDescription'].split('.')[0]
    variation_idx = json_data['goldActionSequences'][0]['variationIdx']
    print(f"Goal: {goal} - variation {variation_idx}")

    last_reward = 0
    use_cls = False
    observation = ""
    all_trajectories = []
    for i, trajectory in enumerate(gold_sequence):
        look_around = trajectory['freelook']
        inventory = trajectory['inventory']
        belief_base = parse_beliefs(observation=observation, look=look_around, inventory=inventory)
        is_done = trajectory['isCompleted']
        if is_done == 'true':
            next_state = ""
            break

        belief_base = belief_base + [goal]
        if trajectory['action'] != 'look around':
            all_trajectories.append({
                    'belief_base': belief_base,
                    'action': trajectory['action'],
                    'num_beliefs': len(belief_base),
            })

    return all_trajectories


encoder_model = HFEncoderModel("princeton-nlp/sup-simcse-roberta-base")
trajectories = load_goldpaths()

trajectories_pd = pd.DataFrame(trajectories)
dataset = Dataset.from_pandas(trajectories_pd, features=Features({
        "belief_base": Sequence(Value(dtype="string")),
        "action": Value(dtype="string"),
        "num_beliefs": Value(dtype="int32")
}))
dataset

Goal: Your task is to boil water - variation 0


Dataset({
    features: ['belief_base', 'action', 'num_beliefs'],
    num_rows: 33
})

In [17]:
belief_base_torch = torch.randn(1,768)
candidate_actions = torch.randn(23, 768)

sim = F.cosine_similarity(belief_base_torch.unsqueeze(1), candidate_actions.unsqueeze(0))
sim.argmax(dim=-1)


tensor([20])

In [3]:
def collate_fn(data):
   actions = [d['action'] for d in data]
   num_beliefs = [d['num_beliefs'] for d in data]
   belief_base = [d['belief_base'] for d in data]
   max_length =  max([len(b) for b in belief_base])
   padded_belief_base = []
   for bb in belief_base:
    pad_size = max_length - len(bb)
    if pad > 0:
        bb = bb + [''] * pad_size
    padded_belief_base.append(bb)
       
   return {'actions': actions,
           'num_beliefs': num_beliefs,
           'belief_base': padded_belief_base}

dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=16)
model = ContrastiveQNetwork(768, encoder_model=encoder_model)

trainer = Trainer(max_epochs=10,
                  accelerator='gpu',
                  check_val_every_n_epoch=1,
                  val_check_interval=0.25)
trainer.fit(model, dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                | Type              | Params
----------------------------------------------------------
0 | belief_base_encoder | BeliefBaseEncoder | 3.0 M 
1 | similarity_function | CosineSimilarity  | 0     
----------------------------------------------------------
3.0 M     Trainable params
0         Non-trainable params
3.0 M     Total params
11.803    Total estimated model params size (MB)
/home/ichida/miniconda3/envs/defau

Training: |          | 0/? [00:00<?, ?it/s]

TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]