In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['MUJOCO_GL'] = 'osmesa'
os.environ['PYOPENGL_PLATFORM'] = 'osmesa'  # or 'egl'
os.environ['CURL_CA_BUNDLE'] = ''

import sys
sys.path.append('..')

import hydra
from hydra import compose, initialize
from hydra.utils import to_absolute_path

import yaml
import torch
import pprint
import imageio
import numpy as np
from base64 import b64encode
from easydict import EasyDict
from omegaconf import OmegaConf
from IPython.display import HTML
from transformers import AutoModel, AutoTokenizer, logging

from models.bc_vilt_policy import BCViLTPolicy

from libero.libero.benchmark import get_benchmark
from libero.libero.envs import OffScreenRenderEnv, SubprocVectorEnv
from libero.lifelong.datasets import SequenceVLDataset, get_dataset
from libero.libero import benchmark, get_libero_path
from libero.lifelong.metric import raw_obs_to_tensor_obs

hydra.core.global_hydra.GlobalHydra.instance().clear()

  from .autonotebook import tqdm as notebook_tqdm
  if LooseVersion(torch.__version__) < LooseVersion("1.0.0"):


In [2]:
### load the default hydra config
initialize(config_path="configs/bc_policy", job_name=1)
hydra_cfg = compose(config_name="vilt")
yaml_config = OmegaConf.to_yaml(hydra_cfg)
cfg = EasyDict(yaml.safe_load(yaml_config))

pp = pprint.PrettyPrinter(indent=2)
pp.pprint(cfg.policy)

# prepare lifelong learning
cfg.folder = get_libero_path("datasets")
cfg.bddl_folder = get_libero_path("bddl_files")
cfg.init_states_folder = get_libero_path("init_states")
cfg.eval.num_procs = 1
cfg.eval.n_eval = 5
cfg.data.task_embedding_format = "bert"

cfg.train.n_epochs = 25

pp.pprint(f"Note that the number of epochs used in this example is intentionally reduced to 5.")

task_order = cfg.data.task_order_index # can be from {0 .. 21}, default to 0, which is [task 0, 1, 2 ...]
cfg.benchmark_name = "libero_goal" # can be from {"libero_spatial", "libero_object", "libero_goal", "libero_10"}
benchmark = get_benchmark(cfg.benchmark_name)(task_order)

# prepare datasets from the benchmark
datasets = []
descriptions = []
shape_meta = None
n_tasks = benchmark.n_tasks

for i in range(10):
    # currently we assume tasks from same benchmark have the same shape_meta
    task_i_dataset, shape_meta = get_dataset(
            dataset_path=os.path.join(cfg.folder, benchmark.get_task_demonstration(i)),
            obs_modality=cfg.data.obs.modality,
            initialize_obs_utils=(i==0),
            seq_len=cfg.data.seq_len,
    )
    # add language to the vision dataset, hence we call vl_dataset
    descriptions.append(benchmark.get_task(i).language)
    datasets.append(task_i_dataset)

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="configs/bc_policy", job_name=1)


{ 'color_aug': { 'network': 'BatchWiseImgColorJitterAug',
                 'network_kwargs': { 'brightness': 0.3,
                                     'contrast': 0.3,
                                     'epsilon': 0.1,
                                     'hue': 0.3,
                                     'input_shape': None,
                                     'saturation': 0.3}},
  'embed_size': 128,
  'extra_state_encoder': {'extra_hidden_size': 128, 'extra_num_layers': 0},
  'image_encoder': { 'network': 'PatchEncoder',
                     'network_kwargs': { 'embed_size': None,
                                         'input_shape': None,
                                         'no_patch_embed_bias': False,
                                         'patch_size': [8, 8]}},
  'language_encoder': { 'network': 'MLPEncoder',
                        'network_kwargs': { 'hidden_size': 128,
                                            'input_size': 768,
                                  

In [3]:
def get_task_embs(cfg, descriptions, embedding_model_path=None):
    logging.set_verbosity_error()

    if cfg.data.task_embedding_format == "one-hot":
        # offset defaults to 1, if we have pretrained another model, this offset
        # starts from the pretrained number of tasks + 1
        offset = cfg.task_embedding_one_hot_offset
        descriptions = [f"Task {i+offset}" for i in range(len(descriptions))]

    if cfg.data.task_embedding_format == "bert" or cfg.data.task_embedding_format == "one-hot":
        if embedding_model_path != None:
            tz = AutoTokenizer.from_pretrained(embedding_model_path)
            model = AutoModel.from_pretrained(embedding_model_path)
        else:
            tz = AutoTokenizer.from_pretrained("bert-base-cased", cache_dir=to_absolute_path("./bert"))
            model = AutoModel.from_pretrained("bert-base-cased", cache_dir=to_absolute_path("./bert"))
        tokens = tz(
            text=descriptions,  # the sentence to be encoded
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=cfg.data.max_word_len,  # maximum length of a sentence
            padding="max_length",
            return_attention_mask=True,  # Generate the attention mask
            return_tensors="pt",  # ask the function to return PyTorch tensors
        )
        masks = tokens["attention_mask"]
        input_ids = tokens["input_ids"]
        task_embs = model(tokens["input_ids"], tokens["attention_mask"])[
            "pooler_output"
        ].detach()
    elif cfg.data.task_embedding_format == "gpt2":
        tz = AutoTokenizer.from_pretrained("gpt2")
        model = AutoModel.from_pretrained("gpt2")
        tz.pad_token = tz.eos_token
        tokens = tz(
            text=descriptions,  # the sentence to be encoded
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=cfg.data.max_word_len,  # maximum length of a sentence
            padding="max_length",
            return_attention_mask=True,  # Generate the attention mask
            return_tensors="pt",  # ask the function to return PyTorch tensors
        )
        task_embs = model(**tokens)["last_hidden_state"].detach()[:, -1]
    elif cfg.data.task_embedding_format == "clip":
        tz = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
        model = AutoModel.from_pretrained("openai/clip-vit-base-patch32")
        tokens = tz(
            text=descriptions,  # the sentence to be encoded
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=cfg.data.max_word_len,  # maximum length of a sentence
            padding="max_length",
            return_attention_mask=True,  # Generate the attention mask
            return_tensors="pt",  # ask the function to return PyTorch tensors
        )
        task_embs = model.get_text_features(**tokens).detach()
    elif cfg.data.task_embedding_format == "roberta":
        tz = AutoTokenizer.from_pretrained("roberta-base")
        model = AutoModel.from_pretrained("roberta-base")
        tz.pad_token = tz.eos_token
        tokens = tz(
            text=descriptions,  # the sentence to be encoded
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=cfg.data.max_word_len,  # maximum length of a sentence
            padding="max_length",
            return_attention_mask=True,  # Generate the attention mask
            return_tensors="pt",  # ask the function to return PyTorch tensors
        )
        task_embs = model(**tokens)["pooler_output"].detach()

    cfg.policy.language_encoder.network_kwargs.input_size = task_embs.shape[-1]

    return task_embs

In [4]:
embedding_model_path = "/baishuanghao/model/bert-base-cased"
file_path = f"/baishuanghao/code/BC-IB/data/{cfg.data.env_name}_task_embeddings.pt"
if os.path.exists(file_path):
    task_embs = torch.load(file_path)
else:
    task_embs = get_task_embs(cfg, descriptions, embedding_model_path)
    torch.save(task_embs, file_path)
benchmark.set_task_embs(task_embs)

datasets = [SequenceVLDataset(ds, emb) for (ds, emb) in zip(datasets, task_embs)]
n_demos = [data.n_demos for data in datasets]
n_sequences = [data.total_num_sequences for data in datasets]

In [5]:
cfg.device='cuda:0'
model = eval(cfg.policy.policy_type)(cfg, shape_meta)
start_epoch = model.load("/baishuanghao/code/BC-IB/outputs_sccucess/1121_1830_seed0/model_50.ckpt")
model.to(cfg.device)

[2024-11-23 15:53:42,313] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/root/miniconda3/envs/libero/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
  from pkg_resources import packaging  # type: ignore[attr-defined]
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
/root/miniconda3/envs/libero/compiler_compat/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status
/root/miniconda3/envs/libero/lib/python3.8/site-packages/pydantic/fields.py:826: PydanticDeprecatedSince20: Using extra keyword arguments on `Field` is deprecated and will be removed.

BCViLTPolicy(
  (img_aug): DataAugGroup(
    (aug_layer): Sequential(
      (0): BatchWiseImgColorJitterAug(
        (color_jitter): ColorJitter(brightness=[0.7, 1.3], contrast=[0.7, 1.3], saturation=[0.7, 1.3], hue=[-0.3, 0.3])
      )
      (1): TranslationAug(
        (crop_randomizer): CropRandomizer(input_shape=(3, 136, 136), crop_size=[128, 128], num_crops=1)
      )
    )
  )
  (encoders): ModuleList(
    (0): PatchEncoder(
      (conv): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (proj): Conv2d(64, 128, kernel_size=(8, 8), stride=(8, 8))
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): PatchEncoder(
      (conv): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): 

In [None]:
# You can turn on subprocess
action_dim = 7
env_num = 10

task_id = 0
task = benchmark.get_task(task_id)
task_emb = benchmark.get_task_emb(task_id)

env_args = {
            "bddl_file_name": os.path.join(
                cfg.bddl_folder, task.problem_folder, task.bddl_file
            ),
            "camera_heights": cfg.data.img_size,
            "camera_widths": cfg.data.img_size,
        }
env = SubprocVectorEnv(
    [lambda: OffScreenRenderEnv(**env_args) for _ in range(env_num)]
)


In [32]:
init_states_path = os.path.join(
    cfg.init_states_folder, task.problem_folder, task.init_states_file
)
init_states = torch.load(init_states_path)
indices = np.arange(env_num) % init_states.shape[0]
init_states_ = init_states[indices]

In [None]:
env.reset()
env.seed(cfg.train.seed)
model.reset()

obs = env.set_init_state(init_states_)

# Make sure the gripepr is open to make it consistent with the provided demos.
dummy_actions = np.zeros((env_num, action_dim))
for _ in range(5):
    env.step(dummy_actions)
    

In [None]:
from libero.libero.utils.time_utils import Timer
from libero.libero.utils.video_utils import VideoWriter

video_writer = VideoWriter("/baishuanghao/code/BC-IB/outputs_visual/libero", save_video=True, single_video=False) 

In [None]:
steps = 0
dones = [False] * env_num
num_success = 0
obs_tensors = [[]] * env_num
with torch.no_grad():
    while steps < cfg.eval.max_steps:
        steps += 1
        data = raw_obs_to_tensor_obs(obs, task_emb, cfg)
        actions = model.get_action(cfg, data)
        obs, reward, done, info = env.step(actions)
        video_writer.append_vector_obs(
            obs, dones, camera_name="agentview_image"
        )
        
        for k in range(env_num):
            dones[k] = dones[k] or done[k]
        if all(dones):
            break

    for k in range(env_num):
        num_success += int(dones[k])

success_rate = num_success / env_num

if all(dones):
    print(f'All done in {steps}/{cfg.eval.max_steps} step!')
else:
    print(f'Didn\'t done in {steps}/{cfg.eval.max_steps} step!')

print(f'Done: {done}')
print(f'success_rate: {success_rate}')
    

All done in 213/600 step!
Done: [ True  True]
success_rate: 1.0
Saved videos to /baishuanghao/code/BC-IB/outputs_visual/libero.


In [38]:
print(info)

[{'env_id': 0} {'env_id': 1}]


In [37]:
video_writer.save()

Saved videos to /baishuanghao/code/BC-IB/outputs_visual/libero.


In [27]:
steps = 0
dones = [False] * env_num
num_success = 0
obs_tensors = [[]] * env_num
with torch.no_grad():
    while steps < cfg.eval.max_steps:
        steps += 1
        data = raw_obs_to_tensor_obs(obs, task_emb, cfg)
        actions = model.get_action(cfg, data)
        obs, reward, done, info = env.step(actions)

        for k in range(env_num):
            dones[k] = dones[k] or done[k]
            obs_tensors[k].append(obs[k]["agentview_image"])
        if all(dones):
            break

    for k in range(env_num):
        num_success += int(dones[k])

success_rate = num_success / env_num

if all(dones):
    print(f'All done in {steps}/{cfg.eval.max_steps} step!')
else:
    print(f'Didn\'t done in {steps}/{cfg.eval.max_steps} step!')

print(f'Done: {done}')
print(f'success_rate: {success_rate}')

env.close()

All done in 213/600 step!
Done: [ True  True]
success_rate: 1.0


In [29]:
# visualize video
# obs_tensor: (env_num, T, H, W, C)

save_dir = 'outputs_visual/subproc'
os.makedirs(save_dir, exist_ok=True)

i = 1
while True:
    if os.path.exists(f'{save_dir}/tmp_video_{i}.mp4'):
        i = i + 1
        continue

    images = [img[::-1] for img in obs_tensors[0]]
    fps = 30
    writer  = imageio.get_writer(f'{save_dir}/tmp_video_{i}.mp4', fps=fps)
    for image in images:
        writer.append_data(image)
    writer.close()
    break

In [30]:
video_data = open(f'{save_dir}/tmp_video_{i}.mp4', "rb").read()
video_tag = f'<video controls alt="test" src="data:video/mp4;base64,{b64encode(video_data).decode()}" width="480" height="480">'
HTML(data=video_tag)