In [3]:
# Clone over HTTPS rather than SSH: the repository is public, so this works on machines
# that have no GitHub SSH key configured.
!git clone https://github.com/facebookresearch/vggt.git

Cloning into 'vggt'...
remote: Enumerating objects: 1265, done.[K
remote: Total 1265 (delta 0), reused 0 (delta 0), pack-reused 1265 (from 1)[K
Receiving objects: 100% (1265/1265), 64.94 MiB | 9.43 MiB/s, done.
Resolving deltas: 100% (579/579), done.


In [4]:
# Move everything matched by vggt/vggt/* (the nested directory's contents) up into vggt/.
# NOTE(review): presumably done so the package modules sit directly under ./vggt when
# importing from this notebook's directory — confirm.
# Not re-runnable: after a successful move the glob is expected to match nothing, so a
# second run of this cell errors out.
!mv vggt/vggt/* vggt/

In [1]:
# Load the "procthor-10k" dataset through `prior` and keep its train split.
# The training cells further down draw a random scene from `train_scenes` on every update.
import prior
dataset = prior.load_dataset("procthor-10k")
train_scenes = dataset["train"]

# Number of iterations of the training loops (`range(1, NUM_UPDATES + 1)`); each iteration
# collects one rollout and runs one PPO update.
NUM_UPDATES = 100


Fetching reference HEAD


    pip install --upgrade ai2thor
Alternatively, to downgrade to the old version of ProcTHOR-10K, run:
   prior.load_dataset("procthor-10k", revision="ab3cacd0fc17754d4c080a3fd50b18395fae8647")


Loading train: 100%|██████████| 10000/10000 [00:00<00:00, 22888.26it/s]
Loading val: 100%|██████████| 1000/1000 [00:00<00:00, 23495.66it/s]
Loading test: 100%|██████████| 1000/1000 [00:00<00:00, 23951.03it/s]


In [None]:
from ai2thor.controller import Controller
from cons import NUM_ACTIONS, EPISODE_STEPS, DEVICE, FEAT_DIM, TRAIN_EPOCHS
from models import ActorCritic
from rl import PPOTrainer, CLIPCuriosity, ThorNavEnv, VGGTCuriosity, ExtrinsicReward
import torch
import os


# Create the CLIP curiosity module that is handed to ThorNavEnv in the training loop below.
# buffer_size is tied to EPISODE_STEPS, the same constant used as the rollout horizon.
# NOTE(review): the exact semantics of topk / ema_beta / reward_scale live in
# rl.CLIPCuriosity and are not visible here — check that class before tuning them.
clip_curiosity = CLIPCuriosity(
    device=DEVICE,
    buffer_size=EPISODE_STEPS,
    topk=5,
    ema_beta=0.99,
    reward_scale=1.0,
    every_n_steps=1,  # or 2/4 to save CLIP compute
)

# Build the actor-critic model and wrap it in a PPO trainer.
# `ac` is also the object whose state_dict is checkpointed by the training loop.
ac = ActorCritic(feat_dim=FEAT_DIM, hidden_dim=256, num_actions=NUM_ACTIONS, device=DEVICE)
trainer = PPOTrainer(ac)


# Training loop: each iteration picks one random train scene, collects one rollout of
# EPISODE_STEPS steps in it and runs one PPO update on the collected buffer.
for upd in range(1, NUM_UPDATES + 1):
    # Sample random scene (torch.randint's upper bound is exclusive, so idx is a valid index)
    idx = torch.randint(0, len(train_scenes), (1,)).item()
    house = train_scenes[idx]

    # A fresh Controller is constructed for every update and stopped in the `finally` below.
    # NOTE(review): if controller start-up dominates the runtime, consider reusing a single
    # controller across updates — confirm that ThorNavEnv supports this first.
    controller = Controller(
        scene=house,
        snapToGrid=False,
        rotateStepDegrees=30,
        renderDepthImage=True,
    )

    try:
        env = ThorNavEnv(controller, clip_curiosity, extrinsic_reward=ExtrinsicReward())

        buf, ep_reward = trainer.collect_rollout(env, horizon=EPISODE_STEPS)
        # is_pretrain is True only while upd is below 10% of NUM_UPDATES
        # (updates 1-9 when NUM_UPDATES is 100).
        trainer.ppo_update(buf, epochs=TRAIN_EPOCHS, is_pretrain=upd < 0.1 * NUM_UPDATES)

        print(f"[Update {upd}/{NUM_UPDATES}] Episode reward: {ep_reward:.3f}, steps: {len(buf)}")

        # Save the model weights on every 10th update.
        # NOTE(review): the RND training cell writes to the same file names, so running
        # both experiments overwrites these checkpoints.
        if upd % 10 == 0:
            os.makedirs("checkpoints", exist_ok=True)
            torch.save(ac.state_dict(), f"checkpoints/ac_update_{upd}.pt")

    finally:
        # Always shut the controller down, including when the rollout/update raises or
        # the cell is interrupted.
        controller.stop()

  from .autonotebook import tqdm as notebook_tqdm


[PPO] Epoch 20/20 Loss=0.0854 Policy=0.0003 Value=0.1929 Entropy=1.0986 KL=-0.0001
[Update 1/100] Episode reward: 11.728, steps: 256
[PPO] Epoch 20/20 Loss=0.0938 Policy=0.0002 Value=0.2095 Entropy=1.0986 KL=-0.0002
[Update 2/100] Episode reward: 13.349, steps: 256
[PPO] Epoch 20/20 Loss=0.1365 Policy=-0.0003 Value=0.2950 Entropy=1.0986 KL=-0.0002
[Update 3/100] Episode reward: 16.711, steps: 256
[PPO] Epoch 20/20 Loss=0.0738 Policy=0.0002 Value=0.1696 Entropy=1.0986 KL=-0.0001
[Update 4/100] Episode reward: 11.557, steps: 256
[PPO] Epoch 20/20 Loss=0.0767 Policy=0.0002 Value=0.1754 Entropy=1.0986 KL=0.0000
[Update 5/100] Episode reward: 15.892, steps: 256
[PPO] Epoch 20/20 Loss=0.0926 Policy=0.0022 Value=0.2072 Entropy=1.0985 KL=-0.0011
[Update 6/100] Episode reward: 23.088, steps: 256
[PPO] Epoch 20/20 Loss=0.1500 Policy=0.0006 Value=0.3220 Entropy=1.0986 KL=-0.0003
[Update 7/100] Episode reward: 16.323, steps: 256
[PPO] Epoch 20/20 Loss=0.0627 Policy=-0.0002 Value=0.1474 Entropy=1.0

KeyboardInterrupt: 

In [None]:
from ai2thor.controller import Controller
from cons import NUM_ACTIONS, EPISODE_STEPS, DEVICE, FEAT_DIM, TRAIN_EPOCHS
from models import ActorCritic
from rl import PPOTrainer, CLIPCuriosity, ThorNavEnv, RNDCuriosity, ExtrinsicReward
import torch
import os


# Create the curiosity module for this run. Despite the variable name, this is an
# RNDCuriosity instance (not CLIP); running this cell rebinds `clip_curiosity`.
clip_curiosity = RNDCuriosity(
    device=DEVICE
)

# Example of a hand-written extrinsic reward (optional).
# NOTE(review): the training loop in this cell passes ExtrinsicReward() instead, so this
# helper is not referenced anywhere in the visible part of the notebook.
def extrinsic_reward_fn(event):
    """Return a small shaping reward derived from the metadata of `event`.

    Args:
        event: object exposing a dict-like `metadata` attribute. The keys read are
            "lastActionSuccess" (treated as True when missing) and "lastAction"
            (treated as "" when missing).

    Returns:
        The sum of a 0.05 bonus when "Move" occurs in the last action name and a
        -0.1 penalty when the last action did not succeed. Both terms can apply
        at once; the result is 0.0 when neither does.
    """
    info = event.metadata

    # Slightly punish actions that failed.
    succeeded = info.get("lastActionSuccess", True)
    fail_penalty = 0.0 if succeeded else -0.1

    # Slightly reward any movement action.
    moved = "Move" in info.get("lastAction", "")
    move_bonus = 0.05 if moved else 0.0

    return move_bonus + fail_penalty

# Build a new actor-critic model and PPO trainer for this run; this rebinds `ac` and
# `trainer` if the CLIP cell was executed earlier in the same kernel.
ac = ActorCritic(feat_dim=FEAT_DIM, hidden_dim=256, num_actions=NUM_ACTIONS, device=DEVICE)
trainer = PPOTrainer(ac)


# Training loop: each iteration picks one random train scene, collects one rollout of
# EPISODE_STEPS steps in it and runs one PPO update on the collected buffer.
for upd in range(1, NUM_UPDATES + 1):
    # Sample random scene (torch.randint's upper bound is exclusive, so idx is a valid index)
    idx = torch.randint(0, len(train_scenes), (1,)).item()
    house = train_scenes[idx]

    # A fresh Controller is constructed for every update and stopped in the `finally` below.
    controller = Controller(
        scene=house,
        snapToGrid=False,
        rotateStepDegrees=30,
        renderDepthImage=True,
    )

    try:
        # `clip_curiosity` holds the RNDCuriosity instance created at the top of this cell.
        env = ThorNavEnv(controller, clip_curiosity, extrinsic_reward=ExtrinsicReward())

        buf, ep_reward = trainer.collect_rollout(env, horizon=EPISODE_STEPS)
        # NOTE(review): unlike the CLIP cell, no is_pretrain argument is passed here, so
        # whatever default PPOTrainer.ppo_update defines applies — confirm this is intended.
        trainer.ppo_update(buf, epochs=TRAIN_EPOCHS)

        print(f"[Update {upd}/{NUM_UPDATES}] Episode reward: {ep_reward:.3f}, steps: {len(buf)}")

        # Save the model weights on every 10th update.
        # NOTE(review): the CLIP training cell writes to the same file names, so running
        # both experiments overwrites these checkpoints.
        if upd % 10 == 0:
            os.makedirs("checkpoints", exist_ok=True)
            torch.save(ac.state_dict(), f"checkpoints/ac_update_{upd}.pt")

    finally:
        # Always shut the controller down, including when the rollout/update raises or
        # the cell is interrupted.
        controller.stop()

In [5]:
# Pick one random train scene and start a standalone Controller on it, using the same
# Controller settings as the training loops.
# Relies on `torch`, `Controller` and `train_scenes` already being defined by earlier cells.
# NOTE(review): no controller.stop() for this instance is visible in this part of the
# notebook — make sure it is stopped once the inspection is done.
idx = torch.randint(0, len(train_scenes), (1,)).item()
house = train_scenes[idx]

controller = Controller(
    scene=house,
    snapToGrid=False,
    rotateStepDegrees=30,
    renderDepthImage=True,
)

In [6]:
# Take a single "MoveAhead" step and keep the returned event for inspection.
event = controller.step("MoveAhead")

In [12]:
# Display the metadata of the event returned by the last step.
# NOTE(review): this dumps the whole structure (see the long output below); consider
# showing only the keys of interest to keep the notebook readable.
event.metadata

{'objects': [{'name': 'AlarmClock|surface|4|0',
   'position': {'x': 4.625216484069824,
    'y': 0.5214699506759644,
    'z': 1.6673753261566162},
   'rotation': {'x': 0.10064005851745605,
    'y': 180.00376892089844,
    'z': 359.9861755371094},
   'visible': False,
   'isInteractable': False,
   'receptacle': False,
   'toggleable': False,
   'isToggled': False,
   'breakable': False,
   'isBroken': False,
   'canFillWithLiquid': False,
   'isFilledWithLiquid': False,
   'fillLiquid': None,
   'dirtyable': False,
   'isDirty': False,
   'canBeUsedUp': False,
   'isUsedUp': False,
   'cookable': False,
   'isCooked': False,
   'temperature': 'RoomTemp',
   'isHeatSource': False,
   'isColdSource': False,
   'sliceable': False,
   'isSliced': False,
   'openable': False,
   'isOpen': False,
   'openness': 0.0,
   'pickupable': True,
   'isPickedUp': False,
   'moveable': False,
   'mass': 0.800000011920929,
   'salientMaterials': ['Metal', 'Plastic', 'Glass'],
   'receptacleObjectIds':