In [1]:
import prior

# Load the ProcTHOR-10K houses through `prior`. The bare `dataset` expression
# on the last line makes the notebook display the loaded splits.
dataset = prior.load_dataset("procthor-10k")
dataset

    pip install --upgrade ai2thor
Alternatively, to downgrade to the old version of ProcTHOR-10K, run:
   prior.load_dataset("procthor-10k", revision="ab3cacd0fc17754d4c080a3fd50b18395fae8647")


Loading train: 100%|██████████| 10000/10000 [00:00<00:00, 11485.66it/s]
Loading val: 100%|██████████| 1000/1000 [00:00<00:00, 12006.26it/s]
Loading test: 100%|██████████| 1000/1000 [00:00<00:00, 12101.98it/s]


DatasetDict(
    train=Dataset(
    dataset=procthor-dataset,
    size=10000,
    split=train
),
    val=Dataset(
    dataset=procthor-dataset,
    size=1000,
    split=val
),
    test=Dataset(
    dataset=procthor-dataset,
    size=1000,
    split=test
)
)

In [2]:
import numpy as np
def teleport(controller, target=None):
    """Teleport the agent to ``target``, or to a random reachable position.

    Args:
        controller: Object exposing ``step(...)``; in this notebook an
            AI2-THOR ``Controller``.
        target: Mapping with ``"x"``, ``"y"`` and ``"z"`` entries. When
            ``None``, a position is drawn at random from the result of the
            ``GetReachablePositions`` action.

    Returns:
        The event returned by the ``TeleportFull`` step.

    Raises:
        ValueError: If no target is given and the simulator reports no
            reachable positions.
    """
    if target is None:
        # Only query the simulator when a position actually has to be sampled;
        # a caller-supplied target needs no extra simulator step.
        event = controller.step("GetReachablePositions")
        reachable_positions = event.metadata["actionReturn"]
        if not reachable_positions:
            raise ValueError(
                "GetReachablePositions returned no positions; "
                "cannot pick a teleport target"
            )
        # Index with a random integer so the list of position dicts is never
        # converted to a NumPy array.
        target = reachable_positions[np.random.randint(len(reachable_positions))]

    return controller.step(
        action="TeleportFull",
        x=target["x"],
        y=target["y"],
        z=target["z"],
        rotation={"x": 0, "y": 0, "z": 0},
        horizon=0,
        standing=True,
    )


In [3]:
from rl import PPO, CLIPNovelty, ClipEnv, ActorCritic
from models import FrozenResNetEncoder, SlidingWindowTransformerActor, SlidingWindowTransformerCritic
from cons import FEAT_DIM, NUM_ACTIONS

# Objects consumed by the rollout cell below, which gathers frames and fits a
# PCA on their encoder embeddings.
ENTROPY_COEF = 0.05

ppo = PPO(ENTROPY_COEF)
encoder = FrozenResNetEncoder(project_to_out_dim=False)
# The rollout cell's get_distribution returns a fixed distribution and ignores
# the actor_critic it is handed, hence "not important" here.
# NOTE(review): whether inference() uses the actor/critic in any other way is
# not visible in this notebook — confirm.
actor = SlidingWindowTransformerActor(FEAT_DIM, NUM_ACTIONS) # Not important
critic = SlidingWindowTransformerCritic(FEAT_DIM) # Not important
clip_novelty = CLIPNovelty()
clip_env = ClipEnv(clip_novelty)
clip_actor_critic = ActorCritic(encoder, actor, critic)

In [4]:
from rl import inference, teleport
import torch
from ai2thor.controller import Controller


def get_distribution(ppo, obs_seq, actions_seq, actor_critic):
    """Return a fixed three-way categorical policy.

    All four arguments are accepted for signature compatibility and ignored;
    the same probabilities (0.5, 0.25, 0.25) are returned on every call.
    """
    fixed_probs = torch.tensor([0.5, 0.25, 0.25])
    return torch.distributions.Categorical(probs=fixed_probs)

# Resolve the PCA-related imports before the simulator loop so that a missing
# package fails immediately instead of after every rollout has already run.
import numpy as np
from sklearn.decomposition import PCA

all_obs = []

for i in range(12):

    # ------------------------------------------------------------
    # Pick a different random environment from your dataset
    # ------------------------------------------------------------
    idx = torch.randint(0, len(dataset["train"]), (1,)).item()
    house = dataset["train"][idx]

    controller = Controller(scene=house, snapToGrid=False, rotateStepDegrees=30)

    try:
        # teleport agent inside this new house
        event = teleport(controller)
        init_pos = event.metadata["agent"]["position"]

        # ------------------------------------------------------------
        # Run inference in THIS environment
        # ------------------------------------------------------------
        obs = inference(
            get_distribution=get_distribution,
            controller=controller,
            ppo=ppo,
            init_position=init_pos,
            env=clip_env,
            actor_critic=clip_actor_critic,
            plot=False,
            n=32
        )

        # One stacked tensor per house; concatenated across houses below.
        all_obs.append(torch.stack(obs, dim=0))

    finally:
        # Always clean up controller
        controller.stop()


all_obs_tensor = torch.cat(all_obs, dim=0)

# The embeddings are only used to fit PCA, so no autograd graph is needed;
# disabling gradient tracking avoids holding activations in memory.
with torch.no_grad():
    embedding = encoder(all_obs_tensor.unsqueeze(0)).squeeze(0)

pca = PCA(n_components=FEAT_DIM)
pca.fit(embedding.detach().cpu().numpy())
# NOTE(review): only the component matrix is kept; `pca.mean_` is not carried
# along, and only `W` is handed to FrozenResNetPCAEncoder later in the
# notebook — confirm that skipping mean-centering is intended.
W = pca.components_.astype(np.float32)



<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

<Figure size 200x6400 with 0 Axes>

In [5]:
from rl import PPO, ActorCritic, Env, RolloutBuffer, ClipEnv, CLIPNovelty
from models import LSTMActor, LSTMCritic, FrozenResNetEncoder, SlidingWindowTransformerActor, SlidingWindowTransformerCritic
from cons import MINIBATCHES, EPISODE_STEPS, FEAT_DIM, NUM_ACTIONS, DEVICE

In [6]:
import torch
import wandb
import numpy as np
from ai2thor.controller import Controller
from rl import save_actor_critic, RolloutBuffer, MINIBATCHES, EPISODE_STEPS, DEVICE, teleport


def _run_episode(controller, env, ppo, actor_critic, buf, rewards):
    """Roll out one episode of ``EPISODE_STEPS`` steps and record it.

    Every transition is added to ``buf``, every per-step reward is appended to
    ``rewards`` and logged to W&B under ``"reward"``.

    Returns:
        The sum of ``reward / EPISODE_STEPS`` over the episode.
    """
    event = teleport(controller)
    episode_seq = []
    # The action history is seeded with one random action so the policy is
    # never called with an empty action sequence.
    actions_seq = [torch.randint(0, NUM_ACTIONS, (1, 1)).item()]
    episode_reward = 0.0

    for t in range(1, EPISODE_STEPS + 1):
        # Rollout collection needs no gradients. Keeping the policy forward
        # under no_grad prevents the stored `value` from retaining an autograd
        # graph for every step.
        # NOTE(review): assumes ppo.ppo_update does not back-propagate through
        # the values stored in the buffer — confirm against rl.PPO.
        with torch.no_grad():
            # Get observation and encode
            obs_t = ppo.obs_from_event(event)
            obs_enc = actor_critic.actor_critic_encoder(
                obs_t.unsqueeze(0).unsqueeze(0)
            ).squeeze(0).squeeze(0)

            # Sequence of all encodings so far plus the current one
            obs_seq = torch.stack(
                episode_seq + [obs_enc], dim=0
            ).unsqueeze(0).to(DEVICE)

            actions_tensor = torch.tensor(
                actions_seq, dtype=torch.long
            ).unsqueeze(0).to(DEVICE)

            # Policy forward
            logits, value = ppo.act_and_value(obs_seq, actions_tensor, actor_critic)
            dist = torch.distributions.Categorical(logits=logits)
            action = dist.sample()
            logp = dist.log_prob(action).item()
            action_idx = action.item()

        # Environment step
        event, reward = env.step_env(controller, action_idx)
        done = (t == EPISODE_STEPS)

        # Store transition
        buf.add(obs_enc, action_idx, logp, reward, value, done)

        episode_seq.append(obs_enc)
        actions_seq.append(action_idx)

        rewards.append(reward)
        episode_reward += reward / EPISODE_STEPS

        wandb.log({"reward": reward})

        # Episode ended
        if done:
            env.reset()
            # NOTE(review): `done` is only true on the final step, after which
            # the caller stops the controller, so this teleport has no visible
            # consumer here — kept to preserve existing behaviour.
            if np.random.rand() > 0.5:
                event = teleport(controller)

    return episode_reward


def train(
    name: str,
    ppo: PPO,
    env_cls,
    actor_critic: ActorCritic,
    total_updates=10,
    num_envs_per_minibatch=8
):
    """Run PPO training over randomly sampled houses, logging to W&B.

    Each update rolls out ``num_envs_per_minibatch`` episodes sequentially,
    each in a freshly started controller on a random house from
    ``dataset["train"]``, then calls ``ppo.ppo_update`` on the collected
    buffer and saves the model.

    Args:
        name: W&B run name; also used to build the checkpoint paths
            ``data/{name}.pt`` (latest, every update) and
            ``data/{name}_{upd}.pt`` (every 10th update, ``upd`` zero-based).
        ppo: Provides ``obs_from_event``, ``act_and_value`` and ``ppo_update``.
        env_cls: Zero-argument callable returning a fresh environment with
            ``step_env(controller, action)`` and ``reset()``.
        actor_critic: Model bundle passed to the policy and to the update.
        total_updates: Number of collect-then-update iterations.
        num_envs_per_minibatch: Episodes collected per update.

    Returns:
        ``(buf, rewards, episode_rewards)``: the rollout buffer of the last
        update (an empty buffer if no update ran), every per-step reward, and
        one aggregated reward per episode.
    """
    # ------------------------------------------------------------
    # W&B init
    # ------------------------------------------------------------
    run = wandb.init(
        reinit="finish_previous",
        entity="viriyadhika1",
        project="cv-final-project",
        name=name,
        config={},
    )

    rewards = []
    episode_rewards = []
    buf = None

    try:
        for upd in range(total_updates):
            buf = RolloutBuffer()

            # Run num_envs_per_minibatch EPISODES sequentially
            for env_idx in range(num_envs_per_minibatch):

                # Create ONE controller for this episode
                idx = torch.randint(0, len(dataset["train"]), (1,)).item()
                house = dataset["train"][idx]

                controller = Controller(scene=house, snapToGrid=False, rotateStepDegrees=30, renderInstanceSegmentation=True)

                try:
                    # Built inside the try so the controller is stopped even
                    # if the environment factory raises.
                    env = env_cls()
                    episode_reward = _run_episode(
                        controller, env, ppo, actor_critic, buf, rewards
                    )

                    wandb.log({"episode_reward": episode_reward})
                    episode_rewards.append(episode_reward)

                finally:
                    controller.stop()

            # ======================================================
            # PPO UPDATE
            # ======================================================
            ppo.ppo_update(buf, actor_critic)

            # Save model periodically
            if (upd + 1) % 10 == 0:
                save_actor_critic(actor_critic, f"data/{name}_{upd}.pt")

            # Save latest
            save_actor_critic(actor_critic, f"data/{name}.pt")

            print(f"Update {upd+1}/{total_updates} — steps: {len(buf)}")

    finally:
        run.finish()

    # With total_updates <= 0 the loop never runs; return an empty buffer
    # instead of failing on an unbound name.
    if buf is None:
        buf = RolloutBuffer()

    return buf, rewards, episode_rewards


In [7]:
from rl import PPO, SegmentationNovelty, ClipEnv, ActorCritic
from models import FrozenResNetPCAEncoder, SlidingWindowTransformerActor, SlidingWindowTransformerCritic
from cons import FEAT_DIM, NUM_ACTIONS, DEVICE

# Training-time objects. These rebind `ppo`, `encoder`, `actor` and `critic`
# from the earlier setup cell, so cells that ran before this one saw the
# previous instances.
ENTROPY_COEF = 0.05

ppo = PPO(ENTROPY_COEF)
# `W` is the float32 PCA component matrix fitted in the rollout cell above.
encoder = FrozenResNetPCAEncoder(FEAT_DIM, torch.from_numpy(W), device=DEVICE)
actor = SlidingWindowTransformerActor(FEAT_DIM, NUM_ACTIONS)
critic = SlidingWindowTransformerCritic(FEAT_DIM)
def env_cls():
    """Build a fresh ClipEnv wrapping a newly created SegmentationNovelty."""
    return ClipEnv(SegmentationNovelty())

# Bundle the FrozenResNetPCAEncoder instance with the new actor and critic.
# This rebinds `clip_actor_critic` from the earlier setup cell.
clip_actor_critic = ActorCritic(encoder, actor, critic)

In [8]:
# Short training run using the SegmentationNovelty-backed environment factory.
# NOTE: train() builds its checkpoint path as f"data/{name}.pt", so this name
# produces "data/multi_env.pt.pt".
buf, rewards, episode_rewards = train(
    name="multi_env.pt",
    ppo=ppo,
    env_cls=env_cls,
    actor_critic=clip_actor_critic,
    total_updates=5,
    num_envs_per_minibatch=4,
)

[34m[1mwandb[0m: Currently logged in as: [33mviriyadhika-putra[0m ([33mviriyadhika1[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
episode_reward,▃█▁▂
reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁

0,1
episode_reward,0.12495
reward,0.09542


OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 31.37 GiB of which 77.50 MiB is free. Process 173924 has 6.36 GiB memory in use. Process 3169638 has 20.92 GiB memory in use. Including non-PyTorch memory, this process has 3.87 GiB memory in use. Of the allocated memory 3.22 GiB is allocated by PyTorch, and 40.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)