In [1]:
# Notebook-wide hyper-parameters. They are read as globals by the training
# function and by the cells that build dataset/checkpoint paths.

# Learning rate and weight decay of the discriminator's SGD optimiser.
DISCRIMINATOR_LR = 0.00005
DISCRIMINATOR_WD = 1e-5
# Learning rate of the policy's Adam optimiser.
POLICY_LR = 0.0001
# Weight of the adversarial term in the policy loss (passed as `lambda1`).
LAMBDA_1 = 0.1
# Weight of the MSE (behaviour-cloning) term in the policy loss (passed as `lambda2`).
LAMBDA_2 = 0.1
# Disabled: only referenced from commented-out code in the training loop.
# TEACHER_FORCING_RATIO = 0.2
# Number of passes over the dataset per call to train_info_gail.
EPOCH = 25
# Disabled: only referenced from a commented-out DataLoader.
# BATCH_SIZE = 8
# Task name; used as the prefix of dataset, checkpoint and video file names.
TASK = 'coffee'
VARIANCE = '1'    # dataset variant suffix: '0' selects D0, '1' selects D1

## Setup

In [2]:
import gym
import torch
import random
import numpy as np
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
from torch.autograd import Variable
from torch.distributions import Categorical, Normal

In [3]:
import os
# First, we need to decide where to host the runtime storage.
# WS_DIR (set below) is the root every later path in this notebook is built from.
USE_GDRIVE_STORAGE = True

if not USE_GDRIVE_STORAGE:
    # Option 1: use the colab runtime storage. All trained models and downloaded
    # data will disappear after you disconnect from the runtime.
    WS_DIR = "/content/"
else:
    # Option 2: use your google drive as the runtime storage. You need to grant
    # permission for the colab runtime to access your google drive. You also
    # need to decide on a workspace for robomimic. In this case, we've created a
    # folder called "colab_ws" in Google Drive.
    from google.colab import drive
    drive.mount('/content/drive')
    WS_DIR = "/content/drive/MyDrive/colab_ws/" # this should be the absolute path, e.g., "/content/drive/MyDrive/my-ws/"
    # Fail early if the workspace folder has not been created in Drive.
    assert os.path.exists(WS_DIR)

# Make the workspace the current directory; the install cell below uses
# paths relative to it.
%cd $WS_DIR

  and should_run_async(code)


Mounted at /content/drive
/content/drive/MyDrive/colab_ws


In [4]:
# Install the basic requirements.
# The three `-e` installs expect robosuite/, robomimic/ and
# mimicgen_environments/ checkouts to already exist inside WS_DIR.
# NOTE(review): none of these installs pin a version, so a re-run may pull a
# different mujoco release than the one recorded in the output below.
%cd $WS_DIR
!pip install -e robosuite/
!pip install -e robomimic/
!pip install -e mimicgen_environments/
!pip install mujoco

import sys
import os
# Also put the checkouts on sys.path for the current kernel.
sys.path.append('./robosuite/')
sys.path.append('./robomimic/')
sys.path.append('./mimicgen_environments/')

/content/drive/MyDrive/colab_ws
Obtaining file:///content/drive/MyDrive/colab_ws/robosuite
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting mujoco>=2.3.0 (from robosuite==1.4.1)
  Downloading mujoco-3.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Collecting pynput (from robosuite==1.4.1)
  Downloading pynput-1.7.6-py2.py3-none-any.whl (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.2/89.2 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
Collecting glfw (from mujoco>=2.3.0->robosuite==1.4.1)
  Downloading glfw-2.7.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38-none-manylinux2014_x86_64.whl (21

In [5]:
import json
import h5py
import numpy as np

# Build the path of the demonstration file as
# <WS_DIR>/mimicgen_data/<TASK>_d<VARIANCE>_100.hdf5 and enforce that it exists.
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_100.hdf5")
assert os.path.exists(dataset_path)

## Architectures

In [6]:
class Policy(nn.Module):
    """Policy network mapping an image plus a low-dimensional feature vector to a mean action.

    The image goes through three stride-2 convolutions and is flattened, then
    concatenated with ``combined_features`` and passed through an MLP whose
    last layer produces the action mean. A separate learnable
    ``action_log_std`` parameter is exposed through ``log_std_and_std``; it is
    not used by ``forward``.

    Args:
        img_dim: ``(height, width, channels)`` of the input image.
        combined_dim: size of the low-dimensional feature vector.
        action_dim: size of the action vector.
    """

    def __init__(self, img_dim, combined_dim, action_dim):
        super(Policy, self).__init__()
        self.im = nn.Sequential(
            nn.Conv2d(img_dim[2], 32, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Flatten()
        )

        # Size of the flattened conv output, computed from the actual conv
        # arithmetic. The previous shortcut `dim // 8 + 1` is only right when
        # `dim` is not a multiple of 8 (it gives 11 for 84, which is correct,
        # but 9 instead of 8 for 64), which mis-sized the first Linear layer.
        self.feature_dim = (
            128
            * self._conv_stack_size(img_dim[0])
            * self._conv_stack_size(img_dim[1])
        )

        self.fully_connected = nn.Sequential(
            nn.Linear(self.feature_dim + combined_dim, 1024),
            nn.LeakyReLU(0.2),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 64)
        )

        # Shrink the initial weights of the output head so early actions are small.
        self.action_mean = nn.Linear(64, action_dim)
        self.action_mean.weight.data.mul_(0.1)
        self.action_mean.bias.data.mul_(0.1)

        # State-independent log standard deviation, initialised to |N(0, 1)|.
        self.action_log_std = nn.Parameter(abs(torch.randn(1, action_dim)))

    @staticmethod
    def _conv_stack_size(size, num_layers=3, kernel_size=3, stride=2, padding=1):
        """Return the spatial size left after ``num_layers`` identical convolutions."""
        for _ in range(num_layers):
            size = (size + 2 * padding - kernel_size) // stride + 1
        return size

    def forward(self, img, combined_features):
        """Return the action mean for a batch.

        Args:
            img: image batch laid out as ``(N, channels, height, width)``.
            combined_features: ``(N, combined_dim)`` feature batch.

        Returns:
            Tensor of shape ``(N, action_dim)``.
        """
        x = self.im(img)
        x = torch.cat([x, combined_features], dim=-1)
        x = self.fully_connected(x)

        action_mean = self.action_mean(x)
        return action_mean

    def log_std_and_std(self, action_means):
        """Return ``(log_std, std)`` broadcast to the shape of ``action_means``."""
        action_log_std = self.action_log_std.expand_as(action_means)
        return action_log_std, torch.exp(action_log_std)

In [7]:
class Discriminator(nn.Module):
    """Discriminator scoring (image, features, action) triples with a probability.

    The image goes through three stride-2 convolutions and is flattened, then
    concatenated with the low-dimensional features and the action and passed
    through an MLP ending in a sigmoid.

    Args:
        img_dim: ``(height, width, channels)`` of the input image.
        combined_dim: size of the low-dimensional feature vector.
        action_dim: size of the action vector.
    """

    def __init__(self, img_dim, combined_dim, action_dim):
        super(Discriminator, self).__init__()
        self.im = nn.Sequential(
            nn.Conv2d(img_dim[2], 32, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Flatten()
        )

        # Size of the flattened conv output, computed from the actual conv
        # arithmetic. The previous shortcut `dim // 8 + 1` is only right when
        # `dim` is not a multiple of 8 (it gives 11 for 84, which is correct,
        # but 9 instead of 8 for 64), which mis-sized the first Linear layer.
        self.feature_dim = (
            128
            * self._conv_stack_size(img_dim[0])
            * self._conv_stack_size(img_dim[1])
        )

        self.fully_connected = nn.Sequential(
            nn.Linear(self.feature_dim + combined_dim + action_dim, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 128),
            nn.LeakyReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    @staticmethod
    def _conv_stack_size(size, num_layers=3, kernel_size=3, stride=2, padding=1):
        """Return the spatial size left after ``num_layers`` identical convolutions."""
        for _ in range(num_layers):
            size = (size + 2 * padding - kernel_size) // stride + 1
        return size

    def forward(self, img, combined_features, action):
        """Return a sigmoid score per sample.

        Args:
            img: image batch laid out as ``(N, channels, height, width)``.
            combined_features: ``(N, combined_dim)`` feature batch.
            action: ``(N, action_dim)`` action batch.

        Returns:
            Tensor of shape ``(N, 1)`` with values in ``[0, 1]``.
        """
        x = self.im(img)
        x = torch.cat([x, combined_features, action], dim=-1)
        predictions = self.fully_connected(x)
        return predictions

In [8]:
import torch
from torch.utils.data import Dataset, DataLoader

class MimicGenDataset(Dataset):
    """Dataset over the demonstrations stored under ``data/`` in an HDF5 file.

    Each item is one whole demonstration, returned as
    ``(images, combined_features, actions)`` float32 tensors placed on CUDA
    when it is available and on the CPU otherwise. The file stays open until
    ``close`` is called.
    """

    def __init__(self, file_path):
        """Open ``file_path`` read-only and build a shuffled list of demo keys."""
        self.file = h5py.File(file_path, 'r')
        names = list(self.file['data'].keys())
        # Order keys by the integer after the underscore, then shuffle in place.
        order = np.argsort([int(name.split('_')[1]) for name in names])
        self.demos = [names[pos] for pos in order]
        random.shuffle(self.demos)
        self.combined_dim = None

    def __len__(self):
        """Number of demonstrations."""
        return len(self.demos)

    def __getitem__(self, idx):
        """Load demonstration ``idx`` and return ``(images, features, actions)``."""
        group = self.file['data'][self.demos[idx]]
        target = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        def load(source):
            # Read the full array and convert it to a float32 tensor.
            return torch.tensor(source[:], dtype=torch.float32)

        # Images are stored channel-last; move channels in front of height/width.
        e_imgs = load(group['obs']['agentview_image']).permute(0, 3, 1, 2).to(target)

        obs_keys = ('object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_joint_pos')
        parts = [load(group['obs'][key]).to(target) for key in obs_keys]
        e_actions = load(group['actions']).to(target)
        parts.append(load(group['states']).to(target))

        # Feature order: object, eef position, eef quaternion, joint positions, states.
        combined_features = torch.cat(parts, dim=1)

        return e_imgs, combined_features, e_actions

    def close(self):
        """Close the underlying HDF5 file."""
        self.file.close()

## Training Setup

In [9]:
import json
import h5py
import numpy as np

# Repeats the dataset-path setup from the earlier "Setup" cell, so this
# section can be run on its own once WS_DIR, TASK and VARIANCE are defined.
# enforce that the dataset exists
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_100.hdf5")
assert os.path.exists(dataset_path)

In [10]:
# Global dataset iterated directly by train_info_gail, one demonstration per step.
dataset = MimicGenDataset(dataset_path)
# Disabled batching path; BATCH_SIZE is commented out in the config cell.
# data_loader = DataLoader(
#     dataset=dataset,
#     sampler=None,
#     batch_size=BATCH_SIZE,
#     shuffle=True,
#     num_workers=0,
#     drop_last=True
# )

In [19]:
def train_info_gail(policy, discriminator, lambda1, lambda2, trajectories=None):
    """Adversarially train ``policy`` against ``discriminator`` with an extra MSE term.

    Every iteration consumes one item of ``trajectories`` (a whole
    demonstration) and performs one discriminator step followed by one policy
    step. Both networks are moved to CUDA when available and updated in
    place; nothing is returned. Fresh optimisers are created on every call.

    Args:
        policy: module called as ``policy(imgs, features)`` returning actions.
        discriminator: module called as ``discriminator(imgs, features, actions)``
            returning sigmoid scores of shape ``(N, 1)``.
        lambda1: weight of the adversarial term in the policy loss.
        lambda2: weight of the MSE term between policy and expert actions.
        trajectories: sized iterable of ``(imgs, features, actions)`` tensors.
            Defaults to the notebook-level ``dataset`` so existing calls keep
            working.

    Reads the notebook globals ``DISCRIMINATOR_LR``, ``DISCRIMINATOR_WD``,
    ``POLICY_LR`` and ``EPOCH``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    discriminator.to(device)
    policy.to(device)

    if trajectories is None:
        # Backward-compatible default: the dataset defined at notebook level.
        trajectories = dataset

    discriminator_optimizer = optim.SGD(discriminator.parameters(), lr=DISCRIMINATOR_LR, weight_decay=DISCRIMINATOR_WD)
    policy_optimizer = optim.Adam(policy.parameters(), lr=POLICY_LR)

    adv_loss = nn.BCELoss()
    mse_loss = nn.MSELoss()

    for epoch in range(EPOCH):
        # The initial description is 1-based, matching the per-iteration one below.
        progress_bar = tqdm(enumerate(trajectories), total=len(trajectories), desc=f"Epoch {epoch+1}")
        for i, data in progress_bar:
            e_imgs, combined_features, e_actions = data

            real_labels = torch.ones(e_actions.shape[0], 1).to(device)
            fake_labels = torch.zeros(e_actions.shape[0], 1).to(device)

            # Update Discriminator.
            # The policy output is produced without a graph: the discriminator
            # step must not back-propagate into the policy. The discriminator's
            # own gradients are the same as with an attached tensor.
            with torch.no_grad():
                detached_fake_actions = policy(e_imgs, combined_features)

            discriminator_optimizer.zero_grad()
            real_predictions = discriminator(e_imgs, combined_features, e_actions)
            fake_predictions = discriminator(e_imgs, combined_features, detached_fake_actions)
            discriminator_loss = adv_loss(real_predictions, real_labels) + adv_loss(fake_predictions, fake_labels)
            discriminator_loss.backward()
            discriminator_optimizer.step()

            # Update Generator (policy).
            # The policy is rewarded when the updated discriminator labels its
            # actions as real, plus an MSE pull towards the expert actions.
            # Gradients that land on the discriminator here are cleared by its
            # zero_grad() at the start of the next iteration.
            policy_optimizer.zero_grad()
            fake_actions = policy(e_imgs, combined_features)
            fake_predictions = discriminator(e_imgs, combined_features, fake_actions)
            generator_loss = lambda1 * adv_loss(fake_predictions, real_labels)
            bc_loss = lambda2 * mse_loss(fake_actions, e_actions)
            total_policy_loss = generator_loss + bc_loss
            total_policy_loss.backward()
            policy_optimizer.step()

            progress_bar.set_description(f"""Epoch {epoch+1} | Iter {i} | Discriminator Loss: {discriminator_loss.item():.4f} | Policy Loss: {total_policy_loss.item():.4f}""")

In [12]:
import robomimic.utils.obs_utils as ObsUtils

# We normally need to make sure robomimic knows which observations are images (for the
# data processing pipeline). This is usually inferred from your training config, but
# since this notebook builds its own training loop and only uses robomimic to create
# the environment, we just need to initialize robomimic with a dummy spec.
dummy_spec = dict(
    obs=dict(
            low_dim=["robot0_eef_pos"],
            rgb=[],
        ),
)
ObsUtils.initialize_obs_utils_with_obs_specs(obs_modality_specs=dummy_spec)



using obs modality: low_dim with keys: ['robot0_eef_pos']
using obs modality: rgb with keys: []


In [13]:
import robomimic.utils.env_utils as EnvUtils

# `f` is a notebook-global read handle on the dataset file. It is left open
# on purpose: playback_trajectory reads initial states and model XML from it.
f = h5py.File(dataset_path, "r")
# Environment arguments are stored as JSON in the attributes of the "data" group.
env_meta = json.loads(f["data"].attrs["env_args"])

# create simulation environment from environment metadata
env = EnvUtils.create_env_from_metadata(
    env_meta=env_meta,
    render=False,            # no on-screen rendering
    render_offscreen=True,   # off-screen rendering to support rendering video frames
)

  ROBOSUITE_DEFAULT_LOGGER.warn("No private macro file found!")


Created environment with name Coffee_D1
Action size is 7


In [14]:
# Load one demonstration only to read the width of its low-dimensional
# feature matrix; the networks need it as their `combined_dim` argument.
_, combined_features, _ = dataset[0]
combined_dim = combined_features.shape[1]
print(combined_dim)

  and should_run_async(code)


118


## Training

In [20]:
# Build fresh networks for 84x84 RGB images and 7-dimensional actions.
policy = Policy((84, 84, 3), combined_dim, 7)
discriminator = Discriminator((84, 84, 3), combined_dim, 7)
lambda1 = LAMBDA_1
lambda2 = LAMBDA_2

# train_info_gail(policy, discriminator, lambda1, lambda2, f, demos)
train_info_gail(policy, discriminator, lambda1, lambda2)

# The whole module objects are saved (not just state_dicts); the evaluation
# cell relies on this when it calls torch.load and uses the result directly.
policy_checkpoint = WS_DIR + TASK + '_policy.pt'
discriminator_checkpoint = WS_DIR + TASK + '_discriminator.pt'
torch.save(policy, policy_checkpoint)
torch.save(discriminator, discriminator_checkpoint)

Epoch 1 | Iter 99 | Discriminator Loss: 1.3934 | Policy Loss: 0.0880: 100%|██████████| 100/100 [00:25<00:00,  3.85it/s]
Epoch 2 | Iter 99 | Discriminator Loss: 1.3928 | Policy Loss: 0.0858: 100%|██████████| 100/100 [00:10<00:00,  9.98it/s]
Epoch 3 | Iter 99 | Discriminator Loss: 1.3926 | Policy Loss: 0.0858: 100%|██████████| 100/100 [00:09<00:00, 10.12it/s]
Epoch 4 | Iter 99 | Discriminator Loss: 1.3921 | Policy Loss: 0.0831: 100%|██████████| 100/100 [00:09<00:00, 10.04it/s]
Epoch 5 | Iter 99 | Discriminator Loss: 1.3917 | Policy Loss: 0.0844: 100%|██████████| 100/100 [00:10<00:00,  9.93it/s]
Epoch 6 | Iter 99 | Discriminator Loss: 1.3913 | Policy Loss: 0.0818: 100%|██████████| 100/100 [00:09<00:00, 10.06it/s]
Epoch 7 | Iter 99 | Discriminator Loss: 1.3910 | Policy Loss: 0.0819: 100%|██████████| 100/100 [00:09<00:00, 10.03it/s]
Epoch 8 | Iter 99 | Discriminator Loss: 1.3907 | Policy Loss: 0.0820: 100%|██████████| 100/100 [00:09<00:00, 10.04it/s]
Epoch 9 | Iter 99 | Discriminator Loss: 

## Rollout

In [21]:
import imageio

# prepare to write rollout frames to video; `video_writer` is a notebook
# global that playback_trajectory appends frames to
video_path = os.path.join(DATA_DIR, TASK + "_playback.mp4")
video_writer = imageio.get_writer(video_path, fps=20)

In [22]:
def playback_trajectory(demo_key, horizon=500):
    """
    Roll out the global @policy from the initial state of the demonstration stored under the
    hdf5 group @demo_key and write frames rendered from the simulation to the active
    @video_writer.

    Despite the name, the stored expert actions are not replayed: only the first simulator
    state and the model xml of the demonstration are used, to reset the environment.

    Args:
        demo_key: name of the demonstration group under "data/" in the open hdf5 file @f.
        horizon: number of environment steps to roll out (one video frame per step).

    Uses the notebook globals @f, @env, @policy and @video_writer.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # robosuite datasets store the ground-truth simulator states under the "states" key.
    # We will use the first one, along with the model xml, to reset the environment to
    # the initial configuration before rolling out the policy.
    init_state = f["data/{}/states".format(demo_key)][0]
    model_xml = f["data/{}".format(demo_key)].attrs["model_file"]
    initial_state_dict = dict(states=init_state, model=model_xml)

    # reset to initial state
    env.reset_to(initial_state_dict)

    def _observe():
        """Read the current simulator state and return the policy inputs as batched tensors."""
        state = env.get_state()['states']
        ob = env.get_observation()
        img = env.render(mode="rgb_array", height=84, width=84, camera_name="agentview")

        # Channel-last frame -> (1, C, H, W)
        img_tensor = torch.tensor(img.copy(), dtype=torch.float32).unsqueeze(0).permute(0, 3, 1, 2).to(device)

        # Same feature order as MimicGenDataset: object, eef pos, eef quat, joint pos, states.
        low_dim = [ob['object'], ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['robot0_joint_pos'], state]
        combined_features = torch.cat(
            [torch.tensor(value, dtype=torch.float32).unsqueeze(0).to(device) for value in low_dim],
            dim=1,
        )
        return img_tensor, combined_features

    img_tensor, combined_features = _observe()

    for _ in tqdm(range(horizon)):
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            action = policy(img_tensor, combined_features)
        env.step(action.cpu().numpy()[0])

        # Refresh every input after the step. Previously the end-effector pose and joint
        # positions were read once before the loop, so the policy kept seeing the initial
        # proprioception for the whole rollout.
        img_tensor, combined_features = _observe()

        video_img = env.render(mode="rgb_array", height=512, width=512, camera_name="agentview")
        video_writer.append_data(video_img)

In [23]:
# Roll out the policy from the initial states of three demos and record the
# frames to the video file. dataset.demos was shuffled when the dataset was
# built, so these are three arbitrary demos rather than demo_0..demo_2.
for ep in dataset.demos[:3]:
    print("Playing back demo key: {}".format(ep))
    playback_trajectory(ep)

# done writing video
video_writer.close()

Playing back demo key: demo_778


  self.pid = _posixsubprocess.fork_exec(
100%|██████████| 500/500 [02:06<00:00,  3.94it/s]


Playing back demo key: demo_899


100%|██████████| 500/500 [02:01<00:00,  4.10it/s]


Playing back demo key: demo_816


100%|██████████| 500/500 [02:04<00:00,  4.03it/s]


In [24]:
# view the rollouts! embed=True stores the video bytes inside the notebook output
from IPython.display import Video
Video(video_path, embed=True)

## More Training

In [25]:
# Narrative marker for the start of the next training phase; it has no effect on state.
print("Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?")

Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?


In [30]:
# Switch the global `dataset` to the "_200" file and enforce that it exists.
# The previous MimicGenDataset is replaced without calling close(), so its
# HDF5 handle stays open until it is garbage-collected.
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
print(dataset_path)
assert os.path.exists(dataset_path)
dataset = MimicGenDataset(dataset_path)

  and should_run_async(code)


/content/drive/MyDrive/colab_ws/mimicgen_data/coffee_d1_200.hdf5


In [31]:
# Sanity check: the feature width of the new file should match the value the
# existing networks were built with (printed earlier in the notebook).
_, combined_features, _ = dataset[0]
combined_dim = combined_features.shape[1]
print(combined_dim)

118


In [32]:
# The re-initialisation below is disabled on purpose: this phase continues
# training the existing `policy` and `discriminator` on the new `dataset`.
# train_info_gail builds new optimisers on each call, so optimiser state is
# not carried over between phases.
# policy = Policy((84, 84, 3), combined_dim, 7)
# discriminator = Discriminator((84, 84, 3), combined_dim, 7)
# lambda1 = LAMBDA_1
# lambda2 = LAMBDA_2

# train_info_gail(policy, discriminator, lambda1, lambda2, f, demos)
train_info_gail(policy, discriminator, lambda1, lambda2)

# Save under a phase-specific name; this also overwrites the
# policy_checkpoint / discriminator_checkpoint globals.
policy_checkpoint = WS_DIR + TASK + '_policy_phase2.pt'
discriminator_checkpoint = WS_DIR + TASK + '_discriminator_phase2.pt'
torch.save(policy, policy_checkpoint)
torch.save(discriminator, discriminator_checkpoint)

Epoch 1 | Iter 99 | Discriminator Loss: 1.3910 | Policy Loss: 0.0829: 100%|██████████| 100/100 [00:10<00:00,  9.58it/s]
Epoch 2 | Iter 99 | Discriminator Loss: 1.3893 | Policy Loss: 0.0786: 100%|██████████| 100/100 [00:10<00:00,  9.71it/s]
Epoch 3 | Iter 99 | Discriminator Loss: 1.3884 | Policy Loss: 0.0788: 100%|██████████| 100/100 [00:10<00:00,  9.51it/s]
Epoch 4 | Iter 99 | Discriminator Loss: 1.3879 | Policy Loss: 0.0767: 100%|██████████| 100/100 [00:10<00:00,  9.68it/s]
Epoch 5 | Iter 99 | Discriminator Loss: 1.3875 | Policy Loss: 0.0802: 100%|██████████| 100/100 [00:10<00:00,  9.75it/s]
Epoch 6 | Iter 99 | Discriminator Loss: 1.3874 | Policy Loss: 0.0766: 100%|██████████| 100/100 [00:10<00:00,  9.79it/s]
Epoch 7 | Iter 99 | Discriminator Loss: 1.3873 | Policy Loss: 0.0749: 100%|██████████| 100/100 [00:10<00:00,  9.83it/s]
Epoch 8 | Iter 99 | Discriminator Loss: 1.3872 | Policy Loss: 0.0752: 100%|██████████| 100/100 [00:10<00:00,  9.92it/s]
Epoch 9 | Iter 99 | Discriminator Loss: 

In [37]:
# Re-point the global handle `f` (read by playback_trajectory) at the "_200"
# file so the demo keys of the current `dataset` can be looked up in it.
# NOTE(review): the handle previously bound to `f` is not closed here.
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
f = h5py.File(dataset_path, "r")
# NOTE(review): `demos` is not read by any visible cell; the rollout cells use dataset.demos.
demos = list(f["data"].keys())

In [38]:
import imageio

# prepare to write this phase's rollout frames to a new video file
# (rebinds the global `video_writer` used by playback_trajectory)
video_path = os.path.join(DATA_DIR, TASK + "_playback_phase2.mp4")
video_writer = imageio.get_writer(video_path, fps=20)

In [39]:
# Roll out the policy from the initial states of three demos of the current
# (shuffled) dataset and record the frames to the video file.
for ep in dataset.demos[:3]:
    print("Playing back demo key: {}".format(ep))
    playback_trajectory(ep)

# done writing video
video_writer.close()

Playing back demo key: demo_850


  self.pid = _posixsubprocess.fork_exec(
100%|██████████| 500/500 [02:03<00:00,  4.04it/s]


Playing back demo key: demo_89


100%|██████████| 500/500 [01:58<00:00,  4.21it/s]


Playing back demo key: demo_289


100%|██████████| 500/500 [02:16<00:00,  3.66it/s]


In [40]:
# view the phase-2 rollouts (video_path was set when the writer was created)
from IPython.display import Video
Video(video_path, embed=True)

## More Training2

In [41]:
# Narrative marker for the start of the next training phase; it has no effect on state.
print("Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?")

Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?


In [42]:
# Rebuild the global `dataset` from the same "_200" file as the previous
# phase. No new demonstrations are added; the practical effect is a new
# shuffle of the demo order (done in MimicGenDataset.__init__).
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
print(dataset_path)
assert os.path.exists(dataset_path)
dataset = MimicGenDataset(dataset_path)

/content/drive/MyDrive/colab_ws/mimicgen_data/coffee_d1_200.hdf5


In [43]:
# Sanity check: the feature width should still match the value the existing
# networks were built with.
_, combined_features, _ = dataset[0]
combined_dim = combined_features.shape[1]
print(combined_dim)

118


In [44]:
# Re-initialisation stays disabled: this phase keeps training the existing
# `policy` and `discriminator` for another EPOCH passes with new optimisers.
# policy = Policy((84, 84, 3), combined_dim, 7)
# discriminator = Discriminator((84, 84, 3), combined_dim, 7)
# lambda1 = LAMBDA_1
# lambda2 = LAMBDA_2

# train_info_gail(policy, discriminator, lambda1, lambda2, f, demos)
train_info_gail(policy, discriminator, lambda1, lambda2)

# Save under a phase-specific name; this also overwrites the
# policy_checkpoint / discriminator_checkpoint globals.
policy_checkpoint = WS_DIR + TASK + '_policy_phase3.pt'
discriminator_checkpoint = WS_DIR + TASK + '_discriminator_phase3.pt'
torch.save(policy, policy_checkpoint)
torch.save(discriminator, discriminator_checkpoint)

Epoch 1 | Iter 99 | Discriminator Loss: 1.3872 | Policy Loss: 0.0710: 100%|██████████| 100/100 [00:09<00:00, 10.10it/s]
Epoch 2 | Iter 99 | Discriminator Loss: 1.3872 | Policy Loss: 0.0713: 100%|██████████| 100/100 [00:09<00:00, 10.21it/s]
Epoch 3 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0706: 100%|██████████| 100/100 [00:09<00:00, 10.30it/s]
Epoch 4 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0705: 100%|██████████| 100/100 [00:09<00:00, 10.34it/s]
Epoch 5 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0705: 100%|██████████| 100/100 [00:09<00:00, 10.26it/s]
Epoch 6 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0704: 100%|██████████| 100/100 [00:09<00:00, 10.32it/s]
Epoch 7 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0721: 100%|██████████| 100/100 [00:09<00:00, 10.26it/s]
Epoch 8 | Iter 99 | Discriminator Loss: 1.3871 | Policy Loss: 0.0712: 100%|██████████| 100/100 [00:09<00:00, 10.23it/s]
Epoch 9 | Iter 99 | Discriminator Loss: 

In [45]:
# Reopen the global handle `f` (read by playback_trajectory) on the "_200" file.
# NOTE(review): the handle previously bound to `f` is not closed here.
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
f = h5py.File(dataset_path, "r")
# NOTE(review): `demos` is not read by any visible cell; the rollout cells use dataset.demos.
demos = list(f["data"].keys())

In [46]:
import imageio

# prepare to write this phase's rollout frames to a new video file
# (rebinds the global `video_writer` used by playback_trajectory)
video_path = os.path.join(DATA_DIR, TASK + "_playback_phase3.mp4")
video_writer = imageio.get_writer(video_path, fps=20)

In [47]:
# Roll out the policy from the initial states of three demos of the current
# (shuffled) dataset and record the frames to the video file.
for ep in dataset.demos[:3]:
    print("Playing back demo key: {}".format(ep))
    playback_trajectory(ep)

# done writing video
video_writer.close()

Playing back demo key: demo_630


100%|██████████| 500/500 [02:00<00:00,  4.14it/s]


Playing back demo key: demo_628


100%|██████████| 500/500 [02:05<00:00,  3.98it/s]


Playing back demo key: demo_155


100%|██████████| 500/500 [02:05<00:00,  3.98it/s]


In [48]:
# view the phase-3 rollouts (video_path was set when the writer was created)
from IPython.display import Video
Video(video_path, embed=True)

## More Training3

In [49]:
# Narrative marker for the start of the next training phase; it has no effect on state.
print("Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?")

Robot: hi, my performance is kinda bad; can you give me more expert demonstrations?


In [50]:
# Rebuild the global `dataset` from the same "_200" file as the previous
# phases. No new demonstrations are added; the practical effect is a new
# shuffle of the demo order (done in MimicGenDataset.__init__).
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
print(dataset_path)
assert os.path.exists(dataset_path)
dataset = MimicGenDataset(dataset_path)

/content/drive/MyDrive/colab_ws/mimicgen_data/coffee_d1_200.hdf5


In [51]:
# Sanity check: the feature width should still match the value the existing
# networks were built with.
_, combined_features, _ = dataset[0]
combined_dim = combined_features.shape[1]
print(combined_dim)

118


In [53]:
# Re-initialisation stays disabled: this phase keeps training the existing
# `policy` and `discriminator` for another EPOCH passes with new optimisers.
# policy = Policy((84, 84, 3), combined_dim, 7)
# discriminator = Discriminator((84, 84, 3), combined_dim, 7)
# lambda1 = LAMBDA_1
# lambda2 = LAMBDA_2

# train_info_gail(policy, discriminator, lambda1, lambda2, f, demos)
train_info_gail(policy, discriminator, lambda1, lambda2)

# Save under a phase-specific name; this also overwrites the
# policy_checkpoint / discriminator_checkpoint globals.
policy_checkpoint = WS_DIR + TASK + '_policy_phase4.pt'
discriminator_checkpoint = WS_DIR + TASK + '_discriminator_phase4.pt'
torch.save(policy, policy_checkpoint)
torch.save(discriminator, discriminator_checkpoint)

Epoch 1 | Iter 99 | Discriminator Loss: 1.3868 | Policy Loss: 0.0717: 100%|██████████| 100/100 [00:09<00:00, 10.35it/s]
Epoch 2 | Iter 99 | Discriminator Loss: 1.3868 | Policy Loss: 0.0715: 100%|██████████| 100/100 [00:09<00:00, 10.39it/s]
Epoch 3 | Iter 99 | Discriminator Loss: 1.3868 | Policy Loss: 0.0716: 100%|██████████| 100/100 [00:09<00:00, 10.31it/s]
Epoch 4 | Iter 99 | Discriminator Loss: 1.3867 | Policy Loss: 0.0720: 100%|██████████| 100/100 [00:10<00:00,  9.90it/s]
Epoch 5 | Iter 99 | Discriminator Loss: 1.3867 | Policy Loss: 0.0725: 100%|██████████| 100/100 [00:09<00:00, 10.04it/s]
Epoch 6 | Iter 99 | Discriminator Loss: 1.3867 | Policy Loss: 0.0721: 100%|██████████| 100/100 [00:09<00:00, 10.15it/s]
Epoch 7 | Iter 99 | Discriminator Loss: 1.3867 | Policy Loss: 0.0716: 100%|██████████| 100/100 [00:10<00:00,  9.85it/s]
Epoch 8 | Iter 99 | Discriminator Loss: 1.3867 | Policy Loss: 0.0714: 100%|██████████| 100/100 [00:10<00:00,  9.78it/s]
Epoch 9 | Iter 99 | Discriminator Loss: 

In [54]:
# Reopen the global handle `f` (read by playback_trajectory) on the "_200" file.
# NOTE(review): the handle previously bound to `f` is not closed here.
DATA_DIR = WS_DIR + "mimicgen_data/"
dataset_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_200.hdf5")
f = h5py.File(dataset_path, "r")
# NOTE(review): `demos` is not read by any visible cell; the rollout cells use dataset.demos.
demos = list(f["data"].keys())

In [55]:
import imageio

# prepare to write this phase's rollout frames to a new video file
# (rebinds the global `video_writer` used by playback_trajectory)
video_path = os.path.join(DATA_DIR, TASK + "_playback_phase4.mp4")
video_writer = imageio.get_writer(video_path, fps=20)

In [56]:
# Roll out the policy from the initial states of three demos of the current
# (shuffled) dataset and record the frames to the video file.
for ep in dataset.demos[:3]:
    print("Playing back demo key: {}".format(ep))
    playback_trajectory(ep)

# done writing video
video_writer.close()

Playing back demo key: demo_533


100%|██████████| 500/500 [02:19<00:00,  3.59it/s]


Playing back demo key: demo_79


100%|██████████| 500/500 [02:04<00:00,  4.00it/s]


Playing back demo key: demo_742


100%|██████████| 500/500 [02:04<00:00,  4.00it/s]


In [57]:
# view the phase-4 rollouts (video_path was set when the writer was created)
from IPython.display import Video
Video(video_path, embed=True)

## Evaluation

In [None]:
# Held-out demonstrations, expected at <DATA_DIR>/<TASK>_d<VARIANCE>_test.hdf5.
eval_path = os.path.join(DATA_DIR, TASK + "_d" + VARIANCE + "_test.hdf5")
testset = MimicGenDataset(eval_path)
# `test_loader` is only referenced from a commented-out line in the
# evaluation loop, which iterates `testset` directly instead.
# NOTE(review): each dataset item is a whole demonstration, so batching 8 of
# them presumably requires equal-length demos - confirm before enabling.
test_loader = DataLoader(
    dataset=testset,
    sampler=None,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    drop_last=True
)

In [None]:
# Read the low-dimensional feature width of the test file from its first item.
_, combined_features, _ = testset[0]
combined_dim = combined_features.shape[1]

In [None]:
# Evaluate the phase-1 checkpoints on the held-out demonstrations and report
# the running average of the same policy loss used during training
# (lambda1 * adversarial term + lambda2 * MSE to the expert actions).
#
# Both paths are spelled out here. A typo in the discriminator variable names
# previously made this cell load whatever `discriminator_checkpoint` was left
# over from the last training cell, i.e. a discriminator from a different
# phase than the policy.
policy_checkpoint = WS_DIR + TASK + '_policy.pt'
discriminator_checkpoint = WS_DIR + TASK + '_discriminator.pt'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoints hold whole pickled modules (see the torch.save calls in the
# training cells), so no model needs to be constructed first. map_location
# lets a checkpoint written on GPU load on a CPU-only runtime.
# NOTE(review): torch.load unpickles arbitrary objects - only load trusted
# files. PyTorch releases whose torch.load defaults to weights_only=True will
# need weights_only=False passed here to accept full-module checkpoints.
policy_test = torch.load(policy_checkpoint, map_location=device).to(device)
discriminator_test = torch.load(discriminator_checkpoint, map_location=device).to(device)

policy_test.eval()
discriminator_test.eval()
test_g_loss = 0.0
num_batches = 0

adv_loss = nn.BCELoss()
mse_loss = nn.MSELoss()

progress_bar = tqdm(enumerate(testset), total=len(testset), desc=f"Evaluation")

# Evaluation needs no gradients; without no_grad() every accumulated loss kept
# its autograd graph alive.
with torch.no_grad():
    for i, data in progress_bar:
        e_imgs, combined_features, e_actions = data

        real_labels = torch.ones(e_actions.shape[0], 1).to(device)

        fake_actions = policy_test(e_imgs, combined_features)
        fake_predictions = discriminator_test(e_imgs, combined_features, fake_actions)
        generator_loss = lambda1 * adv_loss(fake_predictions, real_labels)
        bc_loss = lambda2 * mse_loss(fake_actions, e_actions)
        total_policy_loss = generator_loss + bc_loss

        # Accumulate a plain float, not a tensor.
        test_g_loss += total_policy_loss.item()

        num_batches += 1

        progress_bar.set_description(f"Average total policy Loss: {test_g_loss/num_batches:.4f}")