In [5]:
#Imports & hyperparameters


import pickle
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Training device
device     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reproducibility: seed NumPy and PyTorch up front so that anything drawing
# from their global RNGs (e.g. DataLoader shuffling) repeats exactly after
# Restart Kernel -> Run All
seed       = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Path to expert trajectories (simple_push)
data_path  = "Expert_data/expert_data_rllib_simple_push.pickle"

# BC hyperparameters
batch_size = 64     # samples per mini-batch
lr         = 1e-3   # learning rate
epochs     = 50     # number of training epochs
hidden_dim = 64     # hidden layer width

print("Device:", device)
print("Data path:", data_path)

Device: cpu
Data path: Expert_data/expert_data_rllib_simple_push.pickle


In [6]:
# Load & inspect expert_data
# Load the pickled expert trajectories.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# data_path must only ever point at a trusted file.
with open(data_path, "rb") as f:
    expert_data = pickle.load(f)

# Sanity check
print("expert_data type:", type(expert_data))
print("Agents available:", list(expert_data.keys()))

# For each agent, print number of transitions and example shapes
for agent, data in expert_data.items():
    n_states  = len(data["states"])
    n_actions = len(data["actions"])
    # States and actions are paired by index; fail early on misaligned data
    # instead of letting the mismatch propagate into later cells
    if n_states != n_actions:
        raise ValueError(
            f"{agent}: {n_states} states but {n_actions} actions; "
            "expected exactly one action per state"
        )
    # Peek at shapes (None when the agent has no transitions to peek at)
    state_shape  = np.array(data["states"][0]).shape if n_states else None
    action_shape = np.array(data["actions"][0]).shape if n_actions else None
    print(f"  {agent}: {n_states} transitions, state shape={state_shape}, action shape={action_shape}")

expert_data type: <class 'dict'>
Agents available: ['adversary_0', 'agent_0']
  adversary_0: 1200 transitions, state shape=(8,), action shape=()
  agent_0: 1200 transitions, state shape=(19,), action shape=()


In [7]:
# SingleAgentExpertDataset + DataLoaders
class SingleAgentExpertDataset(Dataset):
    """Map-style dataset of (state, action) pairs for a single agent.

    Args:
        states: Sequence of per-step states (e.g. a list of numpy arrays).
            Stacked with ``np.array`` and stored as a float32 tensor.
        actions: Sequence of per-step actions, one per state. Stored as an
            int64 tensor.

    Raises:
        ValueError: If ``states`` and ``actions`` do not contain the same
            number of entries.
    """

    def __init__(self, states, actions):
        # Convert lists of numpy arrays into torch tensors
        self.states  = torch.from_numpy(np.array(states)).float()
        self.actions = torch.from_numpy(np.array(actions)).long()

        # Samples are paired by index, so the leading dimensions must agree.
        # Comparing shape[:1] (rather than len()) also copes with 0-d input.
        if self.states.shape[:1] != self.actions.shape[:1]:
            raise ValueError(
                "states and actions must have the same number of entries, got "
                f"states shape {tuple(self.states.shape)} and "
                f"actions shape {tuple(self.actions.shape)}"
            )

    def __len__(self):
        """Return the number of stored transitions."""
        return len(self.states)

    def __getitem__(self, idx):
        """Return the ``(state, action)`` tensors stored at position ``idx``."""
        return self.states[idx], self.actions[idx]

# Build one dataset and one shuffling DataLoader per agent, keyed by agent name
datasets, loaders = {}, {}

for agent, data in expert_data.items():
    ds     = SingleAgentExpertDataset(states=data["states"], actions=data["actions"])
    loader = DataLoader(dataset=ds, batch_size=batch_size, shuffle=True)
    datasets[agent], loaders[agent] = ds, loader

    # Draw a single batch to confirm the tensor shapes look right
    s_batch, a_batch = next(iter(loader))
    print(f"{agent} batch shapes → states: {s_batch.shape}, actions: {a_batch.shape}")

adversary_0 batch shapes → states: torch.Size([64, 8]), actions: torch.Size([64])
agent_0 batch shapes → states: torch.Size([64, 19]), actions: torch.Size([64])
