# FlowPolicy on Kitchen Dataset - Google Colab

Notebook lengkap: load data → training (CFM loss) → simulasi & evaluasi.

**Sumber data (satu dari):**
- **.npy**: file `all_observations.npy` dan `all_actions.npy` di `DATA_DIR`.
- **.mjl**: file log MuJoCo (playdata) di `DATA_DIR` atau subfolder, mis. `kitchen_demos_multitask/friday_kettle_switch_hinge_slide/kitchen_playdata_*.mjl`. Jika .npy tidak ada dan `USE_MJL_IF_MISSING=True`, loader akan mencari semua `.mjl` secara rekursif dan memuatnya secara otomatis (memerlukan modul `kitchen_dataset` yang dapat diimpor).
- **Sintetis**: jika keduanya tidak ada dan `USE_SYNTHETIC_IF_MISSING=True`.

**Cara pakai di Colab:**
1. Upload folder `kitchen` (berisi .npy dan/atau subfolder berisi .mjl) ke Colab, atau jalankan dengan data sintetis.
2. Jika pakai Google Drive: mount Drive lalu set `DATA_DIR` ke path folder kitchen di Drive.
3. Jalankan semua cell secara berurutan.

## 1. Setup & Install

In [None]:
# Optional: mount Google Drive (if the kitchen data lives on Drive)
# from google.colab import drive
# drive.mount('/content/drive')
# DATA_DIR = "/content/drive/MyDrive/skripsi/kitchen"  # adjust the path

# Without a Drive mount, use a folder on the Colab VM (manual upload) or synthetic data
DATA_DIR = "/content/kitchen"  # change this if the data was uploaded elsewhere
USE_MJL_IF_MISSING = True   # True = try loading from .mjl files when the .npy files are absent
USE_SYNTHETIC_IF_MISSING = True  # True = fall back to synthetic data when neither .npy nor .mjl is found

!pip install -q tqdm

In [None]:
import os
import math
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from tqdm import tqdm

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

## 2. Konfigurasi & Seed

In [None]:
# Seed NumPy and PyTorch (CPU and every CUDA device) so runs are repeatable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Request deterministic cuDNN kernels and turn off the cuDNN autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Kitchen dimensions (adjust when using another dataset).
# These are used for the .mjl loader and the synthetic fallback; when .npy
# files are loaded, the dimensions are read from the arrays instead.
STATE_DIM = 59
ACTION_DIM = 9

# Training
BATCH_SIZE = 64
LR = 1e-3
NUM_EPOCHS = 50
VAL_RATIO = 0.1  # fraction of samples held out for validation

# FlowPolicy
HIDDEN_DIM = 512
TIME_EMBED_DIM = 128
NUM_INFERENCE_STEPS = 10  # number of Euler steps taken when sampling an action
CFM_EPS = 1e-2  # smallest time value used in training and sampling
CFM_DELTA = 1e-2  # time offset between the two points compared by the loss
CFM_NUM_SEGMENTS = 2  # number of equal time segments in [0, 1]
CFM_BOUNDARY = 1  # time threshold used by the loss masks; 0 disables the velocity term
CFM_ALPHA = 1e-5  # weight of the velocity term in the loss

# Checkpoints, curves and training metrics are written here.
SAVE_DIR = "/content/checkpoints"
os.makedirs(SAVE_DIR, exist_ok=True)

## 3. Dataset Kitchen

In [None]:
def load_kitchen_data(data_dir):
    """Read the known kitchen ``.npy`` arrays found in ``data_dir``.

    Only the five expected file stems are looked up. Missing files are
    ignored, files that fail to load are reported with a ``Skip`` message,
    and objects without at least one dimension are dropped.

    Args:
        data_dir: Directory that may contain the ``.npy`` files.

    Returns:
        Dict mapping each successfully loaded file stem to its array.
    """
    expected_stems = (
        "all_observations",
        "all_actions",
        "observations_seq",
        "actions_seq",
        "existence_mask",
    )
    loaded = {}
    for name in expected_stems:
        npy_path = os.path.join(data_dir, name + ".npy")
        if not os.path.isfile(npy_path):
            continue
        try:
            # NOTE: allow_pickle=True can execute code embedded in a crafted
            # file; only point this at data you trust.
            candidate = np.load(npy_path, allow_pickle=True)
            has_dims = hasattr(candidate, 'shape') and len(candidate.shape) > 0
            if has_dims:
                loaded[name] = candidate
        except Exception as e:
            print(f"Skip {name}: {e}")
    return loaded

def flatten_trajectories(obs, actions, mask=None):
    """Collapse trajectory-shaped arrays into flat per-timestep samples.

    Args:
        obs: Observation array. A 2-D array is treated as already flat and
            returned unchanged together with ``actions``.
        actions: Action array whose leading dimensions match ``obs``.
        mask: Optional validity mask over the leading dimensions of ``obs``.
            Non-zero (or ``True``) entries mark timesteps to keep. Boolean,
            integer and float masks are all accepted.

    Returns:
        Tuple ``(obs, actions)`` of 2-D arrays with one row per kept timestep.
    """
    if obs.ndim == 2:
        return obs, actions
    if mask is not None:
        # A 0/1 mask stored as float raises IndexError when used as an index,
        # and one stored as int is treated as fancy indices and silently
        # returns the wrong rows. Casting to bool gives true mask selection.
        keep = np.asarray(mask).astype(bool)
        obs = obs[keep].reshape(-1, obs.shape[-1])
        actions = actions[keep].reshape(-1, actions.shape[-1])
    else:
        obs = obs.reshape(-1, obs.shape[-1])
        actions = actions.reshape(-1, actions.shape[-1])
    return obs, actions

def create_synthetic_kitchen(num_trajs=50, steps=100, obs_dim=59, action_dim=9, seed=42):
    """Generate random stand-in trajectories for when no real data exists.

    Observations are drawn first and actions second from one seeded
    generator, so the output is reproducible for a given ``seed``.

    Returns:
        Tuple ``(obs, actions)`` of float32 standard-normal arrays shaped
        ``(num_trajs, steps, obs_dim)`` and ``(num_trajs, steps, action_dim)``.
    """
    generator = np.random.default_rng(seed)

    def _draw(width):
        # Each call advances the shared generator, so call order matters.
        sample = generator.standard_normal((num_trajs, steps, width))
        return sample.astype(np.float32)

    synthetic_obs = _draw(obs_dim)
    synthetic_actions = _draw(action_dim)
    return synthetic_obs, synthetic_actions

class KitchenDataset(Dataset):
    """Dataset of (observation, action) pairs stored as float32 tensors.

    Inputs with three dimensions are flattened over their first two axes so
    that every item is a single timestep.
    """
    def __init__(self, obs, actions):
        if obs.ndim == 3:
            # Merge the two leading axes; the feature axis stays last.
            obs, actions = (arr.reshape(-1, arr.shape[-1]) for arr in (obs, actions))
        self.obs = torch.as_tensor(obs, dtype=torch.float32)
        self.actions = torch.as_tensor(actions, dtype=torch.float32)
    def __len__(self):
        # Number of samples equals the number of observation rows.
        return len(self.obs)
    def __getitem__(self, idx):
        pair = (self.obs[idx], self.actions[idx])
        return pair

# Load data. Sources are tried in order: pre-extracted .npy arrays, MuJoCo
# .mjl logs (needs the external kitchen_dataset module), then synthetic data.
raw = load_kitchen_data(DATA_DIR)
if raw and "all_observations" in raw and "all_actions" in raw:
    obs = raw["all_observations"]
    actions = raw["all_actions"]
    mask = raw.get("existence_mask")
    obs, actions = flatten_trajectories(obs, actions, mask)
    # For .npy data the dimensions come from the arrays, not from STATE_DIM/ACTION_DIM.
    state_dim, action_dim = obs.shape[1], actions.shape[1]
    print(f"Loaded kitchen data (.npy): {obs.shape[0]} samples, state_dim={state_dim}, action_dim={action_dim}")
else:
    obs, actions = None, None
    if USE_MJL_IF_MISSING:
        try:
            from kitchen_dataset import load_kitchen_from_mjl, find_mjl_files
            mjl_paths = find_mjl_files(DATA_DIR)
            if mjl_paths:
                obs, actions = load_kitchen_from_mjl(DATA_DIR, state_dim=STATE_DIM, action_dim=ACTION_DIM)
                if obs is not None:
                    state_dim, action_dim = obs.shape[1], actions.shape[1]
                    print(f"Loaded kitchen data (.mjl): {len(mjl_paths)} file(s), {obs.shape[0]} samples")
        except ImportError as exc:
            # Still best-effort, but say why .mjl loading was skipped so a
            # silent fall-through to synthetic data is not mistaken for
            # "no data found".
            print(f"Lewati pemuatan .mjl (ImportError): {exc}")
    if obs is None and USE_SYNTHETIC_IF_MISSING:
        print("Data tidak ditemukan. Menggunakan data sintetis.")
        obs, actions = create_synthetic_kitchen(obs_dim=STATE_DIM, action_dim=ACTION_DIM)
        obs = obs.reshape(-1, obs.shape[-1])
        actions = actions.reshape(-1, actions.shape[-1])
        state_dim, action_dim = STATE_DIM, ACTION_DIM
    if obs is None:
        raise FileNotFoundError(f"Tidak ada data di {DATA_DIR}. Upload .npy/.mjl atau set USE_SYNTHETIC_IF_MISSING=True.")

# Wrap the flat arrays in a Dataset and hold out a validation subset.
dataset = KitchenDataset(obs, actions)
n = len(dataset)
# Keep at least one validation sample even for very small datasets.
n_val = max(1, int(n * VAL_RATIO))
n_train = n - n_val
# No generator is passed, so the split draws from the global torch RNG seeded above.
train_ds, val_ds = torch.utils.data.random_split(dataset, [n_train, n_val])
# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
print(f"Train: {n_train}, Val: {n_val}")

## 4. Arsitektur FlowPolicy (Linear + ReLU)

In [None]:
class SinusoidalPosEmb(nn.Module):
    """Sinusoidal embedding of a 1-D batch of scalar values (e.g. time).

    The first ``dim // 2`` features are sines and the next ``dim // 2`` are
    cosines of the input scaled by geometrically spaced frequencies. When
    ``dim`` is odd a trailing zero column is appended so the output always
    has exactly ``dim`` features.

    Args:
        dim: Number of output features; must be at least 2.

    Raises:
        ValueError: If ``dim`` is smaller than 2.
    """
    def __init__(self, dim):
        super().__init__()
        if dim < 2:
            raise ValueError(f"dim must be at least 2, got {dim}")
        self.dim = dim
    def forward(self, x):
        """Return a ``(len(x), dim)`` embedding for the 1-D tensor ``x``."""
        half_dim = self.dim // 2
        # max(..., 1) keeps dim == 2 or 3 (half_dim == 1) from dividing by zero;
        # for half_dim >= 2 the value is unchanged.
        scale = math.log(10000) / max(half_dim - 1, 1)
        freqs = torch.exp(torch.arange(half_dim, device=x.device) * -scale)
        angles = x[:, None] * freqs[None, :]
        emb = torch.cat((angles.sin(), angles.cos()), dim=-1)
        if self.dim % 2 == 1:
            # Pad odd sizes so downstream layers sized with `dim` still match.
            emb = torch.cat((emb, torch.zeros_like(emb[:, :1])), dim=-1)
        return emb

class FlowPolicyVelocityNet(nn.Module):
    """MLP that maps (state, noisy action, time) to an action-sized output.

    The time input goes through a sinusoidal embedding and a two-layer MLP,
    is concatenated with the state and the noisy action, and the result is
    passed through a Linear/ReLU stack.

    Args:
        state_dim: Size of the state vector.
        action_dim: Size of the action vector (also the output size).
        hidden_dim: Width of the hidden layers; the last hidden layer uses
            ``hidden_dim // 2``.
        time_embed_dim: Size of the time embedding.
    """
    def __init__(self, state_dim, action_dim, hidden_dim=512, time_embed_dim=128):
        super().__init__()
        widened = time_embed_dim * 2
        self.time_embed = nn.Sequential(
            SinusoidalPosEmb(time_embed_dim),
            nn.Linear(time_embed_dim, widened),
            nn.ReLU(inplace=True),
            nn.Linear(widened, time_embed_dim),
        )
        # Layer widths from the concatenated input to the last hidden layer.
        widths = [
            state_dim + action_dim + time_embed_dim,
            hidden_dim,
            hidden_dim,
            hidden_dim,
            hidden_dim // 2,
        ]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU(inplace=True))
        # Final projection has no activation.
        layers.append(nn.Linear(widths[-1], action_dim))
        self.net = nn.Sequential(*layers)
    def forward(self, state, noisy_action, t):
        """Return the network output for a batch of states, actions and times."""
        features = (state, noisy_action, self.time_embed(t))
        return self.net(torch.cat(features, dim=-1))

class FlowPolicyModel(nn.Module):
    """Sample actions by integrating a velocity network from Gaussian noise.

    Args:
        velocity_net: Callable taking ``(state, action, t)`` and returning a
            tensor shaped like ``action``.
        action_dim: Size of the sampled action vector.
        num_inference_steps: Number of fixed-size Euler steps.
        eps: Smallest time value fed to the velocity network.
    """
    def __init__(self, velocity_net, action_dim, num_inference_steps=10, eps=1e-2):
        super().__init__()
        self.velocity_net = velocity_net
        self.action_dim = action_dim
        self.num_inference_steps = num_inference_steps
        self.eps = eps
    def forward(self, state):
        """Return one sampled action per state; a 1-D state gives a 1-D action."""
        squeeze_out = state.dim() == 1
        batched = state.unsqueeze(0) if squeeze_out else state
        batch = batched.shape[0]
        where = batched.device
        steps = self.num_inference_steps
        # Start from standard-normal noise and take `steps` Euler updates.
        sample = torch.randn(batch, self.action_dim, device=where)
        step_size = 1.0 / steps
        for k in range(steps):
            # Times run from eps upward in increments of (1 - eps) / steps.
            time_value = k / steps * (1 - self.eps) + self.eps
            t = torch.ones(batch, device=where) * time_value
            velocity = self.velocity_net(batched, sample, t)
            sample = sample + velocity * step_size
        return sample.squeeze(0) if squeeze_out else sample

def compute_cfm_loss(velocity_net, states, actions, device, eps=1e-2, delta=1e-2, num_segments=2, boundary=1, alpha=1e-5):
    """Compute the Consistency Flow Matching training loss for one batch.

    Two points ``xt`` and ``xr`` are taken on the straight line between a
    noise sample and the target action, at times ``t`` and ``r = t + delta``.
    Each is extrapolated along its predicted velocity to the end of the time
    segment containing ``t``; the loss penalises the squared difference of
    the two extrapolations plus ``alpha`` times a masked squared difference
    of the two velocities.

    Args:
        velocity_net: Callable ``(states, x, t)`` returning a tensor shaped
            like ``actions``.
        states: Conditioning states for the batch.
        actions: Target actions; the first dimension is the batch size.
        device: Device on which the time samples and segment grid are created.
        eps: Lower bound of the sampled time ``t``.
        delta: Time offset between ``t`` and ``r`` (``r`` is capped at 1).
        num_segments: Number of equal segments that [0, 1] is divided into.
        boundary: Time threshold for the masks; ``0`` replaces the second
            extrapolation by the exact segment-end point and zeroes the
            velocity term.
        alpha: Weight of the velocity term.

    Returns:
        Scalar tensor: the batch mean of ``losses_f + alpha * losses_v``.
    """
    B = actions.shape[0]
    # Noise endpoint of the interpolation path (time 0).
    a0 = torch.randn_like(actions)
    # t is uniform in [eps, 1); r is the slightly later time, capped at 1.
    t = torch.rand(B, device=device) * (1 - eps) + eps
    r = torch.clamp(t + delta, max=1.0)
    t_exp = t.unsqueeze(-1)
    r_exp = r.unsqueeze(-1)
    # Linear interpolation between noise (weight 1 - time) and target (weight time).
    xt = t_exp * actions + (1 - t_exp) * a0
    xr = r_exp * actions + (1 - r_exp) * a0
    # Segment boundaries 0, 1/num_segments, ..., 1.
    segments = torch.linspace(0, 1, num_segments + 1, device=device)
    # Index of the first boundary >= t; clamping to 1 stops t == 0 from selecting boundary 0.
    seg_idx = torch.searchsorted(segments, t, side="left").clamp(min=1)
    seg_ends = segments[seg_idx]
    seg_ends_exp = seg_ends.unsqueeze(-1)
    vt = velocity_net(states, xt, t)
    # NOTE(review): vr is computed by the same network with gradients enabled;
    # confirm whether a stop-gradient / separate target network is intended here.
    vr = velocity_net(states, xr, r)
    # Replace NaN/inf entries in vr with finite values.
    vr = torch.nan_to_num(vr)
    # Extrapolate xt along vt to the end of its segment.
    ft = xt + (seg_ends_exp - t_exp) * vt
    # Exact point on the interpolation path at the segment end.
    x_at_seg = seg_ends_exp * actions + (1 - seg_ends_exp) * a0
    if boundary == 0:
        fr = x_at_seg
    else:
        # Where t < boundary use the extrapolation from xr; otherwise the exact point.
        mask_b = t_exp < boundary
        fr = mask_b * (xr + (seg_ends_exp - r_exp) * vr) + (~mask_b) * x_at_seg
    # Per-sample mean squared difference between the two segment-end estimates.
    losses_f = torch.mean((ft - fr) ** 2, dim=-1)
    if boundary == 0:
        losses_v = torch.zeros_like(losses_f)
    else:
        mask_b = (t < boundary).unsqueeze(-1)
        # Only count samples whose segment end is more than 1.01 * delta ahead of t.
        mask_far = ((seg_ends - t) > 1.01 * delta).unsqueeze(-1)
        losses_v = torch.mean(mask_b * mask_far * (vt - vr) ** 2, dim=-1)
    return torch.mean(losses_f + alpha * losses_v)

## 5. Inisialisasi Model & Optimizer

In [None]:
# Build the velocity network using the dimensions found while loading the data.
velocity_net = FlowPolicyVelocityNet(
    state_dim=state_dim,
    action_dim=action_dim,
    hidden_dim=HIDDEN_DIM,
    time_embed_dim=TIME_EMBED_DIM,
).to(device)

# The sampler wraps the same velocity_net instance, so training or loading
# weights into velocity_net also updates flowpolicy_model.
flowpolicy_model = FlowPolicyModel(
    velocity_net=velocity_net,
    action_dim=action_dim,
    num_inference_steps=NUM_INFERENCE_STEPS,
    eps=CFM_EPS,
).to(device)

total_params = sum(p.numel() for p in velocity_net.parameters())
print(f"FlowPolicy - Total parameters: {total_params:,}")

# Only the velocity network has trainable parameters.
optimizer = Adam(velocity_net.parameters(), lr=LR)

## 6. Training Loop

In [None]:
# Track the best validation loss and the per-epoch loss curves.
best_val_loss = float("inf")
train_loss_history = []
val_loss_history = []

for epoch in range(1, NUM_EPOCHS + 1):
    # ---- Training pass ----
    velocity_net.train()
    epoch_train_losses = []
    for states, actions in tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS} [Train]", leave=False):
        states = states.to(device)
        actions = actions.to(device)
        loss = compute_cfm_loss(
            velocity_net, states, actions, device,
            eps=CFM_EPS, delta=CFM_DELTA,
            num_segments=CFM_NUM_SEGMENTS, boundary=CFM_BOUNDARY, alpha=CFM_ALPHA,
        )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_train_losses.append(loss.item())

    # ---- Validation pass (no gradients) ----
    # The loss draws fresh noise and times on every call, so the validation
    # loss is itself a random estimate.
    velocity_net.eval()
    epoch_val_losses = []
    with torch.no_grad():
        for states, actions in tqdm(val_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS} [Val]", leave=False):
            states = states.to(device)
            actions = actions.to(device)
            loss = compute_cfm_loss(
                velocity_net, states, actions, device,
                eps=CFM_EPS, delta=CFM_DELTA,
                num_segments=CFM_NUM_SEGMENTS, boundary=CFM_BOUNDARY, alpha=CFM_ALPHA,
            )
            epoch_val_losses.append(loss.item())

    # Epoch averages are means over batches.
    avg_train = np.mean(epoch_train_losses)
    avg_val = np.mean(epoch_val_losses)
    train_loss_history.append(avg_train)
    val_loss_history.append(avg_val)

    # Checkpoint whenever validation improves; the saved hyperparameters are
    # enough to rebuild the network later.
    if avg_val < best_val_loss:
        best_val_loss = avg_val
        torch.save({
            "velocity_net_state_dict": velocity_net.state_dict(),
            "state_dim": state_dim,
            "action_dim": action_dim,
            "hidden_dim": HIDDEN_DIM,
            "time_embed_dim": TIME_EMBED_DIM,
            "num_inference_steps": NUM_INFERENCE_STEPS,
            "cfm_eps": CFM_EPS,
        }, os.path.join(SAVE_DIR, "flow_policy_best.pt"))
        print(f"  -> Model saved (val_loss={avg_val:.6f})")

    print(f"EPOCH {epoch}/{NUM_EPOCHS} - Train Loss: {avg_train:.6f}, Val Loss: {avg_val:.6f}")

print(f"\nTraining selesai. Best val_loss = {best_val_loss:.6f}")

## 7. Plot Kurva Training & Simpan Metrik

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation losses on one figure.
plt.figure(figsize=(10, 5))
plt.plot(train_loss_history, label="Train Loss")
plt.plot(val_loss_history, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Consistency Flow Matching Loss")
plt.title("FlowPolicy Training Curves - Kitchen")
plt.legend()
plt.grid(True)
plt.tight_layout()
# Save before show() so the written image contains the drawn figure.
plt.savefig(os.path.join(SAVE_DIR, "flowpolicy_training_curves.png"), dpi=150)
plt.show()

# Persist the raw loss histories next to the checkpoint.
with open(os.path.join(SAVE_DIR, "train_metrics.json"), "w") as f:
    json.dump({"train_loss": train_loss_history, "val_loss": val_loss_history}, f, indent=2)
print("Metrik training disimpan ke train_metrics.json")

## 8. Simulasi & Evaluasi

In [None]:
# Load the best checkpoint. flowpolicy_model wraps this same velocity_net, so
# the restored weights are used for sampling below.
ckpt = torch.load(os.path.join(SAVE_DIR, "flow_policy_best.pt"), map_location=device)
velocity_net.load_state_dict(ckpt["velocity_net_state_dict"])
flowpolicy_model.eval()

# Evaluate on the first samples of the full dataset (training and validation
# samples are not separated here).
all_obs = dataset.obs
all_actions = dataset.actions
max_eval = min(5000, len(all_obs))  # cap on the number of evaluation samples
obs_eval = all_obs[:max_eval].to(device)
targets = all_actions[:max_eval]

# Each prediction is one sample started from random noise, so the metrics
# below vary between runs unless the RNG state is fixed.
with torch.no_grad():
    preds = flowpolicy_model(obs_eval).cpu().numpy()
targets_np = targets.numpy()

mse = np.mean((preds - targets_np) ** 2)
mae = np.mean(np.abs(preds - targets_np))
# R^2 pooled over all action dimensions; the small constant avoids division by zero.
ss_tot = np.sum((targets_np - targets_np.mean(axis=0)) ** 2)
ss_res = np.sum((targets_np - preds) ** 2)
r2 = float(1 - ss_res / (ss_tot + 1e-12))

print("=" * 60)
print("  SIMULASI & EVALUASI - FlowPolicy on Kitchen")
print("=" * 60)
print(f"  MSE:  {mse:.6f}")
print(f"  MAE:  {mae:.6f}")
print(f"  R²:   {r2:.6f}")
print("=" * 60)

## 9. Simpan Hasil Evaluasi & Download (opsional)

In [None]:
# Evaluation artefacts (metrics text file, predictions, rollout video) go here.
RESULTS_DIR = "/content/evaluation_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Human-readable summary of the metrics computed in the evaluation cell.
with open(os.path.join(RESULTS_DIR, "flow_policy_metrics.txt"), "w") as f:
    f.write("FlowPolicy on Kitchen - Simulation & Evaluation\n")
    f.write("=" * 60 + "\n")
    f.write(f"MSE:  {mse:.6f}\n")
    f.write(f"MAE:  {mae:.6f}\n")
    f.write(f"R²:   {r2:.6f}\n")

# Raw predictions and targets for later analysis.
np.savez(
    os.path.join(RESULTS_DIR, "flow_policy_predictions.npz"),
    predictions=preds,
    targets=targets_np,
)

print("Metrik dan prediksi disimpan di", RESULTS_DIR)

# Download from Colab (uncomment if needed)
# from google.colab import files
# files.download(os.path.join(SAVE_DIR, "flow_policy_best.pt"))
# files.download(os.path.join(SAVE_DIR, "flowpolicy_training_curves.png"))
# files.download(os.path.join(RESULTS_DIR, "flow_policy_metrics.txt"))

## 10. Video Hasil Pengujian (max 280 timesteps) & Download

In [None]:
# Record a FlowPolicy rollout video capped at 280 timesteps.
MAX_VIDEO_TIMESTEPS = 280
VIDEO_FPS = 10
video_path = os.path.join(RESULTS_DIR, f"flowpolicy_rollout_{MAX_VIDEO_TIMESTEPS}.mp4")

try:
    # Preferred path: use the project's recorder when the module is available.
    from simulate_and_evaluate import record_rollout_video
    video_path = record_rollout_video(flowpolicy_model, dataset, device, video_path, max_timesteps=MAX_VIDEO_TIMESTEPS, fps=VIDEO_FPS) or video_path
except ImportError:
    # Fallback: render bar charts of state, prediction and target per timestep
    # and write them out as an animated GIF.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    flowpolicy_model.eval()
    obs_v = dataset.obs[:MAX_VIDEO_TIMESTEPS]
    acts_gt = dataset.actions[:MAX_VIDEO_TIMESTEPS]
    with torch.no_grad():
        preds_v = flowpolicy_model(obs_v.to(device)).cpu().numpy()
    obs_v = obs_v.numpy()
    acts_gt = acts_gt.numpy()
    T = len(obs_v)
    frames = []
    for t in range(T):
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 5))
        fig.suptitle(f"FlowPolicy Rollout — timestep {t+1}/{T}")
        # Top panel: the first (up to) 8 state features.
        ax1.bar(range(min(8, obs_v.shape[1])), obs_v[t, :8], color="steelblue", alpha=0.8)
        ax1.set_ylabel("state (subset)")
        # Bottom panel: predicted vs. target action, side by side.
        x = np.arange(min(9, preds_v.shape[1]))
        ax2.bar(x - 0.18, preds_v[t, :len(x)], 0.35, label="pred", color="green", alpha=0.8)
        ax2.bar(x + 0.18, acts_gt[t, :len(x)], 0.35, label="target", color="orange", alpha=0.8)
        ax2.set_ylabel("action")
        ax2.legend()
        plt.tight_layout()
        fig.canvas.draw()
        # tostring_rgb() was deprecated in Matplotlib 3.8 and later removed;
        # buffer_rgba() is the supported way to read the Agg canvas. Drop the
        # alpha channel and copy, because the buffer belongs to the figure
        # that is closed right after.
        img = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
        frames.append(img)
        plt.close(fig)
    import imageio
    imageio.mimsave(video_path.replace(".mp4", ".gif"), frames, fps=VIDEO_FPS, loop=0)
    video_path = video_path.replace(".mp4", ".gif")
    print(f"Video disimpan: {video_path}")

# Download the video (in Colab this triggers a download to the local machine).
try:
    from google.colab import files
    files.download(video_path)
    print("Download video berhasil.")
except ImportError:
    print(f"Untuk download manual, ambil file: {os.path.abspath(video_path)}")