# Prepare data for loading

In [1]:
import numpy as np
import os
import cv2
from typing import Dict

DATA_ROOT_DIR = "data"
BENCHMARK_DIR = "saved_benchmarks"

In [2]:
# Every file in data will be considered and preprocessed
file_names = os.listdir(f"{DATA_ROOT_DIR}/{BENCHMARK_DIR}/")

In [3]:
file_names = [f for f in file_names if "Breakout" in f]

In [4]:
def split_data(data: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
    """Splits the data into episodes."""
    episode_ends = np.argwhere(data['dones']).squeeze()
    episodes = {}
    for name, data_item in data.items():
        if data_item.shape:
            episodes[name] = np.split(data_item, episode_ends)
        else:
            episodes[name] = data_item

    return episodes

In [5]:
def encode_video(renders: np.ndarray, path: str) -> None:
    """
    Encodes renders of shape [n_frames, height, width, 3] into a .mp4 video and
    saves it at path.
    """
    # Create video in H264 format
    out = cv2.VideoWriter(
        f"{path}.mp4",
        cv2.VideoWriter_fourcc(*"avc1"),
        24,
        (renders.shape[2], renders.shape[1]),
    )
    for render in renders:
        out.write(render)
    out.release()

In [23]:
for file_name in file_names:
    dir_name = f"{DATA_ROOT_DIR}/episodes/{os.path.splitext(file_name)[0]}"
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name, exist_ok=True)
    data = np.load(f"{DATA_ROOT_DIR}/{BENCHMARK_DIR}/{file_name}", allow_pickle=True)
    episode_data = split_data(data)
    # Save as single episodes with an incrementing index
    for episode_idx, _ in enumerate(episode_data['dones']):
        save_episode = {}
        for name, _ in episode_data.items():
            print(name)
            if name == 'additional_metrics':
                continue
            save_episode[name] = episode_data[name][episode_idx]
        os.makedirs(f"{DATA_ROOT_DIR}/rewards/{os.path.splitext(file_name)[0]}", exist_ok=True)
        np.savez(f"{dir_name}/benchmark_{episode_idx}.npz", **save_episode)

obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
actions
renders
infos
feature_extractor_buffer
probs
episode_rewards
episode_lengths
additional_metrics
obs
rewards
dones
action

In [7]:
for file_name in file_names:
    data = np.load(f"{DATA_ROOT_DIR}/{BENCHMARK_DIR}/{file_name}", allow_pickle=True)
    episode_data = split_data(data)
    for episode_idx, renders in enumerate(episode_data['renders']):
        dir_name = f"{DATA_ROOT_DIR}/renders/{os.path.splitext(file_name)[0]}"
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)
        encode_video(renders, f"{dir_name}/{episode_idx}")
    
    # Free memory
    del data
    del episode_data

[h264_videotoolbox @ 0x7fed4520f200] Error: cannot create compression session: -12903
[h264_videotoolbox @ 0x7fed4520f200] Try -allow_sw 1. The hardware encoder may be busy, or not supported.
[ERROR:0@495.676] global /Users/runner/work/opencv-python/opencv-python/opencv/modules/videoio/src/cap_ffmpeg_impl.hpp (2822) open Could not open codec h264_videotoolbox, error: Unspecified error
[ERROR:0@495.679] global /Users/runner/work/opencv-python/opencv-python/opencv/modules/videoio/src/cap_ffmpeg_impl.hpp (2839) open VIDEOIO/FFMPEG: Failed to initialize VideoWriter
2023-07-03 10:57:22.897 Python[55148:4268933] AVF: AVAssetWriter status: Cannot Save
[h264_videotoolbox @ 0x7fed44239000] Error: cannot create compression session: -12903
[h264_videotoolbox @ 0x7fed44239000] Try -allow_sw 1. The hardware encoder may be busy, or not supported.
[ERROR:0@495.758] global /Users/runner/work/opencv-python/opencv-python/opencv/modules/videoio/src/cap_ffmpeg_impl.hpp (2822) open Could not open codec h26

In [24]:
os.makedirs(f"{DATA_ROOT_DIR}/rewards", exist_ok=True)

# Write thumbnails#
for file_name in file_names:
    data = np.load(f"{DATA_ROOT_DIR}/{BENCHMARK_DIR}/{file_name}", allow_pickle=True)
    episode_data = split_data(data)
    for episode_idx, renders in enumerate(episode_data['renders']):
        dir_name = f"{DATA_ROOT_DIR}/thumbnails/{os.path.splitext(file_name)[0]}"
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)
        
        # Save first rame of the episode, first convert to BGR
        save_image = cv2.cvtColor(renders[0], cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"{dir_name}/{episode_idx}.jpg", save_image)

        # Save step rewards to "rewards_{}.npy" arrays
        os.makedirs(f"{DATA_ROOT_DIR}/rewards/{os.path.splitext(file_name)[0]}", exist_ok=True)
        np.save(f"{DATA_ROOT_DIR}/rewards/{os.path.splitext(file_name)[0]}/rewards_{episode_idx}.npy", np.cumsum(episode_data['rewards'][episode_idx]))

    # Free memory
    del data
    del episode_data