In [1]:
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
import os

# set matplotlib style
plt.style.use('seaborn-v0_8-paper')

In [2]:
# Discount factor used for each benchmark environment when computing
# discounted sums of rewards. Insertion order defines the panel order of the grid plots.
discount_factors = dict(
    [
        ('HalfCheetah-v5', 0.99),
        ('Hopper-v5', 0.999),
        ('Swimmer-v5', 0.9999),
        ('Ant-v5', 0.98),
        ('ALE/BeamRider-v5', 0.99),
        ('ALE/MsPacman-v5', 0.99),
        ('ALE/Enduro-v5', 0.99),
    ]
)

# Environment ids, in the same order as the discount-factor table above.
environments = list(discount_factors)

## Plots for evaluative feedback

In [None]:
# Statistics are computed one environment at a time (memory constraints) and
# collected here, keyed by environment id.
results = {}


def discounted_sum_numpy(rewards, discount_factor):
    """Return the discounted sum of a reward sequence.

    Computes sum over t of ``discount_factor ** t * rewards[t]`` with t starting at 0.

    Args:
        rewards: sequence of per-step rewards (anything ``np.array`` accepts).
        discount_factor: scalar discount applied per step.

    Returns:
        The discounted sum as a numpy scalar (0.0 for an empty sequence).
    """
    reward_arr = np.array(rewards)
    step_exponents = np.arange(len(reward_arr))
    # Named `weights` so the module-level `discount_factors` dict is not shadowed.
    weights = np.power(discount_factor, step_exponents)
    return np.sum(reward_arr * weights)

for env in environments:

    # Feedback file names contain the environment name and the seed; ALE ids
    # have their '/' replaced by '-' in file names.
    env_name = env if 'ALE' not in env else env.replace('/', '-')
    # os.listdir order is unspecified; sort so results are appended in a stable order.
    feedback_files = sorted(f for f in os.listdir("feedback") if env_name in f)

    # Create the entry up front so the plotting cells can index results[env]
    # even when no feedback file exists for this environment.
    results.setdefault(env, [])

    # Load data
    for file in feedback_files:
        # The seed is the last '_'-separated token of the file stem. The previous
        # `_, seed = ...[-1]` unpacked the *characters* of that token and raised
        # ValueError unless it was exactly two characters long.
        seed = file.split('.')[0].split('_')[-1]

        # NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
        with open(f"feedback/{file}", 'rb') as f:
            data = pkl.load(f)

        # Per-segment reward sequences: element 2 of every step record is read as the reward.
        segment_rewards = [[e[2] for e in d] for d in data["segments"]]

        # Segments are ordered by their undiscounted reward sum; every other
        # per-segment quantity is reordered with the same permutation.
        index = np.argsort([np.sum(rews) for rews in segment_rewards])

        # compute discounted sum of rewards
        ordered_gt_rews = np.array(
            [discounted_sum_numpy(rews, discount_factors[env]) for rews in segment_rewards]
        )[index]

        # opt gaps (stored values are negated)
        opt_gaps = -np.array([d for d in data["opt_gaps"]])[index]

        # ratings
        ratings = np.array([d for d in data["ratings"]])[index]

        # demo rewards -- indexing with `index` assumes one demo per segment,
        # stored in the same order as the segments (TODO confirm).
        ordered_demo_rews = np.array(
            [discounted_sum_numpy([e[2] for e in d], discount_factors[env]) for d in data["demos"]]
        )[index]

        # store data
        results[env].append((ordered_gt_rews, opt_gaps, ratings, ordered_demo_rews))

In [None]:
# Grid of scatter plots: optimality gap (y-axis) vs. discounted sum of rewards (x-axis),
# one panel per environment and one marker series per loaded feedback file.

fig, axs = plt.subplots(2, 4, figsize=(20, 10))
panels = axs.ravel()

for ax, env in zip(panels, environments):
    for run in results[env]:
        # run = (ordered_gt_rews, opt_gaps, ratings, ordered_demo_rews)
        ax.plot(run[0], run[1], 'o', alpha=0.5)
    ax.set_title(env)
    ax.set_xlabel("Discounted sum of rewards")
    ax.set_ylabel("Optimality gap")

# The 2x4 grid has more panels than environments; hide the unused ones
# instead of leaving empty axes frames in the saved figure.
for unused_ax in panels[len(environments):]:
    unused_ax.set_visible(False)

fig.tight_layout()

fig.savefig("opt_gaps_vs_discounted_sum_of_rewards.png")

In [None]:
# Grid of scatter plots: rating (y-axis) vs. discounted sum of rewards (x-axis),
# one panel per environment and one marker series per loaded feedback file.

fig, axs = plt.subplots(2, 4, figsize=(20, 10))
panels = axs.ravel()

for ax, env in zip(panels, environments):
    for run in results[env]:
        # run = (ordered_gt_rews, opt_gaps, ratings, ordered_demo_rews)
        ax.plot(run[0], run[2], 'o', alpha=0.5)
    ax.set_title(env)
    ax.set_xlabel("Discounted sum of rewards")
    ax.set_ylabel("Rating")

# The 2x4 grid has more panels than environments; hide the unused ones
# instead of leaving empty axes frames in the saved figure.
for unused_ax in panels[len(environments):]:
    unused_ax.set_visible(False)

fig.tight_layout()

fig.savefig("ratings_vs_discounted_sum_of_rewards.png")

## Plots for demonstrative feedback

In [None]:
# Instead of ratings, compare the discounted sum of rewards of the original data
# ("segments") with that of the demonstrations ("demos").
# Grid plot with one panel per environment; each loaded feedback file contributes
# one "Segment" curve and one "Demo" curve.

fig, axs = plt.subplots(2, 4, figsize=(20, 10))
panels = axs.ravel()

for ax, env in zip(panels, environments):
    for run_idx, run in enumerate(results[env]):
        # run = (ordered_gt_rews, opt_gaps, ratings, ordered_demo_rews)
        segment_rews, demo_rews = run[0], run[3]
        steps = np.arange(len(segment_rews))
        # Label only the first pair of curves; labelling every file's curves
        # repeated "Segment"/"Demo" in the legend once per feedback file.
        is_first = run_idx == 0
        ax.plot(steps, segment_rews, label="Segment" if is_first else "_nolegend_",
                alpha=0.5, color='blue')
        ax.plot(steps, demo_rews, label="Demo" if is_first else "_nolegend_",
                alpha=0.5, color='orange')
    ax.set_title(env)
    ax.set_xlabel("Segment")
    ax.set_ylabel("Discounted sum of rewards: Segment vs. Demo")
    if results[env]:
        # Skip the legend for empty panels to avoid matplotlib's "no labelled artists" warning.
        ax.legend()

# The 2x4 grid has more panels than environments; hide the unused ones.
for unused_ax in panels[len(environments):]:
    unused_ax.set_visible(False)

fig.tight_layout()

fig.savefig("segment_vs_demo_rewards.png")

In [None]:
# The "feedback_videos" directory holds some selected clips named
# "segment_<env>_<seed>_<segmentindex>_<optgap>.mp4" or
# "demo_<env>_<seed>_<segmentindex>_<optgap>.mp4" (here the demos are the corrections).
# For each environment we pick one seed, find the segment/demo pair with the same
# segment index and the largest difference in optgap, and save a strip of
# representative frames (5 per video) for the segment and for the demo.

import cv2

VIDEO_DIR = "feedback_videos"
N_FRAMES = 5        # frames sampled per video
FRAME_STRIDE = 10   # distance (in frames) between two sampled frames


def _parse_video_name(filename):
    """Split "<kind>_<env>_<seed>_<segmentindex>_<optgap>.<ext>" into its fields.

    Returns:
        (kind, env, seed, segindex, optgap) with optgap as float, or None when
        the name does not follow the scheme.
    """
    # Strip only the extension: the optgap itself may contain a '.', so splitting
    # on the first dot would cut off its fractional part.
    stem = os.path.splitext(filename)[0]
    parts = stem.split('_')
    if len(parts) != 5:
        return None
    kind, env_part, seed_part, segindex, optgap = parts
    try:
        return kind, env_part, seed_part, segindex, float(optgap)
    except ValueError:
        return None


def _sample_frames(video_path, n_frames=N_FRAMES, stride=FRAME_STRIDE):
    """Read frames 0, stride, 2*stride, ... from a video and return them as RGB arrays.

    Frames that cannot be read (e.g. the video is too short) are skipped.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        for k in range(n_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES, k * stride)
            ok, frame = cap.read()
            if ok:
                # OpenCV decodes to BGR; matplotlib expects RGB.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even if reading fails.
        cap.release()
    return frames


def _save_frame_strip(frames, out_path, n_cols=N_FRAMES):
    """Save the given frames side by side in a single-row figure at out_path."""
    fig, axs = plt.subplots(1, n_cols, figsize=(20, 4), squeeze=False)
    row = axs[0]
    for ax in row:
        ax.axis('off')  # also blanks panels for which no frame could be read
    for ax, frame in zip(row, frames):
        ax.imshow(frame)
    fig.tight_layout()
    fig.savefig(out_path)
    plt.close(fig)


# Only consider the videos themselves; the PNG strips written below land in the
# same directory and must not be picked up when the cell is re-run.
all_videos = sorted(f for f in os.listdir(VIDEO_DIR) if f.endswith(".mp4"))

for env in environments:

    # ALE ids have their '/' replaced by '-' in file names.
    env_name = env if 'ALE' not in env else env.replace('/', '-')

    # Index the clips of this environment by (seed, segment index).
    segment_clips, demo_clips = {}, {}
    for name in all_videos:
        if env_name not in name:
            continue
        parsed = _parse_video_name(name)
        if parsed is None:
            continue
        kind, _, video_seed, segindex, optgap = parsed
        if kind == "segment":
            segment_clips[(video_seed, segindex)] = (name, optgap)
        elif kind == "demo":
            demo_clips[(video_seed, segindex)] = (name, optgap)

    # Exact (seed, segment index) matches only; a substring test on the index
    # could pair e.g. segment "1" with demo "11".
    paired_keys = sorted(set(segment_clips) & set(demo_clips))
    if not paired_keys:
        print(f"No segment/demo video pair found for {env}; skipping.")
        continue

    # One seed per environment is enough: take the first seed that has a pair.
    # The seed is derived from the video names so this cell does not depend on
    # a loop variable left over from another cell.
    chosen_seed = paired_keys[0][0]
    candidates = [key for key in paired_keys if key[0] == chosen_seed]

    # take the pair with the highest difference in optgap (segment minus demo)
    best_key = max(candidates, key=lambda key: segment_clips[key][1] - demo_clips[key][1])
    video_file = segment_clips[best_key][0]
    demo_file = demo_clips[best_key][0]

    for clip in (video_file, demo_file):
        frames = _sample_frames(os.path.join(VIDEO_DIR, clip))
        out_name = os.path.splitext(clip)[0] + ".png"
        _save_frame_strip(frames, os.path.join(VIDEO_DIR, out_name))

## Plots for descriptive feedback

In [None]:
# Descriptive feedback: relate the significant positive / negative attributions of each
# segment to the segment's discounted sum of rewards. Attributions have the same shape and
# value range as the segment observations; an attribution value is classified as significant
# if it is at least 50% as large as the observation value. The significant positive and
# negative attributions are tallied per segment and plotted against the discounted sum of rewards.

# Statistics are computed one environment at a time (memory constraints).
# Note: this resets `results`, replacing whatever an earlier cell stored in it.
results = {}


# Helper, defined in this cell so the section can be run on its own.
def discounted_sum_numpy(rewards, discount_factor):
    """Discounted sum of a reward sequence: sum_t discount_factor**t * rewards[t].

    Args:
        rewards: sequence of per-step rewards (anything ``np.array`` accepts).
        discount_factor: scalar discount applied per step.

    Returns:
        The discounted sum as a numpy scalar (0.0 for an empty sequence).
    """
    reward_values = np.array(rewards)
    horizon = len(reward_values)
    # Per-step weights 1, g, g^2, ...; named so the global `discount_factors` is not shadowed.
    step_weights = np.power(discount_factor, np.arange(horizon))
    weighted = reward_values * step_weights
    return np.sum(weighted)

for env in environments:

    # Feedback file names contain the environment name and the seed; ALE ids
    # have their '/' replaced by '-' in file names.
    env_name = env if 'ALE' not in env else env.replace('/', '-')
    # os.listdir order is unspecified; sort so results are appended in a stable order.
    feedback_files = sorted(f for f in os.listdir("feedback") if env_name in f)

    # Create the entry up front so the plotting cells can index results[env]
    # even when no feedback file exists for this environment.
    results.setdefault(env, [])

    # Load data
    for file in feedback_files:
        # The seed is the last '_'-separated token of the file stem. The previous
        # `_, seed = ...[-1]` unpacked the *characters* of that token and raised
        # ValueError unless it was exactly two characters long.
        seed = file.split('.')[0].split('_')[-1]

        # NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
        with open(f"feedback/{file}", 'rb') as f:
            data = pkl.load(f)

        # Per-segment reward sequences: element 2 of every step record is read as the reward.
        segment_rewards = [[e[2] for e in d] for d in data["segments"]]

        # Segments are ordered by their undiscounted reward sum; every other
        # per-segment quantity is reordered with the same permutation.
        index = np.argsort([np.sum(rews) for rews in segment_rewards])

        # compute discounted sum of rewards
        ordered_gt_rews = np.array(
            [discounted_sum_numpy(rews, discount_factors[env]) for rews in segment_rewards]
        )[index]

        # attributions
        # Count significant values per segment in the ORIGINAL order first, so every
        # attribution is compared with its own segment, and reorder afterwards.
        # (Previously the attributions were reordered before being zipped with the
        # unordered segments, which paired them with the wrong segments.)
        # The threshold uses the magnitude of the reference value, so a negative
        # reference no longer makes almost every attribution count as significant.
        # NOTE(review): `s` is the raw entry of data["segments"]; the rule is stated in
        # terms of observation values, so the observations may have to be extracted
        # from `s` first -- confirm against the feedback file format.
        significant_positives = np.array(
            [np.sum(np.asarray(a) > 0.5 * np.abs(s))
             for a, s in zip(data["descriptions"], data["segments"])]
        )[index]
        significant_negatives = np.array(
            [np.sum(np.asarray(a) < -0.5 * np.abs(s))
             for a, s in zip(data["descriptions"], data["segments"])]
        )[index]

        # store data
        results[env].append((ordered_gt_rews, significant_positives, significant_negatives))

In [None]:
# Grid of scatter plots: significant positive attributions (y-axis) vs. discounted sum
# of rewards (x-axis), one panel per environment and one marker series per feedback file.

fig, axs = plt.subplots(2, 4, figsize=(20, 10))
panels = axs.ravel()

for ax, env in zip(panels, environments):
    for run in results[env]:
        # run = (ordered_gt_rews, significant_positives, significant_negatives)
        ax.plot(run[0], run[1], 'o', alpha=0.5)
    ax.set_title(env)
    ax.set_xlabel("Discounted sum of rewards")
    ax.set_ylabel("Significant positive attributions")

# The 2x4 grid has more panels than environments; hide the unused ones
# instead of leaving empty axes frames in the saved figure.
for unused_ax in panels[len(environments):]:
    unused_ax.set_visible(False)

fig.tight_layout()

fig.savefig("significant_positive_attributions_vs_discounted_sum_of_rewards.png")

In [None]:
# Grid of scatter plots: significant negative attributions (y-axis) vs. discounted sum
# of rewards (x-axis), one panel per environment and one marker series per feedback file.

fig, axs = plt.subplots(2, 4, figsize=(20, 10))
panels = axs.ravel()

for ax, env in zip(panels, environments):
    for run in results[env]:
        # run = (ordered_gt_rews, significant_positives, significant_negatives)
        ax.plot(run[0], run[2], 'o', alpha=0.5)
    ax.set_title(env)
    ax.set_xlabel("Discounted sum of rewards")
    ax.set_ylabel("Significant negative attributions")

# The 2x4 grid has more panels than environments; hide the unused ones
# instead of leaving empty axes frames in the saved figure.
for unused_ax in panels[len(environments):]:
    unused_ax.set_visible(False)

# Lay out and save the figure like the other grid plots in this notebook.
fig.tight_layout()

fig.savefig("significant_negative_attributions_vs_discounted_sum_of_rewards.png")