In [1]:
%%html
<style>
/* Size output areas to their content and remove any cap on their height. */
.output_wrapper, .output {
    height: auto !important;
    max-height: none;  /* was "999999999999 in": the space before the unit made the declaration invalid */
}
/* Drop the box shadow on elements carrying the .output_scroll class. */
.output_scroll {
    box-shadow: none !important;
    -webkit-box-shadow: none !important;  /* vendor prefix needs the leading "-" */
}
</style>

In [2]:
%matplotlib inline
import ast
import os
import os.path as osp
from glob import glob

from IPython.display import display, Image, Markdown
from ipywidgets import interact, interactive
import ipywidgets as widgets 
import matplotlib.pylab as plt
import numpy as np
import pickle

from attackgraph import settings
import attackgraph.gambit_analysis as gambit_ops
import attackgraph.common.plot_ops as plot_ops

# Print NumPy floating-point values with two digits of precision.
np.set_printoptions(precision=2)
# Root results directory; the cells below treat each of its sub-directories as one run.
RESULTS_DIR = settings.get_results_dir()

In [3]:
def _read_mixture(config_filepath: str) -> np.ndarray:
    """
    Read the opponent's mixing coefficients from a run's config file.

    The first line containing "mixture" is used; everything after its first
    "=" is parsed as a Python literal (e.g. ``[0.3, 0.3, 0.4]``).

    :param config_filepath: Path to the run's ``config.txt``.
    :type config_filepath: str
    :return: The mixing coefficients.
    :rtype: np.ndarray
    :raises ValueError: If no usable "mixture = ..." line is present.
    """
    with open(config_filepath, "r") as config_file:
        lines = [line.rstrip("\n") for line in config_file]

    mixture_line = next((line for line in lines if "mixture" in line), None)
    if mixture_line is None:
        raise ValueError(f"No 'mixture' entry found in {config_filepath}")

    # Split on the "=" instead of a fixed character offset so the parse does
    # not depend on the exact length of the key in front of it.
    _, separator, value = mixture_line.partition("=")
    if not separator:
        raise ValueError(f"Malformed 'mixture' entry in {config_filepath}: {mixture_line!r}")
    return np.array(ast.literal_eval(value.strip()))


def _read_simulated_rewards(log_filepath: str) -> list:
    """
    Parse the simulated opponent rewards out of a run's log file.

    Lines containing "Opponent" but not "vs." are selected, and the 10th
    space-separated token of each (with any commas stripped from its ends) is
    read as a float.

    :param log_filepath: Path to the run's ``out.log``.
    :type log_filepath: str
    :return: The parsed rewards, in log order.
    :rtype: list
    """
    with open(log_filepath, "r") as log_file:
        lines = [line.rstrip("\n") for line in log_file]
    reward_lines = [x for x in lines if "Opponent" in x and "vs." not in x]
    # NOTE(review): token index 9 is tied to the exact log-line layout, which
    # is not visible from this notebook -- confirm against the logger.
    return [float(x.split(' ')[9].strip(',')) for x in reward_lines]


def _plot_learning_curve(reward_filepath: str) -> None:
    """
    Plot the pickled reward series stored at ``reward_filepath``.

    :param reward_filepath: Path to a ``mean_rewards.*.pkl`` file.
    :type reward_filepath: str
    """
    # NOTE(security): unpickling can execute arbitrary code; only open result
    # files that were produced by trusted runs.
    with open(reward_filepath, "rb") as reward_file:
        rewards = pickle.load(reward_file)

    fig, ax = plt.subplots()
    ax.plot(np.arange(len(rewards)), rewards)
    # "\\_" is the same runtime text as the former "\_" literal, without the
    # invalid-escape-sequence warning.
    ax.set_title(osp.basename(reward_filepath).replace("_", "\\_"))
    ax.set_ylabel("Mean 250ep Return")
    ax.set_xlabel("Timestep")
    plt.show()
    plt.close(fig)


def display_result(run_name: str=""):
    """
    Display the mixture, simulated rewards and learning curves of one run.

    Reads ``config.txt``, ``out.log`` and the ``mean_rewards.*.pkl`` files
    from the run's directory under ``RESULTS_DIR``.

    :param run_name: Name of the run. Nothing is displayed when it is None or empty.
    :type run_name: str
    :raises ValueError: If the run's config has no usable "mixture" entry.
    """
    # An empty name would resolve to RESULTS_DIR itself, so treat it like None.
    if not run_name:
        return
    run_dir = osp.join(RESULTS_DIR, run_name)
    display(Markdown(f"# {osp.basename(run_dir)}"))
    display(run_dir)

    # Display the opponent's mixture.
    mixture = _read_mixture(osp.join(run_dir, "config.txt"))
    print("Mixture: ", mixture)

    # Parse out the various opponent simulated rewards.
    simulated_rewards = _read_simulated_rewards(osp.join(run_dir, "out.log"))
    print(simulated_rewards)

    # The first three rewards are treated as Q-Mix and the next three as Q-Mix
    # with state-frequency weighting; any remaining entries are not used here.
    # NOTE(review): the group size of 3 is hard-coded -- confirm it matches the
    # number of opponents in the mixture.
    sim_qmix_rewards = simulated_rewards[:3]
    sim_qmix_statefreq_rewards = simulated_rewards[3:6]

    # Sort for a deterministic plot order, and classify on the file name only
    # so a run directory whose name contains "mixture" is not misclassified.
    reward_files = sorted(glob(osp.join(run_dir, "mean_rewards.*.pkl")))
    pure_reward_files = [x for x in reward_files if "mixture" not in osp.basename(x)]
    mixture_reward_files = [x for x in reward_files if "mixture" in osp.basename(x)]

    # Display pure strategy learning curves.
    display(Markdown("## Best Response to Pure Strategy"))
    for reward_filepath in pure_reward_files:
        _plot_learning_curve(reward_filepath)

    # Load the QMixture's simulated payoff.
    display(Markdown("## QMixture's Simulated Performance"))
    print(sim_qmix_rewards)
    if len(sim_qmix_rewards) == len(mixture) and len(sim_qmix_statefreq_rewards) == len(mixture):
        # Expected payoff under the mixture: coefficient-weighted sum of rewards.
        sim_qmix_mix_reward = np.sum(np.asarray(sim_qmix_rewards) * mixture)
        sim_qmix_statefreq_mix_reward = np.sum(np.asarray(sim_qmix_statefreq_rewards) * mixture)
        display(Markdown(f" - Q-Mix: {sim_qmix_mix_reward}"))
        display(Markdown(f" - Q-Mix w/ State-Freq: {sim_qmix_statefreq_mix_reward}"))
    else:
        # E.g. a log that does not (yet) contain every simulated reward.
        display(Markdown(" - Simulated rewards are missing or do not match the mixture size."))

    # Display the mixture-opponent learning curve.
    # NOTE: overlaying the QMixture payoffs on the curves (axhline) is
    # currently disabled.
    display(Markdown("## Best Response to Mixed Strategy"))
    if mixture_reward_files:
        _plot_learning_curve(mixture_reward_files[0])
    else:
        display(Markdown(" - No mixture reward file found."))
    

In [4]:
# Gather every sub-directory of the results root, in sorted order, as a run name.
run_names = sorted(
    entry
    for entry in os.listdir(RESULTS_DIR)
    if osp.isdir(osp.join(RESULTS_DIR, entry))
)

# Drop-down used to choose which run is rendered.
run_name_widget = widgets.Dropdown(description="Run: ", options=run_names)

# Re-render the selected run through display_result whenever the choice changes.
result_browser = interactive(display_result, run_name=run_name_widget)
display(result_browser)

interactive(children=(Dropdown(description='Run: ', options=('08', '09_09_dqn_tanh_fast_dist', '09_11_dqn_tanh…