#### Imports

In [None]:
import compiler_gym
import ray, ray.tune as tune
from ray.rllib.agents.ppo import PPOTrainer
from compiler_gym.wrappers import ConstrainedCommandline, TimeLimit, CycleOverBenchmarks
from itertools import islice
from matplotlib import pyplot as plt
from compiler_gym.envs import LlvmEnv

print("compiler_gym version:", compiler_gym.__version__)
print("ray version:", ray.__version__)

#### Create Environment

In [None]:
    def make_env() -> compiler_gym.envs.CompilerEnv:
        """Build the LLVM environment used throughout this experiment.

        The environment uses the "Autophase" observation space (the one from
        https://arxiv.org/pdf/2003.00671.pdf) and the "IrInstructionCount"
        reward space, and every episode is capped at 5 steps.

        NOTE(review): an earlier version of this comment described the reward
        as normalized against LLVM's -Oz policy, but the reward space requested
        here is plain "IrInstructionCount" -- confirm which one is intended.
        """
        llvm_env = compiler_gym.make(
            "llvm-v0",
            observation_space="Autophase",
            reward_space="IrInstructionCount",
        )
        # The task is continuous and no action is guaranteed to reach a
        # terminal state, so cap every episode at 5 steps. This spares the
        # agent from learning when to stop, at the price of limiting the
        # improvement it could reach with an unbounded episode length.
        return TimeLimit(llvm_env, max_episode_steps=5)

#### Datasets

In [None]:
with make_env() as env:
    # The two datasets used in this experiment.
    npb = env.datasets["npb-v0"]
    chstone = env.datasets["chstone-v0"]

    # `benchmarks()` returns an iterator over the benchmarks of a dataset, so
    # slice the iterator to take the first 55 npb-v0 benchmarks: the first 50
    # are used for training and the remaining 5 for validation.
    npb_sample = list(islice(npb.benchmarks(), 55))
    train_benchmarks = npb_sample[:50]
    val_benchmarks = npb_sample[50:]
    # The whole chstone-v0 dataset is used for testing.
    test_benchmarks = list(chstone.benchmarks())

for split_name, split in (
    ("training", train_benchmarks),
    ("validation", val_benchmarks),
    ("testing", test_benchmarks),
):
    print(f"Number of benchmarks for {split_name}:", len(split))

In [None]:
def run_agent_on_benchmarks(benchmarks, subsequence1, subsequence2):
    """Replay scripted action sequences on benchmarks and return episode rewards.

    For every index ``seq_idx`` of ``subsequence1`` and for every benchmark,
    one episode is run. Actions are taken in order from the active action
    sequence (initially ``subsequence1[seq_idx]``), wrapping around at its
    end. Whenever a step's reward is lower than the reward of the previous
    step of the same episode, the policy switches to the other collection
    (same ``seq_idx``) and restarts from that sequence's first action. The
    collection that is active at the end of an episode stays active for the
    next one.

    Args:
        benchmarks: Benchmarks to run; each one is passed to ``env.reset``.
        subsequence1: Collection of action sequences, indexed by position.
            The policy starts with this collection.
        subsequence2: Alternative collection of action sequences. It is
            indexed with the same ``seq_idx`` as ``subsequence1``, so it must
            provide a sequence at every index that may be switched to.

    Returns:
        A list holding one cumulative episode reward per
        (sequence index, benchmark) pair, in iteration order.
    """
    rewards = []
    with make_env() as env:
        active, standby = subsequence1, subsequence2
        for seq_idx in range(len(subsequence1)):
            for i, benchmark in enumerate(benchmarks, start=1):
                env.reset(benchmark=benchmark)
                done = False
                action_idx = 0
                # Rewards of different episodes are not comparable, so the
                # "reward decreased" check starts afresh for every episode.
                prev_reward = None
                while not done:
                    actions = active[seq_idx]
                    _, reward, done, _ = env.step(actions[action_idx])

                    if prev_reward is not None and reward < prev_reward:
                        # Reward dropped: switch collections and start at the
                        # first action of the new sequence (no increment, or
                        # that first action would be skipped).
                        active, standby = standby, active
                        action_idx = 0
                    else:
                        # Advance within the current action sequence,
                        # wrapping around at its end.
                        action_idx = (action_idx + 1) % len(actions)
                    prev_reward = reward

                rewards.append(env.episode_reward)
                print(f"[{i}/{len(benchmarks)}] {env.state}")

    return rewards

# Define subsequence
# NOTE(review): `subsequences1` and `subsequences2` are not defined anywhere
# in the visible part of this notebook; they must be defined before this cell
# is run -- TODO confirm where they come from.
# Evaluate agent performance on the validation set.
# NOTE(review): the call below passes `train_benchmarks`, although the comment
# above and the name `val_rewards` both refer to the validation set -- confirm
# which benchmark list is intended.
val_rewards = run_agent_on_benchmarks(train_benchmarks, subsequences1, subsequences2)


#### Registering the environment with RLlib

In [None]:
def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:
    """Create an environment that cycles over the training benchmarks.

    Positional arguments are accepted and ignored; ray passes an env_config
    argument that this factory does not use.
    """
    base_env = make_env()
    return CycleOverBenchmarks(base_env, train_benchmarks)


# Register the factory with RLlib under the name "compiler_gym".
tune.register_env("compiler_gym", make_training_env)

In [None]:

def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:
    """Return a training environment that cycles over `train_benchmarks`.

    The env_config argument that ray passes is accepted through ``*args`` and
    ignored. This cell repeats the definition and registration from the
    previous cell.
    """
    training_env = CycleOverBenchmarks(make_env(), train_benchmarks)
    return training_env


# Make the factory available to RLlib as "compiler_gym".
tune.register_env("compiler_gym", make_training_env)

#### Run the training loop

In [None]:
# Make sure training begins from a freshly started ray runtime.
if ray.is_initialized():
    ray.shutdown()
ray.init(include_dashboard=False, ignore_reinit_error=True)

tune.register_env("compiler_gym", make_training_env)

# Stop the run once five episodes have been completed in total.
stop_criteria = {"episodes_total": 5}

ppo_config = {
    "seed": 0xCC,
    "num_workers": 1,
    # "compiler_gym" is the name that was passed to tune.register_env().
    "env": "compiler_gym",
    # Keep the batch / trajectory sizes small to match this short training
    # run.
    "rollout_fragment_length": 5,
    "train_batch_size": 5,
    "sgd_minibatch_size": 5,
}

analysis = tune.run(
    PPOTrainer,
    checkpoint_at_end=True,
    stop=stop_criteria,
    config=ppo_config,
)

In [None]:
inference_config = {
    "num_workers": 1,
    "seed": 0xCC,
    # For inference, disable the stochastic exploration that is used during
    # training.
    "explore": False,
}
agent = PPOTrainer(env="compiler_gym", config=inference_config)

# Training produced a single checkpoint (at the end of the run), so that is
# the one restored here. In practice there may be many checkpoints to choose
# from based on their performance on the validation set.
first_trial = analysis.trials[0]
checkpoint = analysis.get_best_checkpoint(
    metric="episode_reward_mean",
    mode="max",
    trial=first_trial,
)

agent.restore(checkpoint)

In [None]:
def run_agent_on_benchmarks(benchmarks, subsequence1, subsequence2):
    """Replay two scripted action sequences on benchmarks; return episode rewards.

    One episode is run per benchmark. Actions are taken in order from the
    active sequence (initially ``subsequence1``), wrapping around at its end.
    Whenever a step's reward is lower than the reward of the previous step of
    the same episode, the policy switches to the other sequence and restarts
    from its first action. The sequence that is active at the end of an
    episode stays active for the next one.

    Args:
        benchmarks: Benchmarks to run; each one is passed to ``env.reset``.
        subsequence1: Sequence of actions the policy starts with.
        subsequence2: Sequence of actions to switch to when the reward drops.

    Returns:
        A list with the cumulative episode reward of every benchmark, in
        the order the benchmarks were given.
    """
    rewards = []
    with make_env() as env:
        active, standby = subsequence1, subsequence2
        for i, benchmark in enumerate(benchmarks, start=1):
            env.reset(benchmark=benchmark)
            done = False
            action_idx = 0
            # Rewards of different episodes are not comparable, so the
            # "reward decreased" check starts afresh for every episode.
            prev_reward = None
            while not done:
                _, reward, done, _ = env.step(active[action_idx])

                if prev_reward is not None and reward < prev_reward:
                    # Reward dropped: switch sequences and start at the first
                    # action of the new one (no increment, or that first
                    # action would be skipped).
                    active, standby = standby, active
                    action_idx = 0
                else:
                    # Advance within the current sequence, wrapping around.
                    action_idx = (action_idx + 1) % len(active)
                prev_reward = reward

            rewards.append(env.episode_reward)
            print(f"[{i}/{len(benchmarks)}] {env.state}")

    return rewards

In [None]:
def run_agent_on_benchmarks(benchmarks):
    """Run the restored agent on each benchmark and collect episode rewards.

    For every benchmark the environment is reset and the agent's actions are
    applied until the episode is done. A progress line with the environment
    state is printed after each episode.

    Returns:
        A list with the cumulative episode reward of every benchmark.
    """
    episode_rewards = []
    with make_env() as env:
        for position, benchmark in enumerate(benchmarks, start=1):
            observation = env.reset(benchmark=benchmark)
            finished = False
            while not finished:
                chosen_action = agent.compute_action(observation)
                observation, _, finished, _ = env.step(chosen_action)
            episode_rewards.append(env.episode_reward)
            print(f"[{position}/{len(benchmarks)}] {env.state}")
    return episode_rewards


# Evaluate agent performance on the validation set.
val_rewards = run_agent_on_benchmarks(val_benchmarks)

In [None]:
# Evaluate agent performance on the test set.
test_rewards = run_agent_on_benchmarks(test_benchmarks)

In [None]:
def plot_results(x, y, name, ax):
    """Draw a bar chart of the rewards `y` on `ax`, using `x` as tick labels.

    `name` is the name of the benchmark set and appears in the plot title.
    """
    # Make `ax` the current axes so the pyplot calls below draw on it.
    plt.sca(ax)
    bar_positions = range(len(y))
    plt.bar(bar_positions, y)
    plt.ylabel("Reward (higher is better)")
    tick_positions = range(len(x))
    plt.xticks(tick_positions, x, rotation=90)
    plt.title(f"Performance on {name} set")

# Validation and test results side by side in one 13x3 inch figure.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(13, 3)
for labels, rewards, set_name, axis in (
    (val_benchmarks, val_rewards, "val", ax1),
    (test_benchmarks, test_rewards, "test", ax2),
):
    plot_results(labels, rewards, set_name, axis)
plt.show()


In [None]:
# Display the validation benchmarks (notebook cell output).
val_benchmarks 

In [None]:
# Rebind both names to plain string labels; the plotting cells below pass them
# to `ax.bar` as the x-axis categories.
# NOTE(review): after this cell `test_benchmarks` / `val_benchmarks` no longer
# hold the benchmark objects taken from the datasets, so re-running the
# evaluation cells above would receive these strings instead -- confirm that
# is acceptable.
test_benchmarks = ['adpcm', 'aes', 'blowfish', 'dfadd', 'dfdiv', 'dfmul', 'dfsin', 'gsm', 'jpeg', 'mips', 'motion', 'sha']
val_benchmarks = ['npb-v0/51', 'npb-v0/52', 'npb-v0/53', 'npb-v0/54', 'npb-v0/55']


In [None]:
import os
import random
import matplotlib.pyplot as plt

def generate_variation(lst1, lst2):
    """Return jittered copies of `lst1` and `lst2` as a 2-tuple of lists.

    Every element gets its own random offset drawn with
    ``random.uniform(-0.05, 0.05)``; the inputs are left unmodified.
    """
    def _jitter(values):
        # One independent uniform offset per element.
        return [value + random.uniform(-0.05, 0.05) for value in values]

    jittered_first = _jitter(lst1)
    jittered_second = _jitter(lst2)
    return (jittered_first, jittered_second)

# Build 9 jittered copies of each list. Every call to `generate_variation`
# jitters both lists; only the first result is kept for `variations1` and only
# the second for `variations2`.
# NOTE(review): `original_list1` / `original_list2` are not defined in the
# visible part of this notebook -- confirm where they come from.
variations1 = [generate_variation(original_list1, original_list2)[0] for _ in range(9)]
variations2 = [generate_variation(original_list1, original_list2)[1] for _ in range(9)]

def plot_results(benchmarks, data, title, ax):
    """Draw a labelled bar chart of `data` per benchmark on the axes `ax`.

    Args:
        benchmarks: Labels used as the bar categories and x tick labels.
        data: Bar heights, one per benchmark.
        title: Title of the axes.
        ax: Axes object to draw on.
    """
    ax.bar(benchmarks, data)
    ax.set_title(title, fontsize=12)
    ax.set_ylim(0, 1.5)  # Adjust as needed

    # Tilt x-axis labels
    ax.set_xticklabels(benchmarks, rotation=25, ha='right', fontsize=12)

    # Dashed reference line at y=1 marking the threshold. It carries a label
    # so that the legend below has an entry to show; without any labelled
    # artist `ax.legend()` only emits a warning and draws nothing.
    ax.axhline(y=1, color='black', linestyle='--', label='y = 1 threshold')
    ax.legend(fontsize=12)
    ax.set_xlabel('Benchmarks', fontsize=12)
    ax.set_ylabel('Reward (Code size) with O3DG', fontsize=12)


def annotate_bars(ax):
    """Annotate every patch of `ax` with its height, formatted to two decimals.

    The text is anchored at the top centre of each patch and shifted upwards
    by 8 points.
    """
    for patch in ax.patches:
        height = patch.get_height()
        center_x = patch.get_x() + patch.get_width() / 2
        ax.annotate(
            f'{height:.2f}',
            (center_x, height),
            ha='center',
            va='center',
            size=12,
            xytext=(0, 8),
            textcoords='offset points',
        )

# Folder (under the user's home directory) where the figures are saved.
folder_path = os.path.join(os.path.expanduser("~"), "CompilerGenie", "mat")
# `exist_ok=True` creates the folder when it is missing and is a no-op when it
# already exists, without a separate existence check that could race with
# another process creating the folder in between.
os.makedirs(folder_path, exist_ok=True)

# One figure per pair of variations: test results on the left, validation
# results on the right, saved as pl_<idx>.png (numbered from 1).
for idx, (each1, each2) in enumerate(zip(variations1, variations2), start=1):
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)
    fig.set_size_inches(14, 7)
    plot_results(test_benchmarks, each1, "Test on CHstone", ax1)
    plot_results(val_benchmarks, each2, "Validation on NAS", ax2)
    for axis in (ax1, ax2):
        annotate_bars(axis)

    # Write the figure into the output folder, then close it to free memory.
    fig.savefig(os.path.join(folder_path, f"pl_{idx}.png"))
    plt.close(fig)


In [None]:
def plot_results(benchmarks, data, title, ax):
    """Plot `data` as one bar per benchmark on `ax`, with title and axis labels.

    Args:
        benchmarks: Labels used as the bar categories and x tick labels.
        data: Bar heights, one per benchmark.
        title: Title of the axes.
        ax: Axes object to draw on.
    """
    ax.bar(benchmarks, data)
    ax.set_title(title, fontsize=12)
    ax.set_ylim(0, 1.5)  # Adjust as needed

    # Tilt x-axis labels
    ax.set_xticklabels(benchmarks, rotation=25, ha='right', fontsize=12)

    # Dashed reference line at y=1 marking the threshold. The label gives the
    # legend an entry; calling `ax.legend()` with no labelled artist only
    # produces a warning and an empty legend.
    ax.axhline(y=1, color='black', linestyle='--', label='y = 1 threshold')
    ax.legend(fontsize=12)
    ax.set_xlabel('Benchmarks', fontsize=12)
    ax.set_ylabel('Reward (Code size) with O3DG', fontsize=12)

In [None]:
# Rewards to plot, one per entry of `test_benchmarks`.
each1 = [1.16, 1.24, 1.08, 1.33, 1.13, 1, 1.02, 0.96, 0.48, 0.86, 0.40, 0.8]

fig, ax = plt.subplots()
fig.set_size_inches(14, 7)
plot_results(test_benchmarks, each1, "Test on CHstone", ax)
annotate_bars(ax)

# Save the figure as new.png in the output folder, then close it to free
# memory.
fig.savefig(os.path.join(folder_path, "new.png"))
plt.close(fig)

In [None]:
def geometric_mean(numbers):
    """Return the geometric mean of a non-empty collection of numbers.

    The result is the n-th root of the product of the n values, so it works
    for any length rather than only for lists of exactly 12 numbers.

    Args:
        numbers: Iterable of non-negative numbers.

    Returns:
        The geometric mean of the values.

    Raises:
        ValueError: If `numbers` is empty or contains a negative number (a
            fractional power of a negative product is not a real number).
    """
    values = list(numbers)
    if not values:
        raise ValueError("The list must contain at least one number.")

    # Calculate the product of all numbers
    product = 1
    for num in values:
        if num < 0:
            raise ValueError(
                "The geometric mean is undefined for negative numbers."
            )
        product *= num

    # Return the n-th root of the product
    return product ** (1 / len(values))

# Example usage: geometric mean of 12 reward values (the same values that are
# plotted as `each1` in the cell above).
numbers = [1.16, 1.24, 1.08, 1.33, 1.13, 1, 1.02, 0.96, 0.48, 0.86, 0.40, 0.8]
print(geometric_mean(numbers))
