In [19]:
import ast
import pandas as pd
import os
from itertools import product

# Define paths
performance_matrix_paths = {
    "default": "./prediction_res/performance_matrix_gpu.csv",
    "hec": "./prediction_res/performance_matrix_gpu_hec.csv"
}
power_cap_paths = {
    "default": "./ecp_power_cap_res/runs/run1/",
    "hec": "./hec_power_cap_res/runs/run1/"
}

# Applications mapped to each matrix
apps_by_matrix = {
    "default": ['miniGAN', 'sw4lite', 'bert_large', 'UNet', 'Resnet50', 'lammps', 'gromacs', 'NAMD', 'Laghos', 'XSBench'],
    "hec": ["addBiasResidualLayerNorm", "aobench", "background-subtract", "convolution3D", "dropout",
            "extrema", "knn", "softmax", "zmddft", "zoom", "chacha20", "kalman", "stencil3d"]
}

# Load performance matrices
performance_matrices = {key: pd.read_csv(path, index_col=0) for key, path in performance_matrix_paths.items()}

# Power cap ranges
cpu_caps = [200, 190, 180, 170, 160, 150, 140]
gpu_caps = [250, 240, 230, 220, 210, 200, 190, 180, 170, 160, 150]
power_cap_pairs = list(product(cpu_caps, gpu_caps))

# Threshold for performance loss
threshold = 0.05

# Store all valid power pairs per app
energy_saving_results = {}

for matrix_key, app_list in apps_by_matrix.items():
    performance_matrix = performance_matrices[matrix_key]
    app_path_base = power_cap_paths[matrix_key]

    for app in app_list:
        app_path = os.path.join(app_path_base, app)

        baseline_pair = "(200, 250)"
        if baseline_pair not in performance_matrix.index or app not in performance_matrix.columns:
            continue

        baseline_perf = performance_matrix.loc[baseline_pair, app]

        baseline_cpu_file = os.path.join(app_path, "200_250_cpu_power.csv")
        baseline_gpu_file = os.path.join(app_path, "200_250_gpu_metrics.csv")

        if not os.path.exists(baseline_cpu_file) or not os.path.exists(baseline_gpu_file):
            continue

        baseline_cpu_data = pd.read_csv(baseline_cpu_file)
        baseline_gpu_data = pd.read_csv(baseline_gpu_file)

        baseline_exec_time = baseline_cpu_data["Time (s)"].iloc[-1]
        baseline_cpu_energy = (baseline_cpu_data["Package Power (W)"].mean() +
                               baseline_cpu_data["DRAM Power (W)"].mean()) * baseline_exec_time
        baseline_gpu_energy = baseline_gpu_data["Power (W)"].mean() * baseline_exec_time
        baseline_total_energy = baseline_cpu_energy + baseline_gpu_energy

        valid_power_pairs = []

        # for cpu_power, gpu_power in power_cap_pairs:
        #     pair_key = f"({cpu_power}, {gpu_power})"
        #     if pair_key not in performance_matrix.index:
        #         continue

        #     norm_perf = performance_matrix.loc[pair_key, app]
        #     perf_loss = 1 - (norm_perf / baseline_perf)

        #     if perf_loss > threshold:
        #         continue

        #     cpu_file = os.path.join(app_path, f"{cpu_power}_{gpu_power}_cpu_power.csv")
        #     gpu_file = os.path.join(app_path, f"{cpu_power}_{gpu_power}_gpu_metrics.csv")

        #     if not os.path.exists(cpu_file) or not os.path.exists(gpu_file):
        #         continue

        #     cpu_data = pd.read_csv(cpu_file)
        #     gpu_data = pd.read_csv(gpu_file)

        #     exec_time = cpu_data["Time (s)"].iloc[-1]
        #     cpu_energy = (cpu_data["Package Power (W)"].mean() + cpu_data["DRAM Power (W)"].mean()) * exec_time
        #     gpu_energy = gpu_data["Power (W)"].mean() * exec_time
        #     total_energy = cpu_energy + gpu_energy

        #     energy_saving = baseline_total_energy - total_energy
        #     energy_saving_percent = energy_saving / baseline_total_energy * 100

        #     valid_power_pairs.append({
        #         "Power Pair": f"{cpu_power}_{gpu_power}",
        #         "Performance Loss (%)": perf_loss * 100,
        #         "Energy Saving (%)": energy_saving_percent
        #     })

        for cpu_power, gpu_power in power_cap_pairs:
            pair_key = f"({cpu_power}, {gpu_power})"
            if pair_key not in performance_matrix.index:
                continue
        
            # Predicted normalized performance
            norm_perf = performance_matrix.loc[pair_key, app]
            predicted_perf_loss = 1 - (norm_perf / baseline_perf)
        
            if predicted_perf_loss > threshold or predicted_perf_loss < 0:
                continue
        
            cpu_file = os.path.join(app_path, f"{cpu_power}_{gpu_power}_cpu_power.csv")
            gpu_file = os.path.join(app_path, f"{cpu_power}_{gpu_power}_gpu_metrics.csv")
        
            if not os.path.exists(cpu_file) or not os.path.exists(gpu_file):
                continue
        
            cpu_data = pd.read_csv(cpu_file)
            gpu_data = pd.read_csv(gpu_file)
        
            # Actual performance from execution time
            actual_exec_time = gpu_data["Time (s)"].iloc[-1]
            actual_perf = baseline_exec_time / actual_exec_time
            actual_perf_loss = 1 - actual_perf
        
            if actual_perf_loss > threshold or actual_perf_loss < 0:
                continue
        
            # Energy computation
            cpu_energy = (cpu_data["Package Power (W)"].mean() + cpu_data["DRAM Power (W)"].mean()) * actual_exec_time
            gpu_energy = gpu_data["Power (W)"].mean() * actual_exec_time
            total_energy = cpu_energy + gpu_energy
        
            energy_saving = baseline_total_energy - total_energy
            energy_saving_percent = energy_saving / baseline_total_energy * 100
        
            # Predicted energy upper bound
            predicted_energy = (cpu_power + gpu_power) * (1 / norm_perf)
            predicted_baseline_energy = (200 + 250) * 1
            predicted_energy_saving_percent = (predicted_baseline_energy - predicted_energy) / predicted_baseline_energy * 100

            valid_power_pairs.append({
                "Power Pair": f"{cpu_power}_{gpu_power}",
                "Predicted Performance Loss (%)": predicted_perf_loss * 100,
                "Actual Performance Loss (%)": actual_perf_loss * 100,
                "Energy Saving (%)": energy_saving_percent,
                "Predicted Energy Saving (%)": predicted_energy_saving_percent
            })

        # Mark the best predicted energy saving
        if valid_power_pairs:
            df = pd.DataFrame(valid_power_pairs)
            best_idx = df["Predicted Energy Saving (%)"].idxmax()
            df["Selected"] = False
            df.loc[best_idx, "Selected"] = True
            energy_saving_results[app] = df.to_dict(orient="records")


        if valid_power_pairs:
            energy_saving_results[app] = valid_power_pairs

In [20]:
for app, results in energy_saving_results.items():
    print(f"\n=== {app} ===")
    df = pd.DataFrame(results)
    print(df.to_string(index=False))


=== miniGAN ===
Power Pair  Predicted Performance Loss (%)  Actual Performance Loss (%)  Energy Saving (%)  Predicted Energy Saving (%)
   200_210                        0.824589                     2.355415           5.374807                     5.381836
   190_220                        0.002327                     2.565106           3.358114                     6.159864
   180_240                        1.011057                     1.929289           3.322977                     2.891494
   180_230                        0.493110                     1.954521           5.714148                     5.697030
   180_220                        2.240614                     2.121929           7.376731                     6.352498
   170_250                        2.283580                     1.931770           4.488260                     1.626888
   170_220                        2.420900                     2.484998           8.326271                     8.524990
   170_210             