In [1]:
import os
import pandas as pd
import numpy as np

# Benchmarks list
benchmarks = ["bert_large", "Resnet50", "UNet", "CRADL","gromacs","miniGAN"]
benchmarks = ["Resnet50", "UNet"]
# benchmarks = ["NAMD"]


run_id = 1
# Base input directory structure
input_base_dir = f"./ecp_power_cap_res/runs/run{run_id}/{{}}/unfiltered/"
input_base_dir = f"./A30/ecp_power_cap_res/runs/run{run_id}/{{}}/unfiltered/"


# Output base directory

output_base_dir = f"./ecp_power_cap_res/runs/run{run_id}/"
output_base_dir = f"./A30/ecp_power_cap_res/runs/run{run_id}/"


# Ensure output directories exist
for benchmark in benchmarks:
    os.makedirs(os.path.join(output_base_dir, benchmark), exist_ok=True)

def process_csv_files(cpu_power, gpu_power, app_name, input_dir):
    """Process CSV files for a given CPU and GPU power combination."""
    prefix = f"{cpu_power}_{gpu_power}"
    
    # File paths
    gpu_metrics_file = os.path.join(input_dir, f"{prefix}_gpu_metrics.csv")
    cpu_power_file = os.path.join(input_dir, f"{prefix}_cpu_power.csv")
    cpu_metrics_file = os.path.join(input_dir, f"{prefix}_cpu_metrics.csv")

    # Output directory
    output_dir = os.path.join(output_base_dir, app_name)
    os.makedirs(output_dir, exist_ok=True)

    # Read GPU metrics
    df_gpu = pd.read_csv(gpu_metrics_file)
    if app_name == "UNet":
        count = 5
    elif app_name == "gromacs":
        count = 1
    elif app_name == "miniGAN":
        count = 1
    else:
        count = 15
    # Find first row where FP16 Active + FP32 Active + FP64 Active > 0 for 20 continuous rows
    fp_active = df_gpu["FP16 Active"] + df_gpu["FP32 Active"] + df_gpu["FP64 Active"]
    for i in range(len(fp_active) - count - 1):
        if all(fp_active.iloc[i:i+count] > 0):
            T = df_gpu.iloc[i]["Time (s)"]
            df_gpu = df_gpu.iloc[i:].copy()  # Keep rows from T onward
            df_gpu["Time (s)"] -= T  # Normalize time
            break
    else:
        print(f"No valid GPU activity found for {prefix}, skipping.")
        return

    # Read CPU power and metrics
    df_cpu_power = pd.read_csv(cpu_power_file)
    df_cpu_metrics = pd.read_csv(cpu_metrics_file)

    # Find nearest timestamp in CPU power and metrics files
    T_cpu_power = df_cpu_power["Time (s)"].sub(T).abs().idxmin()
    T_cpu_metrics = df_cpu_metrics["Time (s)"].sub(T).abs().idxmin()

    # Process CPU power file
    df_cpu_power = df_cpu_power.iloc[T_cpu_power:].copy()
    df_cpu_power["Time (s)"] -= df_cpu_power.iloc[0]["Time (s)"]

    # Process CPU metrics file
    df_cpu_metrics = df_cpu_metrics.iloc[T_cpu_metrics:].copy()
    df_cpu_metrics["Time (s)"] -= df_cpu_metrics.iloc[0]["Time (s)"]

    # Save processed files
    df_gpu.to_csv(os.path.join(output_dir, f"{prefix}_gpu_metrics.csv"), index=False)
    df_cpu_power.to_csv(os.path.join(output_dir, f"{prefix}_cpu_power.csv"), index=False)
    df_cpu_metrics.to_csv(os.path.join(output_dir, f"{prefix}_cpu_metrics.csv"), index=False)

    print(f"Processed {prefix} and saved in {output_dir}")

# Iterate over specified benchmarks
for benchmark in benchmarks:
    input_dir = input_base_dir.format(benchmark)
    
    if not os.path.exists(input_dir):
        print(f"Input directory {input_dir} does not exist, skipping {benchmark}.")
        continue

    for filename in os.listdir(input_dir):
        if "_gpu_metrics.csv" in filename:
            parts = filename.split("_")
            cpu_power, gpu_power = parts[0], parts[1]
            process_csv_files(cpu_power, gpu_power, benchmark, input_dir)


Processed 200_180 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 180_190 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 180_250 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 160_200 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 140_190 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 200_150 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 180_230 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 190_220 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 170_210 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 200_220 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 180_150 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 160_180 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 170_250 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 170_220 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 160_240 and saved in ./ecp_power_cap_res/runs/run1/UNet
Processed 

In [5]:
import os
import shutil

# Base directory where run folders are located
base_dir = "./ecp_power_cap_res/runs/"

# Define the runs and application names
runs = ["run1", "run2", "run3", "run4", "run5"]
apps = ["bert_large", "Resnet50", "UNet", "CRADL", "gromacs", "miniGAN","NAMD"]
apps = ["NAMD"]

# Traverse each run and app directory
for run in runs:
    for app in apps:
        app_path = os.path.join(base_dir, run, app)
        unfiltered_path = os.path.join(app_path, "unfiltered")

        if os.path.isdir(app_path):
            os.makedirs(unfiltered_path, exist_ok=True)

            # Move all CSV files to the unfiltered folder
            for file in os.listdir(app_path):
                if file.endswith(".csv"):
                    src = os.path.join(app_path, file)
                    dst = os.path.join(unfiltered_path, file)
                    shutil.move(src, dst)

In [1]:
import os
import pandas as pd
import numpy as np

# Benchmarks list
benchmarks = ["bert_large", "Resnet50", "UNet", "CRADL", "gromacs", "miniGAN"]
benchmarks = ["NAMD"]


# Base directory containing run folders
base_run_dir = "./ecp_power_cap_res/runs"

def process_csv_files(cpu_power, gpu_power, app_name, input_dir, output_dir):
    """Process CSV files for a given CPU and GPU power combination."""
    prefix = f"{cpu_power}_{gpu_power}"

    # File paths
    gpu_metrics_file = os.path.join(input_dir, f"{prefix}_gpu_metrics.csv")
    cpu_power_file = os.path.join(input_dir, f"{prefix}_cpu_power.csv")
    cpu_metrics_file = os.path.join(input_dir, f"{prefix}_cpu_metrics.csv")

    # Check if files exist
    if not all(os.path.exists(f) for f in [gpu_metrics_file, cpu_power_file, cpu_metrics_file]):
        print(f"Missing files for {prefix} in {input_dir}, skipping.")
        return

    # Read GPU metrics
    df_gpu = pd.read_csv(gpu_metrics_file)
    if app_name == "UNet":
        count = 5
    elif app_name == "gromacs":
        count = 1
    elif app_name == "miniGAN":
        count = 1
    elif app_name == "NAMD":
        count = 5
    else:
        count = 15

    # Find first row with sustained GPU activity
    fp_active = df_gpu["FP16 Active"] + df_gpu["FP32 Active"] + df_gpu["FP64 Active"]
    for i in range(len(fp_active) - count - 1):
        if all(fp_active.iloc[i:i + count] > 0):
            T = df_gpu.iloc[i]["Time (s)"]
            df_gpu = df_gpu.iloc[i:].copy()
            df_gpu["Time (s)"] -= T
            break
    else:
        print(f"No valid GPU activity found for {prefix}, skipping.")
        return

    # Read CPU power and metrics
    df_cpu_power = pd.read_csv(cpu_power_file)
    df_cpu_metrics = pd.read_csv(cpu_metrics_file)

    # Align timestamps
    T_cpu_power = df_cpu_power["Time (s)"].sub(T).abs().idxmin()
    T_cpu_metrics = df_cpu_metrics["Time (s)"].sub(T).abs().idxmin()

    df_cpu_power = df_cpu_power.iloc[T_cpu_power:].copy()
    df_cpu_power["Time (s)"] -= df_cpu_power.iloc[0]["Time (s)"]

    df_cpu_metrics = df_cpu_metrics.iloc[T_cpu_metrics:].copy()
    df_cpu_metrics["Time (s)"] -= df_cpu_metrics.iloc[0]["Time (s)"]

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save processed files
    df_gpu.to_csv(os.path.join(output_dir, f"{prefix}_gpu_metrics.csv"), index=False)
    df_cpu_power.to_csv(os.path.join(output_dir, f"{prefix}_cpu_power.csv"), index=False)
    df_cpu_metrics.to_csv(os.path.join(output_dir, f"{prefix}_cpu_metrics.csv"), index=False)

    print(f"Processed {prefix} -> {output_dir}")

# Walk through each run and benchmark
for run_name in os.listdir(base_run_dir):
    run_path = os.path.join(base_run_dir, run_name)
    if not os.path.isdir(run_path):
        continue

    for benchmark in benchmarks:
        input_dir = os.path.join(run_path, benchmark, "unfiltered")
        output_dir = os.path.join(run_path, benchmark)

        if not os.path.exists(input_dir):
            print(f"Missing unfiltered dir: {input_dir}, skipping.")
            continue

        for filename in os.listdir(input_dir):
            if "_gpu_metrics.csv" in filename:
                parts = filename.split("_")
                cpu_power, gpu_power = parts[0], parts[1]
                process_csv_files(cpu_power, gpu_power, benchmark, input_dir, output_dir)


Processed 200_180 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 180_190 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 180_250 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 160_200 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 140_190 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 200_150 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 180_230 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 190_220 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 170_210 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 200_220 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 180_150 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 160_180 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 170_250 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 170_220 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 160_240 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 190_200 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 170_240 -> ./ecp_power_cap_res/runs/run5/NAMD
Processed 150_250 -> ./ecp_power_cap_res/runs/ru