In [1]:
import csv
import math
import re
from pathlib import Path
from collections import defaultdict
import numpy as np

# Root directory; each entry of APPS names a sub-directory of it that is scanned
# for metric CSVs and an optional runtime.csv.
ROOT_DIR = Path("/home/ac.zzheng/power-GPU-count/GPGPU/data/H100/spec_power_motif")
APPS = ['lbm', 'cloverleaf', 'tealeaf', 'minisweep', 'pot3d', 'miniweather', 'hpgmg']

# A GPU is treated as active when its mean "GPU<N>_Power (W)" reaches
# ACTIVE_POWER_TH or its mean "GPU<N>_SM_Clock (MHz)" reaches ACTIVE_SM_TH
# (means are taken over the profiling window).
ACTIVE_POWER_TH = 120.0
ACTIVE_SM_TH = 400.0
# Width of the profiling window, added to the earliest "Time (s)" value kept
# after idle-row filtering; only rows inside the window feed the features.
PROFILE_SECONDS = 5.0

# Set to None for all power caps, or provide a list e.g. [800, 900, 1000]
# FOCUS_POWER_CAPS = [800, 900, 1000, 1100, 1200, 1300, 1500, 1600, 1700, 1800, 1900, 2000]
FOCUS_POWER_CAPS = [800, 900, 1000, 1200, 1400, 1600, 2000]

# Metric file names look like "<cap>_<gpus>_gpu_metrics.csv"; the pattern is
# applied with .match(), so the name must start with the power cap digits.
FILE_RE = re.compile(r"(?P<cap>\d+)_(?P<gpus>\d+)_gpu_metrics\.csv$")


def f(x):
    """Parse *x* as a float, returning ``None`` when it is not numeric.

    The value is converted to ``str`` and stripped first, so ``None``, empty
    strings and non-numeric text all yield ``None`` instead of raising.

    Only conversion failures are suppressed; interpreter-level exceptions
    such as ``KeyboardInterrupt`` propagate to the caller.
    """
    try:
        return float(str(x).strip())
    except (TypeError, ValueError):
        return None


def mean(xs):
    """Return the arithmetic mean of *xs*, or NaN when *xs* is empty."""
    if not xs:
        return float("nan")
    return sum(xs) / len(xs)


def sigmoid(z):
    """Return the logistic function of *z*, with *z* clipped to [-40, 40].

    The clipping bounds the argument passed to ``np.exp``.
    """
    clipped = np.clip(z, -40, 40)
    denominator = 1.0 + np.exp(-clipped)
    return 1.0 / denominator


def read_runtime_lookup(path):
    """Load measured runtimes from the CSV at *path*.

    Returns a dict mapping ``(power_cap, gpu_count)`` (both rounded to int)
    to the parsed ``runtime_seconds`` value. An empty dict is returned when
    the file does not exist or lacks any of the three required columns.
    Rows in which any of the three fields fails to parse are skipped; later
    rows overwrite earlier ones with the same key.
    """
    lookup = {}
    if not path.exists():
        return lookup

    required = {"power_cap", "gpu_count", "runtime_seconds"}
    with path.open(newline="") as handle:
        reader = csv.DictReader(handle)
        header = set(reader.fieldnames or [])
        if not required <= header:
            return lookup
        for record in reader:
            cap = f(record.get("power_cap"))
            gpus = f(record.get("gpu_count"))
            seconds = f(record.get("runtime_seconds"))
            if cap is None or gpus is None or seconds is None:
                continue
            lookup[(int(round(cap)), int(round(gpus)))] = seconds
    return lookup


def _gpu_id_from_column(col):
    """Return the GPU index encoded in a ``GPU<N>_...`` column name, else None."""
    if not (col.startswith("GPU") and "_" in col):
        return None
    try:
        return int(col.split("_")[0].replace("GPU", ""))
    except ValueError:
        # e.g. "GPU_Count": starts with "GPU" but carries no numeric index.
        return None


def analyze_metric_file(path, runtime_lookup):
    """Extract per-run features from one ``<cap>_<gpus>_gpu_metrics.csv`` file.

    Args:
        path: ``Path`` of the metric CSV; its name supplies the power cap and
            GPU count via ``FILE_RE``.
        runtime_lookup: dict mapping ``(power_cap, gpu_count)`` to a runtime.
            When the key is missing, the span of the retained ``Time (s)``
            values is used instead.

    Returns:
        A dict with ``power_cap``, ``gpu_count``, ``runtime``, the
        active-GPU averages ``avg_power`` / ``avg_sm`` / ``avg_dram`` and the
        same three averages multiplied by the GPU count; or ``None`` when the
        file name does not match, the file has no rows, the ``Time (s)``
        column is missing, or no usable rows/timestamps remain.
    """
    m = FILE_RE.match(path.name)
    if not m:
        return None
    cap = int(m.group("cap"))
    gcount = int(m.group("gpus"))

    with path.open(newline="") as fh:
        reader = csv.DictReader(fh)
        rows = list(reader)
        cols = reader.fieldnames or []

    if not rows or "Time (s)" not in cols:
        return None

    # Drop idle points where GPU0 DRAM activity is zero (or unparsable).
    if "GPU0_DRAM_Active" in cols:
        def has_dram_activity(row):
            v = f(row.get("GPU0_DRAM_Active"))
            return v is not None and v != 0.0

        rows = [row for row in rows if has_dram_activity(row)]

    if not rows:
        return None

    # Columns that merely start with "GPU" but carry no index are ignored
    # rather than aborting the analysis of the whole file.
    gpu_ids = sorted({
        gid for gid in (_gpu_id_from_column(c) for c in cols) if gid is not None
    })

    # Parse each timestamp once and keep it next to its row.
    timed_rows = [(f(row.get("Time (s)")), row) for row in rows]
    times = [t for t, _ in timed_rows if t is not None]
    if not times:
        return None

    # Restrict feature extraction to first PROFILE_SECONDS (online-like profiling)
    t0 = min(times)
    t1 = t0 + PROFILE_SECONDS
    profile_rows = [row for t, row in timed_rows if t is not None and t <= t1]
    if not profile_rows:
        profile_rows = rows

    def profile_mean(col):
        """Mean of the parsable values of *col* inside the profiling window."""
        parsed = (f(row.get(col)) for row in profile_rows)
        return mean([v for v in parsed if v is not None])

    # per-GPU means from first PROFILE_SECONDS only
    p_avg, sm_avg, dr_avg = {}, {}, {}
    for gid in gpu_ids:
        p_avg[gid] = profile_mean(f"GPU{gid}_Power (W)")
        sm_avg[gid] = profile_mean(f"GPU{gid}_SM_Clock (MHz)")
        dr_avg[gid] = profile_mean(f"GPU{gid}_DRAM_Active")

    def is_active(gid):
        power_ok = not math.isnan(p_avg[gid]) and p_avg[gid] >= ACTIVE_POWER_TH
        clock_ok = not math.isnan(sm_avg[gid]) and sm_avg[gid] >= ACTIVE_SM_TH
        return power_ok or clock_ok

    # Fall back to every GPU when none crosses an activity threshold.
    active = [gid for gid in gpu_ids if is_active(gid)] or gpu_ids[:]

    runtime = runtime_lookup.get((cap, gcount), max(times) - t0)

    avg_power = mean([p_avg[g] for g in active])
    avg_sm = mean([sm_avg[g] for g in active])
    avg_dram = mean([dr_avg[g] for g in active])

    return {
        "power_cap": cap,
        "gpu_count": gcount,
        "runtime": runtime,
        "avg_power": avg_power,
        "avg_sm": avg_sm,
        "avg_dram": avg_dram,
        "avg_power_x_gpu": avg_power * gcount,
        "avg_sm_x_gpu": avg_sm * gcount,
        "avg_dram_x_gpu": avg_dram * gcount,
    }


def load_runs(app_dir, runtime_csv):
    """Analyze every metric CSV in *app_dir* and return the per-run records.

    Directory entries are visited in sorted order; only names matching
    ``FILE_RE`` are analyzed, and files for which the analysis yields
    ``None`` are dropped. The result is ordered by
    ``(power_cap, gpu_count)``.
    """
    lookup = read_runtime_lookup(runtime_csv)
    analyzed = (
        analyze_metric_file(entry, lookup)
        for entry in sorted(app_dir.iterdir())
        if FILE_RE.match(entry.name)
    )
    records = [rec for rec in analyzed if rec is not None]
    records.sort(key=lambda rec: (rec["power_cap"], rec["gpu_count"]))
    return records


def filter_runs_by_caps(rows, focus_caps=None):
    """Keep only the runs whose ``power_cap`` is listed in *focus_caps*.

    When *focus_caps* is ``None`` the input list itself is returned
    unchanged; otherwise a new list is built. Caps on both sides are
    compared after conversion to ``int``.
    """
    if focus_caps is None:
        return rows
    wanted = set(map(int, focus_caps))
    kept = []
    for run in rows:
        if int(run["power_cap"]) in wanted:
            kept.append(run)
    return kept


def kendall_tau(rank_a, rank_b):
    """Return the Kendall rank correlation between two orderings.

    Each argument lists the same items, best first. Every unordered pair of
    items from *rank_a* is counted as concordant when both orderings place
    the pair in the same relative order and discordant when they disagree.
    The result is ``(concordant - discordant) / (concordant + discordant)``,
    or ``0.0`` when there are no comparable pairs.
    """
    index_a = {item: idx for idx, item in enumerate(rank_a)}
    index_b = {item: idx for idx, item in enumerate(rank_b)}
    keys = list(index_a)

    concordant = 0
    discordant = 0
    for k, first in enumerate(keys):
        for second in keys[k + 1:]:
            agreement = (index_a[first] - index_a[second]) * (index_b[first] - index_b[second])
            if agreement > 0:
                concordant += 1
            elif agreement < 0:
                discordant += 1

    compared = concordant + discordant
    if not compared:
        return 0.0
    return (concordant - discordant) / compared


def fit_and_rank_single_cap(cap_rows, lr=0.08, epochs=5000, l2=1e-3):
    """Rank the GPU counts measured under one power cap with a pairwise model.

    Each run is described by ``gpu_count`` and the three ``*_x_gpu``
    features. For every pair of runs the feature difference (and its
    negation) becomes a training sample labelled by which run had the
    shorter runtime. The differences are standardized and a logistic
    regression is fitted by batch gradient descent with an L2 penalty on
    the weights. Each run is then scored by summing its predicted
    probability of beating every other run.

    Note: the model is fitted and evaluated on the same runs, so agreement
    between the two returned rankings is an in-sample measure.

    Args:
        cap_rows: run records (dicts) that share one power cap.
        lr: gradient-descent step size.
        epochs: number of full-batch gradient steps.
        l2: L2 regularization strength applied to the weights (not the bias).

    Returns:
        ``(pred_rank, true_rank)``: lists of ``gpu_count`` values ordered
        best-first by model score and by measured runtime respectively.
        With fewer than two runs there are no pairs to learn from, so the
        runs are returned as-is for both rankings.
    """
    cap_rows = sorted(cap_rows, key=lambda r: r["gpu_count"])
    true_rank = [r["gpu_count"] for r in sorted(cap_rows, key=lambda x: x["runtime"])]

    # No pairs can be formed from 0 or 1 runs; the pair matrix would be empty
    # and the standardization below would fail on it.
    if len(cap_rows) < 2:
        return [r["gpu_count"] for r in cap_rows], true_rank

    feats = [np.array([
        r["gpu_count"],
        r["avg_power_x_gpu"],
        r["avg_sm_x_gpu"],
        r["avg_dram_x_gpu"],
    ], dtype=float) for r in cap_rows]

    # Build a symmetric pairwise dataset: (i - j) and (j - i) with
    # complementary labels.
    X, y = [], []
    for i in range(len(cap_rows)):
        for j in range(i + 1, len(cap_rows)):
            diff = feats[i] - feats[j]
            label = 1.0 if cap_rows[i]["runtime"] < cap_rows[j]["runtime"] else 0.0
            X.append(diff); y.append(label)
            X.append(-diff); y.append(1.0 - label)

    X = np.array(X, dtype=float)
    y = np.array(y, dtype=float)

    # Standardize each difference column; constant columns keep scale 1.
    mu = X.mean(axis=0)
    sd = X.std(axis=0)
    sd[sd == 0] = 1.0
    Z = (X - mu) / sd

    w = np.zeros(Z.shape[1], dtype=float)
    b = 0.0
    n = len(y)

    for _ in range(epochs):
        p = sigmoid(Z @ w + b)
        grad_w = (Z.T @ (p - y)) / n + l2 * w
        grad_b = np.mean(p - y)
        w -= lr * grad_w
        b -= lr * grad_b

    # Score each run by its summed predicted win probability over all others.
    scores = np.zeros(len(cap_rows), dtype=float)
    for i in range(len(cap_rows)):
        for j in range(len(cap_rows)):
            if i == j:
                continue
            diff = (feats[i] - feats[j] - mu) / sd
            scores[i] += sigmoid(diff @ w + b)

    order = np.argsort(-scores)
    pred_rank = [cap_rows[k]["gpu_count"] for k in order]
    return pred_rank, true_rank


# One entry per processed application; written to the aggregate summary CSV
# after the loop.
summary_rows = []

# Per-application driver: load runs, rank GPU counts independently for each
# power cap, print the comparison and export two CSVs into the app directory.
for app in APPS:
    APP_DIR = ROOT_DIR / app
    RUNTIME_CSV = APP_DIR / "runtime.csv"

    if not APP_DIR.exists():
        print(f"[skip] app dir not found: {APP_DIR}")
        continue

    runs = load_runs(APP_DIR, RUNTIME_CSV)
    runs = filter_runs_by_caps(runs, FOCUS_POWER_CAPS)

    if not runs:
        print(f"[skip] no runs after filter for app={app}")
        continue

    # Group the runs by power cap; each group is ranked on its own.
    by_cap = defaultdict(list)
    for r in runs:
        by_cap[r["power_cap"]].append(r)

    caps = sorted(by_cap.keys())
    print(f"\n=== {app} ===")
    print(f"Focused power caps: {caps}")

    # # Print per-case metrics
    # print("Per-case metrics (active-GPU averages; first 5s window after filtering GPU0_DRAM_Active==0):")
    # print("power_cap,gpu_count,runtime,avg_power,avg_sm,avg_dram,avg_power_x_gpu,avg_sm_x_gpu,avg_dram_x_gpu")
    # for r in sorted(runs, key=lambda x: (x["power_cap"], x["gpu_count"])):
    #     print(
    #         f"{r['power_cap']},{r['gpu_count']},"
    #         f"{r['runtime']:.6f},{r['avg_power']:.6f},{r['avg_sm']:.6f},{r['avg_dram']:.6f},"
    #         f"{r['avg_power_x_gpu']:.6f},{r['avg_sm_x_gpu']:.6f},{r['avg_dram_x_gpu']:.6f}"
    #     )

    top1_hits = 0
    taus = []
    results = []

    print("Independent per-cap ranking:")
    for cap in caps:
        # The model is fitted on this cap's runs only and compared against the
        # runtime ordering of those same runs.
        pred_rank, true_rank = fit_and_rank_single_cap(by_cap[cap])
        # Top-1 hit: the predicted best GPU count equals the fastest measured one.
        ok = int(pred_rank[0] == true_rank[0])
        tau = kendall_tau(pred_rank, true_rank)
        top1_hits += ok
        taus.append(tau)

        # Ratio of slowest to fastest runtime at this cap.
        cap_rows = sorted(by_cap[cap], key=lambda x: x["runtime"])
        best_runtime = cap_rows[0]["runtime"]
        worst_runtime = cap_rows[-1]["runtime"]
        # Guard against division by a zero/negative runtime.
        speedup_vs_worst = worst_runtime / best_runtime if best_runtime > 0 else float("inf")

        results.append({
            "power_cap": cap,
            "pred_rank": pred_rank,
            "true_rank": true_rank,
            "pred_optimal_gpu": pred_rank[0],
            "true_optimal_gpu": true_rank[0],
            "top1_correct": ok,
            "kendall_tau": tau,
            "best_runtime": best_runtime,
            "worst_runtime": worst_runtime,
            "speedup_vs_worst": speedup_vs_worst,
        })
        print(
            f"  cap {cap}: pred={pred_rank} | true={true_rank} | top1={ok} | tau={tau:.3f} "
            f"| speedup_vs_worst={speedup_vs_worst:.3f}x"
        )

    # caps is non-empty here because runs is non-empty, so the division is safe.
    top1_acc = top1_hits / len(caps)
    mean_tau = float(np.mean(taus))
    print(f"Top-1 optimal-gpu-count accuracy (independent per-cap): {top1_hits}/{len(caps)} = {top1_acc:.3f}")
    print(f"Mean Kendall tau (independent per-cap): {mean_tau:.3f}")

    # Export outputs per app
    # 1) the per-run feature table (one line per power cap / GPU count pair)
    out_feat_csv = APP_DIR / f"{app}_rank_features_scaled.csv"
    with out_feat_csv.open("w", newline="") as fh:
        wcsv = csv.writer(fh)
        wcsv.writerow([
            "power_cap", "gpu_count", "runtime",
            "avg_power_active", "avg_sm_active", "avg_dram_active",
            "avg_power_x_gpu", "avg_sm_x_gpu", "avg_dram_x_gpu"
        ])
        for r in sorted(runs, key=lambda x: (x["power_cap"], x["gpu_count"])):
            wcsv.writerow([
                r["power_cap"], r["gpu_count"], r["runtime"],
                r["avg_power"], r["avg_sm"], r["avg_dram"],
                r["avg_power_x_gpu"], r["avg_sm_x_gpu"], r["avg_dram_x_gpu"]
            ])

    # 2) the per-cap ranking results; rank lists are stored as their str() form
    out_rank_csv = APP_DIR / f"{app}_independent_per_cap_rankings.csv"
    with out_rank_csv.open("w", newline="") as fh:
        wcsv = csv.writer(fh)
        wcsv.writerow([
            "power_cap", "pred_rank", "true_rank", "pred_optimal_gpu", "true_optimal_gpu", "top1_correct", "kendall_tau",
            "best_runtime", "worst_runtime", "speedup_vs_worst"
        ])
        for row in sorted(results, key=lambda x: x["power_cap"]):
            wcsv.writerow([
                row["power_cap"], str(row["pred_rank"]), str(row["true_rank"]),
                row["pred_optimal_gpu"], row["true_optimal_gpu"], row["top1_correct"], row["kendall_tau"],
                row["best_runtime"], row["worst_runtime"], row["speedup_vs_worst"]
            ])

    summary_rows.append([app, len(caps), top1_acc, mean_tau, str(caps), str(out_feat_csv), str(out_rank_csv)])

# Aggregate summary: a header line followed by one line per processed app.
summary_csv = ROOT_DIR / 'all_apps_independent_ranking_summary.csv'
with summary_csv.open('w', newline='') as fh:
    wcsv = csv.writer(fh)
    wcsv.writerows(
        [['app', 'num_caps', 'top1_acc', 'mean_kendall_tau', 'caps', 'features_csv', 'rankings_csv']]
        + summary_rows
    )

print(f"\nWrote summary: {summary_csv}")



=== lbm ===
Focused power caps: [800, 900, 1000, 1200, 1400, 1600, 2000]
Independent per-cap ranking:
  cap 800: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 900: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 1000: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 1200: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 1400: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 1600: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
  cap 2000: pred=[4, 3, 2, 1] | true=[4, 3, 2, 1] | top1=1 | tau=1.000
Top-1 optimal-gpu-count accuracy (independent per-cap): 7/7 = 1.000
Mean Kendall tau (independent per-cap): 1.000

=== cloverleaf ===
Focused power caps: [800, 900, 1000, 1200, 1400, 1600, 2000]
Independent per-cap ranking:
  cap 800: pred=[3, 2, 4, 1] | true=[3, 2, 4, 1] | top1=1 | tau=1.000
  cap 900: pred=[3, 4, 2, 1] | true=[3, 4, 2, 1] | top1=1 | tau=1.000
  cap 1000: pred=[4, 3, 2, 1] | true=[