In [79]:
import pandas as pd
import numpy as np
from sklearn.metrics import jaccard_score
import os

# === Configuration ===
colname = ' total(MB/s)'
time_col = 'Time Elapsed (s)'

# === ALTIS config ===
altis_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/altis_power_res/no_power_shift/mem_throughput'
altis_benchmarks = [
    "bfs", "gemm", "pathfinder", "sort", "cfd", "cfd_double", "fdtd2d",
    "kmeans", "lavamd", "nw", "particlefilter_float", "raytracing","srad", "where"
]
altis_ts = [300, 500, 300, 500, 300, 500, 3000, 500, 500, 5000, 40, 50, 500, 300]
altis_threshold_map = dict(zip(altis_benchmarks, altis_ts))

# === ECP config ===
ecp_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/ecp_power_res/no_power_shift/mem_throughput'
ecp_benchmarks = [
    'Laghos', 'miniGAN', 'sw4lite', 'UNet', 'Resnet50', 'bert_large', 'lammps', 'gromacs'
]

# ecp_benchmarks = []

ecp_ts = [500, 500, 1000, 500, 500, 130, 5000, 500]
ecp_threshold_map = dict(zip(ecp_benchmarks, ecp_ts))

# === Combine all benchmarks and thresholds ===
benchmark_sets = [
    ("altis", altis_base, altis_benchmarks, altis_threshold_map),
    ("ecp", ecp_base, ecp_benchmarks, ecp_threshold_map)
]

results = []

# === Core logic ===
for label, base_dir, benchmarks, threshold_map in benchmark_sets:
    for app in benchmarks:
        try:
            threshold = threshold_map[app]

            # Load CSVs
            path_max = os.path.join(base_dir, 'max_uncore', f'{app}.csv')
            path_mag = os.path.join(base_dir, 'dynamic_uncore', f'{app}.csv')

            df_max = pd.read_csv(path_max)
            df_mag = pd.read_csv(path_mag)

            # Filter by time for specific apps
            if app == 'bert_large':
                df_max = df_max[df_max[time_col] > 220]
                df_mag = df_mag[df_mag[time_col] > 220]
            elif app == 'Resnet50':
                df_max = df_max[df_max[time_col] > 60]
                df_mag = df_mag[df_mag[time_col] > 60]

            # Parse throughput values
            T_max = pd.to_numeric(df_max[colname].astype(str).str.strip(), errors='coerce').dropna().values
            T_mag = pd.to_numeric(df_mag[colname].astype(str).str.strip(), errors='coerce').dropna().values

            # Align lengths
            min_len = min(len(T_max), len(T_mag))
            T_max = T_max[:min_len]
            T_mag = T_mag[:min_len]

            # Burst masks based on absolute value threshold
            burst_max = (T_max > threshold).astype(int)
            burst_mag = (T_mag > threshold).astype(int)

            # Compute Jaccard similarity
            jaccard = jaccard_score(burst_max, burst_mag)
            results.append((label, app, threshold, round(jaccard, 3)))

        except Exception as e:
            results.append((label, app, "N/A", f"ERROR: {e}"))

# === Print Results ===
print("\n=== Jaccard Similarity on Absolute Throughput (Per-App Thresholds) ===")
print(f"{'Group':<6} {'App':<20} {'Threshold':<10} {'Jaccard'}")
for group, app, thresh, score in results:
    print(f"{group:<6} {app:<20} {thresh:<10} {score}")



=== Jaccard Similarity on Absolute Throughput (Per-App Thresholds) ===
Group  App                  Threshold  Jaccard
altis  bfs                  300        0.963
altis  gemm                 500        0.909
altis  pathfinder           300        0.955
altis  sort                 500        0.995
altis  cfd                  300        0.891
altis  cfd_double           500        0.833
altis  fdtd2d               3000       0.97
altis  kmeans               500        0.882
altis  lavamd               500        0.801
altis  nw                   5000       0.877
altis  particlefilter_float 40         0.684
altis  raytracing           50         0.849
altis  srad                 500        0.949
altis  where                300        0.988
ecp    Laghos               500        0.995
ecp    miniGAN              500        0.922
ecp    sw4lite              1000       0.864
ecp    UNet                 500        0.931
ecp    Resnet50             500        0.804
ecp    bert_large          

In [165]:
import pandas as pd
import numpy as np
from sklearn.metrics import jaccard_score
import os

# === Configuration ===
colname = ' total(MB/s)'        # Throughput column
time_col = 'Time Elapsed (s)'   # Timestamp column

# === Benchmark groups ===
altis_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/altis_power_res/no_power_shift/mem_throughput'
altis_benchmarks = [
    "bfs", "gemm", "pathfinder", "sort", "cfd", "cfd_double", "fdtd2d",
    "kmeans", "lavamd", "nw", "particlefilter_float", "raytracing", "where"
]

ecp_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/ecp_power_res/no_power_shift/mem_throughput'
ecp_benchmarks = [
    'Laghos', 'miniGAN', 'sw4lite', 'UNet', 'Resnet50', 'bert_large', 'lammps', 'gromacs'
]

benchmark_sets = [
    ("altis", altis_base, altis_benchmarks),
    ("ecp", ecp_base, ecp_benchmarks)
]

results = []

# === Core logic ===
for label, base_dir, benchmarks in benchmark_sets:
    for app in benchmarks:
        try:
            # Load CSVs
            path_max = os.path.join(base_dir, 'max_uncore', f'{app}.csv')
            path_mag = os.path.join(base_dir, 'dynamic_uncore', f'{app}.csv')

            df_max = pd.read_csv(path_max)
            df_mag = pd.read_csv(path_mag)

            # Filter by time (optional per app)
            if app == 'bert_large':
                df_max = df_max[df_max[time_col].between(220, 300)]
                df_mag = df_mag[df_mag[time_col].between(220, 300)]

            elif app == 'Resnet50':
                df_max = df_max[df_max[time_col] > 60]
                df_mag = df_mag[df_mag[time_col] > 60]

            # Clean and parse throughput column
            T_max = pd.to_numeric(df_max[colname].astype(str).str.strip(), errors='coerce').dropna().values
            T_mag = pd.to_numeric(df_mag[colname].astype(str).str.strip(), errors='coerce').dropna().values

            # Align lengths
            min_len = min(len(T_max), len(T_mag))
            T_max = T_max[:min_len]
            T_mag = T_mag[:min_len]

            # === Adaptive threshold: 50% of max ==
            

            adaptive_threshold = min(T_mag.min() * 1.3, T_mag.max())
            adaptive_threshold = T_mag.min() * 1.5
            # Binary burst masks
            burst_max = (T_max > adaptive_threshold).astype(int)
            burst_mag = (T_mag > adaptive_threshold).astype(int)

            # Jaccard score
            jaccard = jaccard_score(burst_max, burst_mag)
            results.append((label, app, round(adaptive_threshold, 1), round(jaccard, 3)))

        except Exception as e:
            results.append((label, app, "N/A", f"ERROR: {e}"))

# === Print Results ===
print("\n=== Jaccard Burst Overlap Similarity (Adaptive Thresholds) ===")
print(f"{'Group':<6} {'App':<20} {'Threshold':<10} {'Jaccard'}")
for group, app, thresh, score in results:
    print(f"{group:<6} {app:<20} {score}")



=== Jaccard Burst Overlap Similarity (Adaptive Thresholds) ===
Group  App                  Threshold  Jaccard
altis  bfs                  0.995
altis  gemm                 0.714
altis  pathfinder           0.98
altis  sort                 0.968
altis  cfd                  0.947
altis  cfd_double           0.631
altis  fdtd2d               0.4
altis  kmeans               0.977
altis  lavamd               0.922
altis  nw                   1.0
altis  particlefilter_float 0.675
altis  raytracing           0.87
altis  where                0.946
ecp    Laghos               0.995
ecp    miniGAN              0.988
ecp    sw4lite              0.872
ecp    UNet                 0.99
ecp    Resnet50             0.964
ecp    bert_large           0.844
ecp    lammps               0.995
ecp    gromacs              0.993


In [111]:
import pandas as pd
import numpy as np
from sklearn.metrics import jaccard_score
import os

# === Configuration ===
colname = ' total(MB/s)'
time_col = 'Time Elapsed (s)'

# === Benchmark groups ===
altis_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/altis_power_res/no_power_shift/mem_throughput'
altis_benchmarks = [
    "bfs", "gemm", "pathfinder", "sort", "cfd", "cfd_double", "fdtd2d",
    "kmeans", "lavamd", "nw", "particlefilter_float", "raytracing", "where"
]

ecp_base = '/Users/zhongzheng/Desktop/power/GPGPU/data/ecp_power_res/no_power_shift/mem_throughput'
ecp_benchmarks = [
    'Laghos', 'miniGAN', 'sw4lite', 'UNet', 'Resnet50', 'bert_large', 'lammps', 'gromacs'
]

benchmark_sets = [
    ("altis", altis_base, altis_benchmarks),
    ("ecp", ecp_base, ecp_benchmarks)
]

# === Fuzzy Jaccard with ±tolerance index ===
def fuzzy_jaccard(burst_a, burst_b, tolerance=1):
    a_idx = np.where(burst_a == 1)[0]
    b_idx = np.where(burst_b == 1)[0]

    matched_a = set()
    matched_b = set()

    for ai in a_idx:
        # Find all b indices within ±tolerance of ai
        nearby_b = b_idx[np.abs(b_idx - ai) <= tolerance]
        if len(nearby_b) > 0:
            matched_a.add(ai)
            matched_b.update(nearby_b)

    intersection = len(matched_a)
    union = len(set(a_idx).union(set(b_idx)))

    if union == 0:
        return 1.0 if intersection == 0 else 0.0

    return intersection / union

# === Main loop ===
results = []

for label, base_dir, benchmarks in benchmark_sets:
    for app in benchmarks:
        try:
            # Load CSVs
            path_max = os.path.join(base_dir, 'max_uncore', f'{app}.csv')
            path_mag = os.path.join(base_dir, 'dynamic_uncore', f'{app}.csv')

            df_max = pd.read_csv(path_max)
            df_mag = pd.read_csv(path_mag)

            # Optional filtering
            if app == 'bert_large':
                df_max = df_max[df_max[time_col].between(220, 300)]
                df_mag = df_mag[df_mag[time_col].between(220, 300)]
            elif app == 'Resnet50':
                df_max = df_max[df_max[time_col].between(60, 300)]
                df_mag = df_mag[df_mag[time_col].between(60, 300)]

            # Parse throughput column
            T_max = pd.to_numeric(df_max[colname].astype(str).str.strip(), errors='coerce').dropna().values
            T_mag = pd.to_numeric(df_mag[colname].astype(str).str.strip(), errors='coerce').dropna().values

            # Align lengths
            min_len = min(len(T_max), len(T_mag))
            T_max = T_max[:min_len]
            T_mag = T_mag[:min_len]

            # Adaptive threshold based on 90% of T_max
            adaptive_threshold = T_max.max() * 0.3

            # Binary burst detection
            burst_max = (T_max > adaptive_threshold).astype(int)
            burst_mag = (T_mag > adaptive_threshold).astype(int)

            # Fuzzy Jaccard (±1 time step tolerance)
            jaccard = round(fuzzy_jaccard(burst_max, burst_mag, tolerance=1), 3)

            results.append((label, app, round(adaptive_threshold, 1), jaccard))

        except Exception as e:
            results.append((label, app, "N/A", f"ERROR: {e}"))

# === Print Results ===
print("\n=== Jaccard Burst Overlap Similarity (Fuzzy ±1s Tolerance) ===")
print(f"{'Group':<6} {'App':<20} {'Threshold':<10} {'Jaccard'}")
for group, app, thresh, score in results:
    print(f"{group:<6} {app:<20} {thresh:<10} {score}")



=== Jaccard Burst Overlap Similarity (Fuzzy ±1s Tolerance) ===
Group  App                  Threshold  Jaccard
altis  bfs                  9379.1     0.871
altis  gemm                 2208.2     1.0
altis  pathfinder           3113.3     0.952
altis  sort                 4074.3     0.515
altis  cfd                  4684.4     0.643
altis  cfd_double           3930.6     0.5
altis  fdtd2d               5571.1     0.5
altis  kmeans               4455.9     0.333
altis  lavamd               9398.5     0.083
altis  nw                   3164.8     0.933
altis  particlefilter_float 539.3      0.615
altis  raytracing           167.4      0.133
altis  where                5958.8     0.364
ecp    Laghos               576.5      0.941
ecp    miniGAN              20899.6    0.077
ecp    sw4lite              6424.1     0.875
ecp    UNet                 43410.3    0.176
ecp    Resnet50             4740.6     0.642
ecp    bert_large           284.8      0.09
ecp    lammps               7215.1     0.