In [1]:
import os
import json
import pandas as pd
import altair as alt

In [2]:
def load_reference_jsons(root_path, filter=""):
    """Collect every ``*.json`` file below ``root_path`` into one DataFrame.

    The directory tree is walked recursively in sorted order (directories and
    file names), so the row order of the result is reproducible regardless of
    the order in which the filesystem happens to list entries.

    Args:
        root_path: Directory that is searched recursively.
        filter: Substring that a directory path must contain for the files in
            that directory to be loaded. It is matched against the full
            ``dirpath`` string produced by ``os.walk`` (which starts with
            ``root_path``). The default ``""`` matches every directory.
            The name shadows the ``filter`` builtin inside this function; it
            is kept so existing keyword callers continue to work.

    Returns:
        A ``pandas.DataFrame`` built from the list of parsed JSON documents
        (one list entry per file). Empty if nothing was loaded.

    Files that cannot be read or parsed are reported via ``print`` and
    skipped; they never abort the scan.
    """
    records = []

    for dirpath, dirnames, filenames in os.walk(root_path):
        # Sorting in place controls the order in which os.walk descends.
        dirnames.sort()

        # Non-matching directories are skipped but still descended into,
        # because a deeper path may contain the filter substring.
        if filter not in dirpath:
            continue

        for file in sorted(filenames):
            if not file.endswith(".json"):
                continue

            file_path = os.path.join(dirpath, file)
            try:
                # Explicit UTF-8 so parsing does not depend on the locale.
                with open(file_path, 'r', encoding="utf-8") as f:
                    records.append(json.load(f))
            except (OSError, ValueError) as e:
                # OSError: unreadable file. ValueError: malformed JSON
                # (json.JSONDecodeError) or undecodable bytes.
                print(f"Error loading {file_path}: {e}")

    return pd.DataFrame(records)

In [3]:
# Results under slurm_logs/latest: the "triton_tflops" column is renamed to
# "flops" (presumably to line up with the reference results - TODO confirm
# that the reference JSONs already use a "flops" key).
root_path = "../../../slurm_logs/latest"
df0 = load_reference_jsons(root_path).rename(columns={"triton_tflops": "flops"})

# Results under slurm_logs/reference-latest, loaded without renaming.
root_path = "../../../slurm_logs/reference-latest"
df1 = load_reference_jsons(root_path)

# Stack both result sets into one frame with a fresh 0..n-1 index.
df = pd.concat((df0, df1), ignore_index=True)

In [4]:
# Distinct algorithm codes present in the combined results.
df.loc[:, "algorithm"].unique()

array(['all_scatter', 'one_shot', 'torch_dist_all_gather',
       'torch_dist_all_reduce'], dtype=object)

In [5]:
# Preview only the first rows rather than dumping the whole frame.
df0.head(10)

Unnamed: 0,world_size,m,n,k,debug,validate,trace_tiles,benchmark,datatype,algorithm,...,streamk_registers,streamk_spills,success,success_partial,flops,triton_ms,streamk_ms,streamk_experiments,communication_ms,communication_experiments
0,4,8192,1152,36864,True,True,False,True,fp32,all_scatter,...,192,0,True,True,299.620734,9.288873,8.522641,126,8.62393,126
1,2,8192,1792,14336,True,True,False,True,fp32,all_scatter,...,154,0,True,True,137.118407,6.139319,5.115671,126,5.903886,126
2,4,8192,8192,7168,True,True,False,True,fp32,one_shot,...,208,0,True,True,286.699205,13.422746,6.056076,126,12.9595,126
3,4,8192,8192,7680,True,True,False,True,fp32,one_shot,...,208,0,True,True,299.792954,13.753387,6.485487,126,13.262515,126
4,8,8192,512,14336,True,True,False,True,fp32,all_scatter,...,154,0,True,True,274.477397,3.505107,1.405507,126,3.191138,126
5,8,8192,1024,28672,True,True,False,True,fp32,all_scatter,...,204,0,True,True,496.228632,7.755076,3.930197,126,7.391598,126
6,8,8192,512,12288,True,True,False,True,fp32,all_scatter,...,154,0,True,True,250.128124,3.296845,1.141692,126,2.839896,126
7,8,8192,512,12288,True,True,False,True,fp32,all_scatter,...,154,0,True,True,249.212908,3.308953,1.199962,126,2.980715,126
8,4,8192,4096,3584,True,True,False,True,fp32,one_shot,...,256,113,True,True,70.76225,13.595846,12.319933,126,13.197508,126
9,8,8192,4096,1536,True,True,False,True,fp32,one_shot,...,256,112,True,True,84.513423,9.757429,4.532152,126,9.308397,126


In [6]:
import pandas as pd
import altair as alt

# Map algorithm codes to readable names
algo_map = {
    "torch_dist_all_gather": "RCCL AllGather",
    "torch_dist_all_reduce": "RCCL AllReduce",
    "all_reduce": "Iris AllReduce",
    "one_shot": "Iris OneShot",
    "all_scatter": "Iris AllScatter"
}
# Codes missing from algo_map keep their raw name instead of becoming NaN,
# so a newly added algorithm still gets an identifiable legend entry.
df["algo_label"] = df["algorithm"].map(lambda code: algo_map.get(code, code))

# Create GEMM shape label
# NOTE(review): the df0 preview shows lowercase m/n/k columns; confirm that the
# uppercase M/N/K columns read here are populated for every row.
df["shape"] = df.apply(lambda row: f"M{row['M']}N{row['N']}K{row['K']}", axis=1)

# Sort shapes consistently: by K first, then by label. The same order is
# reused for every chart so the x axes are comparable between world sizes.
df = df.sort_values(by=["K", "shape"])
shape_order = df["shape"].drop_duplicates().tolist()

# The output directories do not depend on the world size, so create them once.
for fmt in ("png", "svg", "pdf"):
    os.makedirs(os.path.join("compare", fmt), exist_ok=True)

# One grouped bar chart per world size (number of GPUs)
for world_size in sorted(df["world_size"].unique()):
    filtered = df[df["world_size"] == world_size].copy()
    # Plain string title; a trailing comma here would turn it into a 1-tuple.
    title = f"{world_size} GPUs" if world_size > 1 else f"{world_size} GPU"
    chart = alt.Chart(filtered).mark_bar().encode(
        x=alt.X("shape:N", title="GEMM Shape", sort=shape_order),
        xOffset=alt.XOffset("algo_label:N"),
        # The "flops" column of the slurm_logs/latest results is the renamed
        # "triton_tflops" column, so the unit is TFLOP/s, not GFLOP/s.
        # Assumes the reference results use the same unit - TODO confirm.
        y=alt.Y("flops:Q", title="FLOPS (TFLOP/s)", scale=alt.Scale(domainMin=0)),
        color=alt.Color("algo_label:N", title="Algorithm"),
        tooltip=["shape", "algo_label", "flops"]
    ).properties(
        title=title,
        height=300,
        # Width scales with the number of distinct shapes over all world sizes.
        width=60 * len(shape_order)
    ).configure_axisX(
        labelAngle=30
    ).configure_title(
        anchor="middle",
        fontSize=18
    )

    fname = f"{world_size}_GPUs"
    chart.save(f"compare/svg/{fname}.svg")
    chart.save(f"compare/png/{fname}.png", scale_factor=4)
    chart.save(f"compare/pdf/{fname}.pdf")

    chart.display()


In [7]:
# Distinct readable algorithm labels present in the combined results.
df.loc[:, "algo_label"].unique()

array(['Iris AllScatter', 'Iris OneShot', 'RCCL AllGather',
       'RCCL AllReduce'], dtype=object)