In [1]:
import os
import json
import pandas as pd
import altair as alt

In [2]:
def load_reference_jsons(root_path, filter=""):
    """Load every ``*.json`` file found under ``root_path`` into one DataFrame.

    The tree is walked in sorted order (directories and file names), so the
    resulting row order -- and therefore the default integer index -- is the
    same on every run regardless of the order the filesystem lists entries in.

    Args:
        root_path: Directory to search recursively.
        filter: Substring that a directory path must contain for its files to
            be loaded. The default empty string matches every directory.

    Returns:
        A ``pandas.DataFrame`` built from the parsed JSON documents, one entry
        per successfully loaded file. Files that cannot be read or parsed are
        reported on stdout and skipped.
    """
    records = []

    for dirpath, dirnames, filenames in os.walk(root_path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order.
        dirnames.sort()
        if filter not in dirpath:
            continue
        for file in sorted(filenames):
            if not file.endswith(".json"):
                continue
            file_path = os.path.join(dirpath, file)
            try:
                with open(file_path, 'r', encoding="utf-8") as f:
                    records.append(json.load(f))
            except Exception as e:
                # Best effort: one unreadable result must not abort the load.
                print(f"Error loading {file_path}: {e}")

    return pd.DataFrame(records)

In [3]:
# Benchmark result JSONs; the path is relative to the kernel's working directory.
root_path = "../../../slurm_logs/latest"
df = load_reference_jsons(root_path)

# Only the last expression of a cell is rendered, so the intermediate views
# are displayed explicitly.
display(df.head())
display(df['success'].unique())

# Runs whose 'success' field is False.
df[df['success'].eq(False)]

Unnamed: 0,world_size,m,n,k,debug,validate,trace_tiles,benchmark,datatype,algorithm,...,streamk_registers,streamk_spills,success,success_partial,triton_tflops,triton_ms,streamk_ms,streamk_experiments,communication_ms,communication_experiments


In [18]:
# Show every field of the first loaded record (positional row 0).
df.iloc[0]

index                                                                        0
world_size                                                                   8
m                                                                         8192
n                                                                          576
k                                                                        36864
debug                                                                     True
validate                                                                  True
trace_tiles                                                              False
benchmark                                                                 True
datatype                                                                  fp16
algorithm                                                          all_scatter
output_file                  /work1/amd/muhaawad/git/amd/pdp/iris/slurm_log...
BLK_M                                               

In [16]:
import pandas as pd
import altair as alt

# Reshape to long form: one row per (experiment index, timing column) pair,
# with the column name in "type" and the measurement in "time_ms".
df_long = pd.melt(
    df.reset_index(),
    id_vars=["index"],
    value_vars=["communication_ms", "streamk_ms"],
    var_name="type",
    value_name="time_ms",
)

# Scatter of both timings per experiment; the colour encoding on "type"
# is what produces the legend.
chart = (
    alt.Chart(df_long)
    .mark_circle(size=60)
    .encode(
        x=alt.X("index:O", title="Experiment Index"),
        y=alt.Y("time_ms:Q", title="Kernel Time (ms)"),
        color=alt.Color("type:N", title="Kernel Type"),
        tooltip=["index", "type", "time_ms"],
    )
    .properties(
        title="Communication vs Stream Kernel Time",
        height=300,
        width=600,
    )
)

chart


In [5]:
# Display names for the algorithm keys recognised in df['algorithm'].
algorithm_labels = {
    "all_scatter": "All Scatter",
    "all_reduce": "All Reduce",
    "one_shot": "One Shot",
}

# One faceted bar chart per algorithm: a facet per problem shape, a bar per world size.
# dropna(): a record without an algorithm would break the substring tests below.
for algorithm in df['algorithm'].dropna().unique():
    filtered_df = df[df['algorithm'] == algorithm].copy()
    # Problem sizes come from the lowercase m/n/k columns -- the same columns
    # the tooltip below reads -- so the cell relies on one spelling only.
    filtered_df["shape"] = filtered_df.apply(lambda row: f"M{row['m']}N{row['n']}K{row['k']}", axis=1)

    # Join every matching label with a space; fall back to the raw key so an
    # unrecognised algorithm still gets a meaningful title.
    matched_labels = [label for key, label in algorithm_labels.items() if key in algorithm]
    title = f"{' '.join(matched_labels) or algorithm} (Iris)"

    # Sorting first makes the facet order below follow m, n, k.
    filtered_df = filtered_df.sort_values(by=["m", "n", "k", "world_size"])

    chart = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X("world_size:O", title="World Size"),
        # The plotted field is triton_tflops, so the axis is labelled in TFLOP/s.
        y=alt.Y("triton_tflops:Q", title="Throughput (TFLOP/s)", scale=alt.Scale(domain=[0, 600])),
        color=alt.Color("world_size:N", title="World Size"),
        column=alt.Column("shape:N", title="", sort=filtered_df["shape"].unique().tolist()),
        tooltip=["shape", "world_size", "triton_tflops", "m", "n", "k"]
    ).properties(
        title=title,
        height=300
    ).configure_axisX(
        labelAngle=0
    ).configure_title(
        anchor="middle",
        fontSize=18,
        font='Helvetica'
    )

    # Export each figure in vector and raster form next to the notebook.
    fname = f"iris_{algorithm}_by_shape"
    chart.save(f'{fname}.svg')
    chart.save(f'{fname}.png', scale_factor=4)
    chart.save(f'{fname}.pdf')
    chart.display()


In [6]:
# Display names for the algorithm keys recognised in df['algorithm'].
algorithm_labels = {
    "all_scatter": "All Scatter",
    "all_reduce": "All Reduce",
    "one_shot": "One Shot",
}

# One faceted bar chart per algorithm: a facet per world size, a bar per problem shape.
# dropna(): a record without an algorithm would break the substring tests below.
for algorithm in df['algorithm'].dropna().unique():
    filtered_df = df[df['algorithm'] == algorithm].copy()
    # Problem sizes come from the lowercase m/n/k columns, matching the
    # columns shown in this notebook's recorded outputs.
    filtered_df["shape"] = filtered_df.apply(lambda row: f"M{row['m']}N{row['n']}K{row['k']}", axis=1)

    # Join every matching label with a space; fall back to the raw key so an
    # unrecognised algorithm still gets a meaningful title.
    matched_labels = [label for key, label in algorithm_labels.items() if key in algorithm]
    title = f"{' '.join(matched_labels) or algorithm} (Iris)"

    # Sorting first makes the x-axis order below follow m, n, k.
    filtered_df = filtered_df.sort_values(by=["m", "n", "k", "world_size"])

    chart = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X("shape:N", title="", sort=filtered_df["shape"].unique().tolist()),
        # The plotted field is triton_tflops, so the axis is labelled in TFLOP/s.
        y=alt.Y("triton_tflops:Q", title="Throughput (TFLOP/s)", scale=alt.Scale(domain=[0, 600])),
        color=alt.Color("shape:N", legend=None),
        column=alt.Column("world_size:N", title="World Size"),
        tooltip=["shape", "world_size", "triton_tflops"]
    ).properties(
        title=title,
        height=300
    ).configure_axisX(
        labelAngle=45
    ).configure_title(
        anchor="middle",     # Center the title
        fontSize=18,
        font='Helvetica'
    )

    # Export each figure in vector and raster form next to the notebook.
    fname = f"iris_{algorithm}_by_world_size"
    chart.save(f'{fname}.svg')
    chart.save(f'{fname}.png', scale_factor=4)
    chart.save(f'{fname}.pdf')
    display(chart)


In [7]:
# The relative paths used in this notebook resolve against this directory.
# Pure-Python equivalent of `!pwd`, so it also works where no POSIX shell exists.
os.getcwd()

/work1/amd/muhaawad/git/amd/pdp/iris/examples/gemm/notebooks


In [8]:
import json

# Distinct (m, n, k) problem sizes present in the loaded results.
unique_mnk = df.loc[:, ["m", "n", "k"]].drop_duplicates()
mnk_list = unique_mnk.to_dict(orient="records")

# Persist them as a JSON array of {"m", "n", "k"} objects.
with open("../../../dataset/mini_v2.json", "w") as out_file:
    out_file.write(json.dumps(mnk_list, indent=4))

In [9]:
# Distinct problem sizes, ordered by n for easier scanning.
(
    df.loc[:, ["m", "n", "k"]]
    .drop_duplicates()
    .sort_values(by="n")
)

Unnamed: 0,m,n,k
0,8192,576,36864


In [10]:

# NOTE(review): this cell read uppercase M/N/K, while the other cells and the
# recorded outputs of this notebook use lowercase m/n/k. Accept either
# spelling (lowercase preferred) so the cell works with both.
mnk_columns = ["m", "n", "k"] if {"m", "n", "k"}.issubset(df.columns) else ["M", "N", "K"]

# Distinct problem sizes, always exposed under lowercase column names.
df_mnk = (
    df[mnk_columns]
    .drop_duplicates()
    .rename(columns={"M": "m", "N": "n", "K": "k"})
)

In [11]:
# NOTE(review): `variants` is first assigned in the cell that follows this one,
# so on a fresh kernel this cell raises NameError (see its recorded output).
# Run it after that cell, or move it below.
variants

NameError: name 'variants' is not defined

In [None]:
# Divisors applied independently to n and k of every base shape.
scales = [1, 2, 4, 8]

# Generate variants: for each base (m, n, k), keep m and shrink n and k by
# every combination of the scale factors.
variants = []
seen_shapes = set()  # scaled shapes from different base rows can coincide
for _, row in df_mnk.iterrows():
    m = int(row["m"])
    for sn in scales:
        n = int(row["n"] // sn)
        for sk in scales:
            k = int(row["k"] // sk)
            shape = (m, n, k)
            # Skip degenerate shapes (a dimension rounded down to 0) and
            # shapes already emitted, so every dataset entry is unique.
            if min(shape) <= 0 or shape in seen_shapes:
                continue
            seen_shapes.add(shape)
            variants.append({"m": m, "n": n, "k": k})

# Write to JSON
with open("../../../dataset/mini_v2.json", "w") as f:
    json.dump(variants, f, indent=4)