In [1013]:
import os
import json
import pandas as pd
import sys

def load_json_files(directory):
    """Collect every ``*.json`` file under ``directory`` into one sorted DataFrame.

    Each file is expected to hold a single JSON object, which becomes one row.
    Files that cannot be read or parsed, or whose top-level value is not a
    JSON object, are reported on stderr and skipped. Rows containing any NaN
    (for example because a file lacks a key that other files have) are
    reported on stdout and dropped. Rows whose ``success`` value is False are
    reported on stdout but kept.

    Args:
        directory: Root directory to search recursively.

    Returns:
        A DataFrame sorted by ``m``, ``k``, ``n`` and ``world_size``, or None
        when no usable JSON file was found. The index is not reset: each label
        is the position at which that row's file was loaded.

    Raises:
        KeyError: If the loaded records lack one of the sort columns.
    """
    records = []
    source_paths = []  # source_paths[i] is the file that produced records[i]

    for root, dirs, files in os.walk(directory):
        dirs.sort()  # visit subdirectories in a deterministic order
        for filename in sorted(files):
            if not filename.endswith(".json"):
                continue
            file_path = os.path.join(root, filename)
            try:
                # JSON is UTF-8 by specification; do not depend on the locale default.
                with open(file_path, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: one bad file must not abort the whole load.
                print(f"Error reading {file_path}: {e}", file=sys.stderr)
                continue
            if not isinstance(data, dict):
                # A list/scalar payload cannot be used as a row and would break
                # (or mis-shape) the DataFrame construction below.
                print(
                    f"Error reading {file_path}: expected a JSON object, "
                    f"got {type(data).__name__}",
                    file=sys.stderr,
                )
                continue
            records.append(data)
            source_paths.append(file_path)

    if not records:
        print("No valid JSON files found.", file=sys.stderr)
        return None

    df = pd.DataFrame(records)

    # The default RangeIndex lines up with source_paths, and dropna() keeps the
    # original labels, so an index label always identifies the source file.
    nan_rows = df.isna().any(axis=1)
    if nan_rows.any():
        print("Files with NaN values (dropped):")
        for idx in df.index[nan_rows]:
            print(source_paths[idx])
        df = df.dropna()

    if "success" in df.columns:
        failed_rows = df["success"].eq(False)
        if failed_rows.any():
            print("Files where 'success' is False:")
            for idx in df.index[failed_rows]:
                print(source_paths[idx])

    return df.sort_values(by=["m", "k", "n", "world_size"])


In [1014]:
# Directories holding the benchmark JSON logs for the two runs being compared.
# NOTE(review): both sides currently read the same directory, so every speedup
# computed from these frames is 1.0 by construction. Point the constants at
# "../slurm_logs_before" and "../slurm_logs_after" for a real comparison.
BEFORE_LOG_DIR = "../slurm_logs"
AFTER_LOG_DIR = "../slurm_logs"

before_df = load_json_files(BEFORE_LOG_DIR)
after_df = load_json_files(AFTER_LOG_DIR)

# load_json_files returns None when a directory yields no usable JSON; stop
# here with a clear message instead of an AttributeError in a later cell.
if before_df is None:
    raise FileNotFoundError(f"No usable JSON results for the 'before' run in {BEFORE_LOG_DIR!r}")
if after_df is None:
    raise FileNotFoundError(f"No usable JSON results for the 'after' run in {AFTER_LOG_DIR!r}")


In [1015]:
# Columns that identify one benchmark configuration; used as the join key.
columns = ["m", "n", "k", "world_size", "streamk_sms", "communication_sms"]
# Metric compared between the two runs.
metric_column = "triton_tflops"

# merge() defaults to an inner join, so only configurations present in both
# frames are kept; every non-key column gets a _before/_after suffix.
merged_df = before_df.merge(after_df, on=columns, suffixes=("_before", "_after"))
# Ratio after/before: a value above 1 means the "after" run scored higher.
merged_df["speedup"] = merged_df[f"{metric_column}_after"] / merged_df[f"{metric_column}_before"]
# Derive the metric column names from metric_column so that changing the
# metric above cannot leave this selection pointing at the old columns.
keep_columns = [
    "speedup",
    "success_before",
    "success_after",
    f"{metric_column}_before",
    f"{metric_column}_after",
]
merged_df = merged_df[columns + keep_columns]

In [1016]:
# Show the comparison table; as the cell's last expression it is rendered by the notebook.
merged_df

Unnamed: 0,m,n,k,world_size,streamk_sms,communication_sms,speedup,success_before,success_after,triton_tflops_before,triton_tflops_after
0,4096,8192,2048,1,256,48,1.0,True,True,11.064242,11.064242
1,4096,8192,2048,2,256,48,1.0,True,True,2.795715,2.795715
2,4096,8192,2048,4,256,48,1.0,True,True,1.434507,1.434507
3,4096,8192,2048,8,256,48,1.0,True,True,0.829149,0.829149
4,4864,8256,4096,1,256,48,1.0,True,True,19.69764,19.69764
5,4864,8256,4096,2,256,48,1.0,True,True,5.786424,5.786424
6,4864,8256,4096,4,256,48,1.0,True,True,3.525971,3.525971
7,4864,8256,4096,8,256,48,1.0,True,True,2.059325,2.059325
8,6144,16384,8192,1,256,48,1.0,True,True,35.18965,35.18965


In [1017]:
# Print the same table as a Markdown pipe table, without the row index, so it
# can be pasted elsewhere (to_markdown needs the optional `tabulate` package).
print(merged_df.to_markdown(index=False))

|    m |     n |    k |   world_size |   streamk_sms |   communication_sms |   speedup | success_before   | success_after   |   triton_tflops_before |   triton_tflops_after |
|-----:|------:|-----:|-------------:|--------------:|--------------------:|----------:|:-----------------|:----------------|-----------------------:|----------------------:|
| 4096 |  8192 | 2048 |            1 |           256 |                  48 |         1 | True             | True            |              11.0642   |             11.0642   |
| 4096 |  8192 | 2048 |            2 |           256 |                  48 |         1 | True             | True            |               2.79572  |              2.79572  |
| 4096 |  8192 | 2048 |            4 |           256 |                  48 |         1 | True             | True            |               1.43451  |              1.43451  |
| 4096 |  8192 | 2048 |            8 |           256 |                  48 |         1 | True             | True            |