In [None]:
# Load IPython's autoreload extension and use mode 2, which re-imports
# modules before each cell runs so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os
import sys
from pathlib import Path
import datetime as dt
import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import json


In [None]:
def load_ior_summary_list(json_file: Path) -> list:
    """Flatten one IOR JSON summary file into a list of flat per-result records.

    One dict is produced for every entry of every inner list found under a
    test's ``"Results"`` key.  Each dict merges, in this key order:

    * the file-level ``Version``, ``Began``, ``Command line``, ``Machine`` and
      ``Finished`` values, under their original names;
    * the test's own top-level fields as ``test_<key>`` (the nested
      ``Results``, ``max``, ``Parameters`` and ``Options`` entries are
      excluded here);
    * ``max_<key>``, ``Parameters_<key>`` and ``Options_<key>`` taken from the
      test's nested mappings of the same names;
    * ``summary_<key>`` taken from ``data["summary"][i]``, where ``i`` is the
      test's position in ``data["tests"]``;
    * ``result_<key>`` taken from the result entry itself;
    * ``iorIteration``, the position of the inner list within ``"Results"``.

    Args:
        json_file: Path to the JSON file to read.

    Returns:
        A list of flat dicts, one per result entry.  Empty when the file
        contains no tests or no results.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a key needed to build a record is missing.
        IndexError: If ``data["summary"]`` has no entry at a test's position.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of
    # relying on the platform's locale default, which differs between machines.
    data = json.loads(json_file.read_text(encoding="utf-8"))

    def with_prefix(prefix, mapping):
        """Return a copy of ``mapping`` with every key renamed to ``<prefix>_<key>``."""
        return {f"{prefix}_{key}": value for key, value in mapping.items()}

    # Nested sections of a test entry; they are flattened under their own
    # prefixes (or iterated, for "Results") rather than copied as test_* fields.
    nested_sections = ["Results", "max", "Parameters", "Options"]

    records = []
    for index, test in enumerate(data['tests']):
        # The position of each inner list is recorded as 'iorIteration'.
        # NOTE(review): presumably one inner list per IOR repetition - confirm
        # against the IOR output format.
        for iteration, results in enumerate(test["Results"]):
            for result in results:
                test_fields = {
                    key: value
                    for key, value in test.items()
                    if key not in nested_sections
                }
                records.append({
                    'Version': data['Version'],
                    'Began': data['Began'],
                    'Command line': data['Command line'],
                    'Machine': data['Machine'],
                    'Finished': data['Finished'],
                    **with_prefix('test', test_fields),
                    **with_prefix('max', test["max"]),
                    **with_prefix('Parameters', test["Parameters"]),
                    **with_prefix('Options', test["Options"]),
                    # NOTE(review): pairs tests with summary entries purely by
                    # position; assumes data['summary'] is aligned one-to-one
                    # with data['tests'] - confirm for multi-test files.
                    **with_prefix('summary', data['summary'][index]),
                    **with_prefix('result', result),
                    'iorIteration': iteration,
                })
    return records

In [None]:
# Root directory of the raw IOR logs; every entry directly below it is
# searched for summary files.
IOR_OUTPUT_DIR = Path("../raw/ior")

# Flatten every ior_summary_*.json found one level below IOR_OUTPUT_DIR into a
# single frame with one row per record returned by load_ior_summary_list.
# Both globs are sorted because directory listing order depends on the
# filesystem; sorting keeps the row order (and the preview below) reproducible.
# The path variable is deliberately not named `json`, so it cannot be confused
# with the imported `json` module.
df_src = pl.DataFrame([
    record
    for log_dir in sorted(IOR_OUTPUT_DIR.glob("*"))
    for summary_path in sorted(log_dir.glob("ior_summary_*.json"))
    for record in load_ior_summary_list(summary_path)
])

# Preview the first rows as the cell output.
df_src.head(5)

In [None]:
# Work from the frame assembled from the raw IOR logs.
df = df_src

# Columns that identify one configuration; rows sharing all of these values
# are averaged together below.
groupby_columns = [
    "Options_nodes",
    "summary_numTasks",
    "summary_reorderTasks",
    "summary_transferSize",
    "result_access",
]

# Mean bandwidth per configuration.  reorderTasks is cast to Boolean so it can
# be compared directly against the True/False flags used in the loop below.
reorder_as_bool = pl.col("summary_reorderTasks").cast(pl.Boolean)
mean_bandwidth = pl.mean("result_bwMiB").alias("result_bwMiB_mean")
df_plot = df.with_columns(reorder_as_bool).group_by(groupby_columns).agg(mean_bandwidth)

# One bar chart per (access type, reorderTasks flag) combination.
# NOTE(review): `remote` is matched against summary_reorderTasks; presumably
# reordered tasks read data written by another node - confirm.
for access, remote in (("write", False), ("read", True), ("read", False)):
    print(f"Access: {access}, Remote: {remote}")
    fig = plt.figure(figsize=(16, 9), dpi=100)
    # Opaque figure background.
    fig.patch.set_alpha(1)

    selected = (pl.col("result_access") == access) & (pl.col("summary_reorderTasks") == remote)
    # Rows: node count, columns: transfer size, cells: mean bandwidth.
    bandwidth_table = (
        df_plot
        .filter(selected)
        .sort(["Options_nodes", "summary_transferSize"])
        .pivot(values="result_bwMiB_mean", index="Options_nodes", columns="summary_transferSize")
    )
    # Show the table that backs the chart before drawing it.
    display(bandwidth_table)

    bandwidth_table.to_pandas().set_index("Options_nodes").plot(
        ax=fig.add_subplot(),
        kind="bar",
        xlabel="Number of nodes",
        ylabel="Bandwidth [MiB/s]",
        rot=0,
        width=0.7,
    )
    plt.show()
