In [64]:
import json, pandas as pd

def load_trace(path):
    """Load the list of trace events from a JSON profiling file.

    Two layouts are accepted:
      * an object with a ``traceEvents`` key -> that key's value is returned;
      * a bare JSON array -> returned as-is.

    Args:
        path: Filesystem path of the JSON trace.

    Returns:
        The events container found in the file.

    Raises:
        ValueError: If the top-level JSON value matches neither layout.
    """
    # JSON text is UTF-8; pin the encoding so the result does not depend on
    # the platform's locale default (e.g. cp1252 on Windows).
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    if isinstance(data, dict) and 'traceEvents' in data:
        return data['traceEvents']
    if isinstance(data, list):
        return data
    raise ValueError("Unrecognized trace format")

def format_shapes(input_shapes, output_shapes):
    """Render input and output shapes as a single space-separated string.

    Each argument is a sequence of mappings (or None / empty). For the i-th
    mapping, every value is treated as an iterable of dimensions and rendered
    as ``inputs[i] = {d0,d1,...}`` (or ``outputs[i] = {...}``). The mapping
    keys are ignored.

    Returns:
        All input fragments followed by all output fragments, joined by a
        single space; an empty string when there is nothing to render.
    """
    def _fragments(label, shape_dicts):
        # One fragment per value of each mapping, tagged with the mapping's index.
        for idx, entry in enumerate(shape_dicts or []):
            for dims in entry.values():
                joined = ",".join(str(dim) for dim in dims)
                yield f"{label}[{idx}] = {{{joined}}}"

    pieces = [
        *_fragments("inputs", input_shapes),
        *_fragments("outputs", output_shapes),
    ]
    return " ".join(pieces)

def build_dataframe(events):
    """Flatten trace events into a DataFrame with one row per timed event.

    Events without a ``dur`` field are skipped. The remaining events are
    mapped by their ``cat`` value:

      * ``'Node'``   -> ``ep`` is ``args['provider']`` (default
        ``'CPUExecutionProvider'``) with the ``ExecutionProvider`` suffix
        removed; ``'WebGpuExecutionProvider'`` is reported as ``'WebGPU-CPU'``.
        ``op`` is ``args['op_name']`` (falling back to the event name) and
        ``shape`` is the formatted input/output type-shape info.
      * ``'Kernel'`` -> ``ep`` is ``'CUDA/ROCm'``, ``op`` is the event name.
      * ``'Api'``    -> ``ep`` is ``'WebGPU-GPU'``; the name is split on ``'&'``
        and the 2nd / 3rd parts (when present) become ``op`` / ``shader``;
        ``shape`` is ``args['shapes']``.

    Events of any other category are ignored.

    Args:
        events: Iterable of event dicts.

    Returns:
        DataFrame with columns ``ep``, ``op``, ``shader``, ``shape``, ``dur``.
        The columns are present even when no event qualifies.
    """
    columns = ['ep', 'op', 'shader', 'shape', 'dur']
    rows = []
    for e in events:
        dur = e.get('dur')
        if dur is None:
            continue
        cat = e.get('cat')
        name = e.get('name', '')
        args = e.get('args') or {}
        if cat == 'Node':
            prov = args.get('provider', 'CPUExecutionProvider')
            if prov == 'WebGpuExecutionProvider':
                prov = 'WebGPU-CPU'
            op = args.get('op_name', name)
            input_type_shape = args.get('input_type_shape')
            output_type_shape = args.get('output_type_shape')
            rows.append({'ep': prov.replace('ExecutionProvider', ''),
                         'op': op, 'shader': None,
                         'shape': format_shapes(input_type_shape, output_type_shape),
                         'dur': dur})
        elif cat == 'Kernel':
            rows.append({'ep': 'CUDA/ROCm', 'op': name, 'shader': None, 'shape': None, 'dur': dur})
        elif cat == 'Api':
            parts = name.split('&')
            op = parts[1] if len(parts) >= 2 else name
            shader = parts[2] if len(parts) >= 3 else None
            shape = args.get('shapes')
            rows.append({'ep': 'WebGPU-GPU', 'op': op, 'shader': shader, 'shape': shape, 'dur': dur})
    # Pin the schema: with no rows, a bare pd.DataFrame([]) has no columns and
    # attribute access such as df.ep would raise AttributeError.
    return pd.DataFrame(rows, columns=columns)

def summarize(df, ep_name, group_keys):
    """Print a duration breakdown for one execution provider.

    Rows of ``df`` whose ``ep`` equals ``ep_name`` are grouped by the entries
    of ``group_keys`` that exist as columns. For each group the printed table
    shows the row count, total ``dur``, average ``dur``, share of the
    provider's total ``dur`` in percent, and the running cumulative share,
    sorted by total descending and rounded to two decimals.

    Args:
        df: Frame with at least ``ep`` and ``dur`` columns.
        ep_name: Value of ``ep`` to select.
        group_keys: Candidate column names to group by.

    Returns:
        None. The table (or a "No data" notice) is written to stdout.
    """
    sub = df[df.ep == ep_name]
    if sub.empty:
        print(f"No data for {ep_name}")
        return
    total = sub.dur.sum()
    keys = [k for k in group_keys if k in sub.columns]
    # dropna=False keeps rows whose key is None/NaN (e.g. rows without a shape
    # or shader). The default would silently discard them, so the percentages
    # would not add up to 100 and the table could even come out empty.
    agg = (sub.groupby(keys, dropna=False).dur
              .agg(count='size', total='sum')
              .reset_index()
           )
    agg['avg'] = agg.total / agg['count']
    agg['pct'] = 100 * agg.total / total
    agg = agg.sort_values('total', ascending=False)
    agg['cum_pct'] = agg.pct.cumsum()
    print(f"\n--- {ep_name} by {'+'.join(keys)} ---")
    print(agg.round(2).to_string(index=False))

def show(path, shape=False):
    """Load a trace file and print a summary for every provider found in it.

    Each provider is summarized by ``op``. The ``'WebGPU-GPU'`` provider gets
    a second summary keyed by ``shader``. When ``shape`` is true, ``'shape'``
    is added as an extra grouping key to every summary.
    """
    # Use raw string or double backslashes: r"C:\dev\ort_env\file.json"
    frame = build_dataframe(load_trace(path))
    shape_keys = ['shape'] if shape else []
    for ep in frame.ep.unique():
        summarize(frame, ep, ['op'] + shape_keys)
        if ep == 'WebGPU-GPU':
            summarize(frame, ep, ['shader'] + shape_keys)


In [65]:
# Raw string: in a normal literal, "\d", "\o" and "\y" are invalid escape
# sequences and trigger a warning; the resulting path value is unchanged.
show(r"C:\dev\ort_env\yolo11n_desktop_cpu.json", shape=True)

  show("C:\dev\ort_env\yolo11n_desktop_cpu.json", shape=True)



--- CPU by op+shape ---
           op                                                                                                                              shape  count  total     avg   pct  cum_pct
      Softmax                                                                               inputs[0] = {1,16,4,8400} outputs[0] = {1,16,4,8400}     21  84049 4002.33 11.25    11.25
ReorderOutput                                                                             inputs[0] = {1,16,320,320} outputs[0] = {1,16,320,320}     21  31590 1504.29  4.23    15.48
ReorderOutput                                                                             inputs[0] = {1,32,160,160} outputs[0] = {1,32,160,160}     42  17240  410.48  2.31    17.79
         Conv                                        inputs[0] = {1,64,80,80} inputs[1] = {64,64,3,3} inputs[2] = {64} outputs[0] = {1,64,80,80}     42  15999  380.93  2.14    19.93
 ReorderInput                                                    

In [66]:
# Summarize this trace per provider; shape=True adds 'shape' to the grouping keys.
# NOTE(review): hardcoded absolute local path - adjust for your machine.
show(r"C:\dev\ort_env\yolo11n_desktop_webgpu.json", shape=True)


--- WebGPU-CPU by op+shape ---
       op                                                                                                                              shape  count  total     avg   pct  cum_pct
     Conv                                       inputs[0] = {1,160,160,16} inputs[1] = {8,16,3,3} inputs[2] = {8} outputs[0] = {1,160,160,8}     21 142889 6804.24 13.64    13.64
     Conv                                        inputs[0] = {1,20,20,64} inputs[1] = {64,64,3,3} inputs[2] = {64} outputs[0] = {1,20,20,64}    189  37741  199.69  3.60    17.24
      Mul                                                        inputs[0] = {1,80,80,64} inputs[1] = {1,80,80,64} outputs[0] = {1,80,80,64}    147  32535  221.33  3.11    20.35
     Conv                                        inputs[0] = {1,80,80,32} inputs[1] = {16,32,3,3} inputs[2] = {16} outputs[0] = {1,80,80,16}     42  28052  667.90  2.68    23.03
      Mul                                                        inputs[0] = {

In [67]:
# Summarize this trace per provider; shape=True adds 'shape' to the grouping keys.
# NOTE(review): hardcoded absolute local path - adjust for your machine.
show(r'C:\dev\perf_results\pixel\image_perf_and_logs\yolo11n_cpu_2025-05-01_17-49-54.json', shape=True)


--- CPU by op+shape ---
         op                                                                                                                              shape  count  total     avg  pct  cum_pct
       Conv                                        inputs[0] = {1,64,80,80} inputs[1] = {64,64,3,3} inputs[2] = {64} outputs[0] = {1,64,80,80}      4  13744 3436.00 7.98     7.98
       Conv                                      inputs[0] = {1,64,160,160} inputs[1] = {64,64,3,3} inputs[2] = {64} outputs[0] = {1,64,80,80}      2  10985 5492.50 6.38    14.36
       Conv                                   inputs[0] = {1,128,80,80} inputs[1] = {128,128,3,3} inputs[2] = {128} outputs[0] = {1,128,40,40}      2   9521 4760.50 5.53    19.89
       Conv                                    inputs[0] = {1,16,320,320} inputs[1] = {32,16,3,3} inputs[2] = {32} outputs[0] = {1,32,160,160}      2   7347 3673.50 4.27    24.16
       Conv                                      inputs[0] = {1,3,640,640} input

In [68]:
# Summarize this trace per provider; shape=True adds 'shape' to the grouping keys.
# NOTE(review): hardcoded absolute local path - adjust for your machine.
show(r"C:\dev\perf_results\pixel\image_perf_and_logs\yolo11n_webgpu_2025-05-01_17-48-54.json", shape=True)


--- WebGPU-CPU by op+shape ---
       op                                                                                                                              shape  count  total      avg  pct  cum_pct
     Conv                                                         inputs[0] = {1,4,8400,16} inputs[1] = {1,16,1,1} outputs[0] = {1,4,8400,1}      2  41796 20898.00 7.26     7.26
     Conv                                      inputs[0] = {1,640,640,3} inputs[1] = {16,3,3,3} inputs[2] = {16} outputs[0] = {1,320,320,16}      2  41420 20710.00 7.19    14.45
     Conv                                       inputs[0] = {1,160,160,16} inputs[1] = {8,16,3,3} inputs[2] = {8} outputs[0] = {1,160,160,8}      2  41313 20656.50 7.17    21.62
   MatMul                                                       inputs[0] = {1,2,400,32} inputs[1] = {1,2,32,400} outputs[0] = {1,2,400,400}      2  39718 19859.00 6.90    28.52
     Conv                                    inputs[0] = {1,160,160,32} inputs