In [None]:
from pprint import pprint
import json
import warnings
import pandas as pd
from TraceLens import TreePerfAnalyzer

In [None]:
def get_merged_comparison_with_mapping(name1, df_agg1, name2, df_agg2, mapping, include_counts=False):
    """
    Compare two aggregated kernel-launcher tables op by op.

    Rows are paired through an artificial merge key: an op name found in
    `mapping` is replaced by its mapped key, any other name is used as its own
    key. The two tables are inner-joined on that key, so only ops present on
    both sides appear in the result; names left out are reported with a warning.

    The input DataFrames are not modified.

    Parameters:
      name1 (str): Label of the first dataset; used as a column suffix.
      df_agg1 (pd.DataFrame): First table. Must contain the columns 'name' and
          'total_direct_kernel_time_sum' (and 'Count' if include_counts is True).
      name2 (str): Label of the second dataset; used as a column suffix.
      df_agg2 (pd.DataFrame): Second table, same columns as df_agg1.
      mapping (dict): Op name -> merge key, for ops whose names differ between
          the two datasets.
      include_counts (bool): When True, also keep the 'Count' column of each side.

    Returns:
      pd.DataFrame: One row per matched pair with the original names of both
      sides, both 'total_direct_kernel_time_sum' values and their ratio
      (name1 / name2).
    """

    def with_merge_key(df):
        # assign() returns a new frame, so the caller's DataFrame is left untouched.
        keys = df['name'].map(lambda op: mapping[op] if op in mapping else op)
        return df.assign(merge_key=keys)

    # Merge using the artificial merge_key
    merged_df = pd.merge(
        with_merge_key(df_agg1),
        with_merge_key(df_agg2),
        on='merge_key',
        how='inner',
        suffixes=(f'_{name1}', f'_{name2}')
    )

    # Identify names that failed to merge
    not_merged_1 = set(df_agg1['name']) - set(merged_df[f'name_{name1}'])
    not_merged_2 = set(df_agg2['name']) - set(merged_df[f'name_{name2}'])

    # Raise a warning for any missing names
    if not_merged_1:
        warnings.warn(f"The following names in '{name1}' were not merged: {not_merged_1}")
    if not_merged_2:
        warnings.warn(f"The following names in '{name2}' were not merged: {not_merged_2}")

    # Calculate duration ratio for matched rows
    ratio_column = f'Kernel Duration (ms) Ratio ({name1}/{name2})'
    merged_df[ratio_column] = (
        merged_df[f'total_direct_kernel_time_sum_{name1}'] /
        merged_df[f'total_direct_kernel_time_sum_{name2}']
    )

    # Rearrange columns: names first, then per-dataset values, then the ratio
    hardware_specific_columns = [
        f'total_direct_kernel_time_sum_{name1}', f'total_direct_kernel_time_sum_{name2}',
    ]
    if include_counts:
        hardware_specific_columns.extend([f'Count_{name1}', f'Count_{name2}'])
    columns_to_keep = (
        [f'name_{name1}', f'name_{name2}'] + hardware_specific_columns + [ratio_column]
    )

    # Select only the necessary columns
    return merged_df[columns_to_keep]


In [None]:
# Profile A: load the trace file and build the two kernel-launcher tables that
# the comparison cells below consume.
# Placeholder path -- point it at the first profile JSON. NOTE: `path` is
# rebound by the profile B cell, so it only refers to profile A until then.
path = '/path/to/pytorch_profile_A.json'
# Label for this profile; the comparison helpers use it as a column suffix.
name_A = 'A'
perf_analyzer_A = TreePerfAnalyzer.from_file(path)
# Kernel-launcher table; passed unchanged to get_merged_comparison_for_op.
# (presumably one row per kernel-launching op event -- TODO confirm against TraceLens)
df_A_kernel_launchers = perf_analyzer_A.get_df_kernel_launchers()
# Summary derived from the table above; get_merged_comparison_with_mapping reads
# its 'name' and 'total_direct_kernel_time_sum' columns.
df_agg_A_kernel_launchers = perf_analyzer_A.get_df_kernel_launchers_summary(df_A_kernel_launchers)


In [None]:
# Profile B: same loading steps as profile A, producing the second pair of
# kernel-launcher tables for the comparison cells below.
# Placeholder path -- point it at the second profile JSON. NOTE: this rebinds
# the `path` variable that the profile A cell also assigns.
path = '/path/to/pytorch_profile_B.json'
# Label for this profile; the comparison helpers use it as a column suffix.
name_B = 'B'
perf_analyzer_B = TreePerfAnalyzer.from_file(path)
# Kernel-launcher table; passed unchanged to get_merged_comparison_for_op.
# (presumably one row per kernel-launching op event -- TODO confirm against TraceLens)
df_B_kernel_launchers = perf_analyzer_B.get_df_kernel_launchers()
# Summary derived from the table above; get_merged_comparison_with_mapping reads
# its 'name' and 'total_direct_kernel_time_sum' columns.
df_agg_B_kernel_launchers = perf_analyzer_B.get_df_kernel_launchers_summary(df_B_kernel_launchers)


In [None]:

# Merge key -> op names that should be compared as if they were the same op.
dict_mergekey2names = {
    "conv leaf": [
        "aten::cudnn_convolution",
        "aten::miopen_convolution",
    ],
    "bn bwd leaf": [
        "aten::native_batch_norm_backward",
        "aten::miopen_batch_norm_backward",
    ],
    "bn leaf": [
        "aten::native_batch_norm",
        "aten::miopen_batch_norm",
    ],
    "fa bwd leaf": [
        "FlashAttnFuncBackward",
        "flash_attn::_flash_attn_backward",
        "aten::_efficient_attention_backward",
        "aten::_scaled_dot_product_cudnn_attention_backward",
    ],
    "fa fwd leaf": [
        "FlashAttnFunc",
        "flash_attn::_flash_attn_forward",
        "aten::_efficient_attention_forward",
        "aten::_scaled_dot_product_cudnn_attention",
    ],
}

# Invert the table into the op name -> merge key lookup the helper expects.
dict_name2mergekey = {
    op_name: merge_key
    for merge_key, op_names in dict_mergekey2names.items()
    for op_name in op_names
}

# Aggregated comparison of profile A against profile B; show the first 20 rows.
df_comparison_agg_kernel_launchers = get_merged_comparison_with_mapping(
    name_A, df_agg_A_kernel_launchers,
    name_B, df_agg_B_kernel_launchers,
    dict_name2mergekey,
)
df_comparison_agg_kernel_launchers.head(20)

In [None]:
def get_merged_comparison_for_op(name1, df1, name2, df2, op_name, op_name2=None):
    """
    For a given op, this function:
      1. Extracts the shapes breakdown summary for the op from each dataset.
      2. Inner-joins the summaries on the 'Input Dims', 'Input type', and
         'Input Strides' columns.
      3. Computes the ratio of 'Total Kernel Time (ms)' between the two datasets.

    Shape keys that exist on only one side are dropped by the join and reported
    with a warning.

    Parameters:
      name1 (str): Name for the first dataset; used as a column suffix.
      df1 (pd.DataFrame): Kernel launchers DataFrame for the first dataset.
      name2 (str): Name for the second dataset; used as a column suffix.
      df2 (pd.DataFrame): Kernel launchers DataFrame for the second dataset.
      op_name (str): The op (kernel launcher) name to filter by in df1.
      op_name2 (str, optional): The op name to filter by in df2, for ops that
          are named differently in the two datasets (e.g. 'aten::cudnn_convolution'
          vs 'aten::miopen_convolution'). Defaults to op_name.

    Returns:
      pd.DataFrame: The merge keys, the 'Total Kernel Time (ms)' of each dataset
      and their ratio (name1 / name2), one row per shape key found in both.
    """
    # Unless told otherwise, the op carries the same name in both datasets.
    if op_name2 is None:
        op_name2 = op_name

    # Get the shapes breakdown summary for the op from each dataset.
    df1_shapes = TreePerfAnalyzer.get_df_kernel_launchers_summary_by_shape(df1, op_name)
    df2_shapes = TreePerfAnalyzer.get_df_kernel_launchers_summary_by_shape(df2, op_name2)

    # Define the keys to merge on.
    merge_keys = ["Input Dims", "Input type", "Input Strides"]

    # Merge the two summaries on the specified keys.
    merged_df = pd.merge(df1_shapes, df2_shapes, on=merge_keys, suffixes=(f"_{name1}", f"_{name2}"))

    def key_tuples(df):
        # One plain tuple per row of the merge-key columns.
        return set(df[merge_keys].itertuples(index=False, name=None))

    # Identify shape keys that did not merge from each side.
    merged_keys = key_tuples(merged_df)
    not_merged_1 = key_tuples(df1_shapes) - merged_keys
    not_merged_2 = key_tuples(df2_shapes) - merged_keys

    if not_merged_1:
        warnings.warn(f"The following shape keys in '{name1}' were not merged: {not_merged_1}")
    if not_merged_2:
        warnings.warn(f"The following shape keys in '{name2}' were not merged: {not_merged_2}")

    # Compute a ratio of the Total Kernel Time (ms) between the two datasets.
    time_col_1 = f"Total Kernel Time (ms)_{name1}"
    time_col_2 = f"Total Kernel Time (ms)_{name2}"
    ratio_col = f"Total Kernel Time (ms) Ratio ({name1}/{name2})"
    merged_df[ratio_col] = merged_df[time_col_1] / merged_df[time_col_2]

    # Rearranging columns for clarity.
    return merged_df[merge_keys + [time_col_1, time_col_2, ratio_col]]


In [None]:
# Per-shape comparison of a single op between profiles A and B. The same op
# name ("aten::clamp_min_") is looked up in both profiles.
df_comparison_shapes = get_merged_comparison_for_op(
    name_A, df_A_kernel_launchers,
    name_B, df_B_kernel_launchers,
    "aten::clamp_min_"
)
# Display only the first 20 rows of the comparison.
df_comparison_shapes.head(20)
