In [None]:
import os

from TraceLens import NcclAnalyser

In [None]:
# Notebook configuration: edit these two values to match your run.
#   root_dir   -- directory that holds the per-rank trace files
#   world_size -- number of ranks; one path is generated per rank index
root_dir = '/path/to/your/trace/files/directory'
world_size = 8

# One trace path per rank, ordered by rank index (rank0 .. rank{world_size-1}),
# following the rank{i}_trace.json naming scheme.
list_profile_filepaths = [
    os.path.join(root_dir, f'rank{rank}_trace.json')
    for rank in range(world_size)
]


In [None]:
# Construct the analyser from the per-rank trace paths and the world size.
my_nccl_analyser = NcclAnalyser(
    list_profile_filepaths,
    world_size,
)

In [None]:
# Quick start: summarise the "implicit sync" category of collectives.
# That category spans the common, important operations -- allreduce,
# reducescatter, allgather and alltoall (alltoallv is not part of it).
df_summary = my_nccl_analyser.build_df_summary_nccl_implicit_sync_cat(
    agg_metrics=['mean'],
)
df_summary

In [None]:
# Power users: many more DataFrames are available beyond the summary.

# Start with the per-rank collective table, a.k.a. "df long":
# each row describes one collective kernel on one rank.
df_long = my_nccl_analyser.build_df_long()

In [None]:
# df_long also carries a collective id column that uniquely identifies each
# collective across ranks. It is built as
#     "Process Group Name" + "_" + "index_in_group"
# where index_in_group is the collective's position in trace order:
# 0 for the earliest collective, 1 for the next one, and so on.
df_long.head()

In [None]:
# Drill into the implicit sync category: one row per collective operation.
#
# Metrics reported:
#   communication latency = minimum duration across ranks
#   algo bw               = msg size / communication latency
#   bus bw                = algo bw * scaling factor
df_implicit_sync_cat = my_nccl_analyser.build_df_nccl_implicit_sync_cat()
df_implicit_sync_cat.head()

In [None]:
# The same implicit sync table in its detailed form, which additionally
# includes per-rank timestamps and durations.
df_implicit_sync_cat_detailed = my_nccl_analyser.build_df_nccl_implicit_sync_cat(
    detailed=True,
)
df_implicit_sync_cat_detailed.head()

In [None]:
# alltoallv is handled on its own: each rank may send and receive a
# different amount of data, so the implicit sync property does not hold.
# Only raw data is exposed here -- the calculations are left to the user,
# and more metrics will be added based on user feedback.

df_all2allv = my_nccl_analyser.build_df_nccl_all2allv()
df_all2allv.head()