In [None]:
# Copyright (c) 2025 IBM
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

In [11]:
from pathlib import Path

# Locate the directory that holds the profiler CSVs: next to this file when
# `__file__` is defined, otherwise relative to the current working directory.
if "__file__" in globals():
    BASE_DIR = Path(__file__).resolve().parent
else:
    BASE_DIR = Path.cwd()
RESULTS_DIR = BASE_DIR.joinpath("results")

# One profiler CSV per benchmarked library.
GHL_PATH = RESULTS_DIR.joinpath("ghl_profiler.csv")
GT_PATH = RESULTS_DIR.joinpath("graphtool_profiler.csv")
IG_PATH = RESULTS_DIR.joinpath("igraph_profiler_vf2.csv")
NX_PATH = RESULTS_DIR.joinpath("networkx_profiler_vf2.csv")


In [12]:
import pandas as pd
# Read the four profiler CSVs in one pass; low_memory=False makes pandas parse
# each file in a single chunk rather than inferring dtypes piecewise.
ghl_df, gt_df, ig_df, nx_df = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (GHL_PATH, GT_PATH, IG_PATH, NX_PATH)
)

In [13]:
# For every library, collect the distinct (target_graph_path, subgraph_path)
# tuples present in its profiler frame (duplicates collapse in the set).
ghl_pairs, gt_pairs, ig_pairs, nx_pairs = (
    set(frame[['target_graph_path', 'subgraph_path']].itertuples(index=False, name=None))
    for frame in (ghl_df, gt_df, ig_df, nx_df)
)

In [14]:
# Pairs present in the GHL, NetworkX and igraph results alike.
# NOTE(review): gt_pairs is not part of this intersection, so the graph-tool
# frame may cover fewer of these pairs than the other three — confirm this is intended.
common_pairs = ghl_pairs.intersection(nx_pairs, ig_pairs)

In [15]:
# Report how many pairs survived the intersection.
n_common_pairs = len(common_pairs)
print(f"Number of common pairs: {n_common_pairs}")

Number of common pairs: 249


In [16]:
def filter_df(df, common_pairs):
    """Return the rows of ``df`` whose (target, subgraph) pair is in ``common_pairs``.

    Args:
        df: DataFrame with ``target_graph_path`` and ``subgraph_path`` columns.
        common_pairs: Collection of ``(target_graph_path, subgraph_path)`` tuples to keep.

    Returns:
        The matching rows of ``df``, in their original order and with their
        original index labels.
    """
    row_pairs = zip(df['target_graph_path'], df['subgraph_path'])
    # Boolean mask aligned on df's own index, one entry per row.
    keep_mask = pd.Series(
        [pair in common_pairs for pair in row_pairs],
        index=df.index,
        dtype=bool,
    )
    return df[keep_mask]

In [17]:
# Restrict every library's frame to the shared pairs (same call order as before:
# GHL, igraph, NetworkX, graph-tool).
ghl_filtered, ig_filtered, nx_filtered, gt_filtered = (
    filter_df(frame, common_pairs)
    for frame in (ghl_df, ig_df, nx_df, gt_df)
)

In [18]:
# SANITY CHECK
# Row counts after filtering; a count below len(common_pairs) means that
# library has no row for some of the shared pairs.
length_report = [
    f"GHL Len: {len(ghl_filtered)}",
    f"IG Len: {len(ig_filtered)}",
    f"NX Len: {len(nx_filtered)}",
    f"GT Len: {len(gt_filtered)}",
]
for report_line in length_report:
    print(report_line)

GHL Len: 249
IG Len: 249
NX Len: 249
GT Len: 178


In [19]:
# "NX vertex problem": discard the NetworkX rows whose target graph is recorded
# with 199 vertices, so they do not form a size bucket of their own below.
# NOTE(review): presumably the other libraries report these graphs under a
# different vertex count — confirm whether re-labelling would be more
# appropriate than dropping the rows.
NX_EXCLUDED_TARGET_SIZE = 199
nx_filtered = nx_filtered[nx_filtered['target_graph_num_vertices'] != NX_EXCLUDED_TARGET_SIZE]

# Library label -> filtered profiler frame; the label becomes the "Library" column.
# NOTE(review): 'networx' looks like a misspelling of 'networkx'. It is left as-is
# because the label is written to the exported CSV and may be matched downstream.
dfs = {
    'ghl': ghl_filtered,
    'igraph': ig_filtered,
    'networx': nx_filtered,
    'gt': gt_filtered
}

# One summary row per (library, target size): distribution statistics of the
# per-pair `mean` column plus the average number of matchings found.
rows = []
for lib, df in dfs.items():
    for target_size, group in df.groupby('target_graph_num_vertices'):
        mean_values = group['mean']
        found_matches = group['num_found_matchings']
        rows.append({
            "Target Size": target_size,
            "Library": lib,
            "Min": mean_values.min(),
            "25%": mean_values.quantile(0.25),
            "50%": mean_values.quantile(0.50),
            "75%": mean_values.quantile(0.75),
            "Max": mean_values.max(),
            "Mean": mean_values.mean(),
            # Series.sum() skips NaN exactly like .mean() above, so "Sum" and
            # "Mean" stay consistent; the built-in sum() would return NaN instead.
            "Sum": mean_values.sum(),
            "Num Pairs": len(mean_values),
            "Mean Matches": found_matches.mean()
        })

combined_df = pd.DataFrame(rows)


In [20]:
# SANITY CHECK
# Total "Num Pairs" across all summary rows divided by 4 (one share per entry
# in `dfs`); a non-integer result means the libraries cover different row counts.
total_pairs = combined_df['Num Pairs'].sum()
print(total_pairs / 4)

230.75


In [21]:
# Baseline: GHL's "Mean" for each target size, indexed by "Target Size".
ghl_means = combined_df[combined_df['Library'] == "ghl"].set_index("Target Size")['Mean']

# Ratio of every row's "Mean" to the GHL "Mean" of the same target size
# (exactly 1.0 on the GHL rows). Series.map looks each target size up in the
# baseline's index, so a size without a GHL row yields NaN instead of the
# KeyError a per-row `ghl_means[...]` lookup would raise.
combined_df['ghl_comp'] = combined_df['Mean'] / combined_df['Target Size'].map(ghl_means)
combined_df = combined_df.sort_values(by=['Target Size', 'Library'])
def smart_format(x):
    """Format a table cell for display.

    Values that ``float()`` cannot convert (e.g. library names, ``None``) are
    returned unchanged. Convertible values are rendered with three decimals:
    scientific notation when the magnitude is below 1, fixed-point otherwise.
    """
    try:
        number = float(x)
    except (ValueError, TypeError):
        # Not numeric: hand the original object back untouched.
        return x
    spec = ".3E" if abs(number) < 1 else ".3f"
    return format(number, spec)
# Apply smart_format to every cell of the summary. The result is a separate
# frame, bound to formatted_df; combined_df is not reassigned here.
# (A global display option, pd.set_option("display.float_format", ...), was the
# alternative considered; per-cell formatting is used instead.)
formatted_df = combined_df.map(smart_format)
formatted_df

Unnamed: 0,Target Size,Library,Min,25%,50%,75%,Max,Mean,Sum,Num Pairs,Mean Matches,ghl_comp
0,200.0,ghl,0.0006755,0.001653,0.04122,0.07128,15.048,0.1863,20.303,109.0,6669.193,1.0
9,200.0,gt,0.02443,0.07377,0.4736,24.934,1474.942,93.31,6345.071,68.0,893.088,500.945
3,200.0,igraph,0.001446,0.01625,0.02813,0.06811,14.177,0.4207,45.853,109.0,6751.45,2.258
6,200.0,networx,0.02911,0.9219,2.205,4.076,134.738,5.532,591.885,107.0,592.131,29.697
1,400.0,ghl,0.001729,0.0731,0.1874,0.3213,0.5213,0.2032,16.053,79.0,745.81,1.0
10,400.0,gt,0.1721,0.5025,1.491,92.315,1331.213,72.773,4148.038,57.0,800.0,358.118
4,400.0,igraph,0.03361,0.08024,0.1384,0.2672,70.9,1.45,114.511,79.0,1179.0,7.133
7,400.0,networx,0.5083,6.5,12.756,28.627,156.602,22.096,1745.551,79.0,745.81,108.734
2,800.0,ghl,0.005046,0.7388,0.8979,1.415,2.08,1.043,63.623,61.0,1496.41,1.0
11,800.0,gt,1.581,4.419,5.646,57.254,490.184,82.18,4355.554,53.0,1600.0,78.793


In [None]:
# Write the formatted summary next to the input CSVs, without the index column.
summary_csv_path = RESULTS_DIR / "ghl_nx_igraph_by_target_size2.csv"
formatted_df.to_csv(summary_csv_path, index=False)

In [None]:
# SANITY CHECK:
# Cross-check the "Mean Matches" figure of the 200-vertex GHL bucket against
# the raw rows.
sample_ghl = ghl_filtered[ghl_filtered['target_graph_num_vertices'] == 200]
print(f"Number of 200 samples: {len(sample_ghl)}")
num_found_matchings = sample_ghl['num_found_matchings']
# Series reductions match how "Mean Matches" is computed in the summary and
# return NaN on an empty selection, where sum()/len() and max()/min() would raise.
print(f"Mean found matchings: {num_found_matchings.mean()}, max: {num_found_matchings.max()}, min: {num_found_matchings.min()}")

Number of 200 samples: 109
Mean found matchings: 6669.192660550459, max: 663552, min: 1


In [None]:
# Overall average of the per-pair `mean` column for GHL and igraph, followed
# by their ratio (igraph over GHL).
overall_ghl_means = ghl_filtered['mean'].mean()
overall_ig_means = ig_filtered['mean'].mean()
for overall_value in (overall_ig_means, overall_ghl_means):
    print(overall_value)
igraph_speedup = overall_ig_means / overall_ghl_means
print(f"Mean speedup over IGraph: {igraph_speedup}")


0.9477828465688255
0.40152368531244975
Mean speedup over IGraph: 2.36046559951077


In [None]:
overall_gt_means = gt_filtered['mean'].mean()
print(f"Mean speedup over GraphTool: {overall_gt_means/overall_ghl_means}")

# The ratio above divides a graph-tool mean taken over gt_filtered by a GHL
# mean taken over all of ghl_filtered. The two frames need not cover the same
# pairs, because common_pairs is built without gt_pairs. Also report the
# like-for-like figure, with the GHL mean restricted to the
# (target, subgraph) pairs that have a graph-tool row.
gt_measured_pairs = set(zip(gt_filtered['target_graph_path'], gt_filtered['subgraph_path']))
ghl_on_gt_pairs = filter_df(ghl_filtered, gt_measured_pairs)
if len(ghl_on_gt_pairs) > 0:
    matched_ghl_mean = ghl_on_gt_pairs['mean'].mean()
    print(
        f"Mean speedup over GraphTool on the {len(ghl_on_gt_pairs)} GHL rows whose pair "
        f"GraphTool also measured: {overall_gt_means / matched_ghl_mean}"
    )

Mean speedup over GraphTool: 207.757245575498


In [None]:
# NetworkX counterpart: ratio of the overall per-pair means (NetworkX over GHL).
overall_nx_means = nx_filtered['mean'].mean()
networkx_speedup = overall_nx_means / overall_ghl_means
print(f"Mean speedup over NetworkX: {networkx_speedup}")

Mean speedup over NetworkX: 100.82599394182814
