In [1]:
# Copyright (c) Meta Platforms, Inc. and affiliates.

In [2]:
import copy
import json
import os

import numpy as np

from pycocotools.coco import COCO
from sam3.eval.cgf1_eval import CGF1Evaluator

# SA-Co/Gold

In [3]:
# Update to the directories where the SA-Co/Gold GT annotation files and the
# prediction dumps live. The placeholders below are ordinary strings so that
# this cell parses on a fresh "Restart & Run All"; replace both of them with
# real paths before running the evaluation cells.
GT_DIR = "/path/to/saco_gold/gt-annotations"  # PUT YOUR PATH HERE
PRED_DIR = "/path/to/saco_gold/predictions"  # PUT YOUR PATH HERE

In [4]:
# Relative file names for GT files for 7 SA-Co/Gold subsets.
# Every subset maps to three files that differ only in the "a"/"b"/"c" part of
# the name. The file stem usually equals the subset key; the two captioner
# subsets drop the "_nps" suffix in their file names.
saco_gold_gts = {
    subset: [
        f"gold_{stem}_merged_{part}_release_test.json" for part in ("a", "b", "c")
    ]
    for subset, stem in (
        ("metaclip_nps", "metaclip"),  # MetaCLIP Captioner
        ("sa1b_nps", "sa1b"),  # SA-1B captioner
        ("crowded", "crowded"),  # Crowded
        ("fg_food", "fg_food"),  # FG Food
        ("fg_sports_equipment", "fg_sports_equipment"),  # FG Sports
        ("attributes", "attributes"),  # Attributes
        ("wiki_common", "wiki_common"),  # Wiki common
    )
}

## Run offline evaluation for all 7 SA-Co/Gold subsets

We assume inference has already been run for all 7 subsets. With the default configuration, the predictions for each subset are expected at `PRED_DIR/gold_<subset>/dumps/gold_<subset>/coco_predictions_segm.json`.

In [5]:
# Per-subset metrics, keyed by subset name: one dict for mask (segm) IoU and
# one for box (bbox) IoU.
results_gold = {}
results_gold_bbox = {}

for subset_name, gts in saco_gold_gts.items():
    print("Processing subset: ", subset_name)
    gt_paths = [os.path.join(GT_DIR, gt) for gt in gts]
    pred_path = os.path.join(PRED_DIR, f"gold_{subset_name}/dumps/gold_{subset_name}/coco_predictions_segm.json")

    # Score the same prediction file twice: first with mask IoU, then with
    # box IoU. Each pass writes into its own results dict.
    for iou_type, results_by_subset in (("segm", results_gold), ("bbox", results_gold_bbox)):
        evaluator = CGF1Evaluator(gt_path=gt_paths, verbose=True, iou_type=iou_type)
        summary = evaluator.evaluate(pred_path)
        print(summary)

        # The summary keys read here all start with "cgF1_eval_<iou_type>_".
        key_prefix = f"cgF1_eval_{iou_type}_"
        cur_results = {
            # cgF1 and positive micro F1 are stored multiplied by 100;
            # IL_MCC is stored exactly as reported.
            "cgf1": summary[key_prefix + "cgF1"] * 100,
            "il_mcc": summary[key_prefix + "IL_MCC"],
            "pmf1": summary[key_prefix + "positive_micro_F1"] * 100,
        }
        results_by_subset[subset_name] = cur_results

Processing subset:  metaclip_nps
loading annotations into memory...
Done (t=0.28s)
creating index...
index created!
loading annotations into memory...
Done (t=0.26s)
creating index...
index created!
loading annotations into memory...
Done (t=0.27s)
creating index...
index created!
Loaded 26221 predictions


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33057/33057 [00:10<00:00, 3171.54it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.473
 Average precision  @[ IoU=0.50:0.95] = 0.609
 Average recall     @[ IoU=0.50:0.95] = 0.532
 Average F1         @[ IoU=0.50:0.95] = 0.568
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.759
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.586
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.652
 Average IL_precision                = 0.916
 Average IL_recall                   = 0.760
 Average IL_F1                       = 0.830
 Average IL_FPR                      = 0.013
 Average IL_MCC                      = 0.807
 Average cgF1       @[ IoU=0.50     ] = 0.568
 Average precision  @[ IoU=0.50     ] = 0.732
 Average recall     @[ IoU=0.50     ] = 0.639
 Average F1         @[ IoU=0.50     ] = 0.682
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.872
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.704
 Average positive_micro_precision @[ IoU=0.50     ] = 0.783
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33057/33057 [00:08<00:00, 3762.56it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.500
 Average precision  @[ IoU=0.50:0.95] = 0.645
 Average recall     @[ IoU=0.50:0.95] = 0.563
 Average F1         @[ IoU=0.50:0.95] = 0.601
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.813
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.620
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.690
 Average IL_precision                = 0.916
 Average IL_recall                   = 0.760
 Average IL_F1                       = 0.831
 Average IL_FPR                      = 0.013
 Average IL_MCC                      = 0.807
 Average cgF1       @[ IoU=0.50     ] = 0.571
 Average precision  @[ IoU=0.50     ] = 0.736
 Average recall     @[ IoU=0.50     ] = 0.642
 Average F1         @[ IoU=0.50     ] = 0.686
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.878
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.707
 Average positive_micro_precision @[ IoU=0.50     ] = 0.787
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12893/12893 [00:12<00:00, 1019.95it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.537
 Average precision  @[ IoU=0.50:0.95] = 0.613
 Average recall     @[ IoU=0.50:0.95] = 0.624
 Average F1         @[ IoU=0.50:0.95] = 0.618
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.749
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.626
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.627
 Average IL_precision                = 0.957
 Average IL_recall                   = 0.918
 Average IL_F1                       = 0.937
 Average IL_FPR                      = 0.055
 Average IL_MCC                      = 0.858
 Average cgF1       @[ IoU=0.50     ] = 0.662
 Average precision  @[ IoU=0.50     ] = 0.755
 Average recall     @[ IoU=0.50     ] = 0.769
 Average F1         @[ IoU=0.50     ] = 0.762
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.868
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.771
 Average positive_micro_precision @[ IoU=0.50     ] = 0.773
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12893/12893 [00:07<00:00, 1636.66it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.554
 Average precision  @[ IoU=0.50:0.95] = 0.633
 Average recall     @[ IoU=0.50:0.95] = 0.642
 Average F1         @[ IoU=0.50:0.95] = 0.637
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.786
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.645
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.648
 Average IL_precision                = 0.957
 Average IL_recall                   = 0.918
 Average IL_F1                       = 0.937
 Average IL_FPR                      = 0.055
 Average IL_MCC                      = 0.858
 Average cgF1       @[ IoU=0.50     ] = 0.656
 Average precision  @[ IoU=0.50     ] = 0.749
 Average recall     @[ IoU=0.50     ] = 0.760
 Average F1         @[ IoU=0.50     ] = 0.755
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.863
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.764
 Average positive_micro_precision @[ IoU=0.50     ] = 0.768
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20241/20241 [00:10<00:00, 2003.85it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.611
 Average precision  @[ IoU=0.50:0.95] = 0.643
 Average recall     @[ IoU=0.50:0.95] = 0.686
 Average F1         @[ IoU=0.50:0.95] = 0.664
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.689
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.677
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.669
 Average IL_precision                = 0.933
 Average IL_recall                   = 0.913
 Average IL_F1                       = 0.923
 Average IL_FPR                      = 0.018
 Average IL_MCC                      = 0.902
 Average cgF1       @[ IoU=0.50     ] = 0.735
 Average precision  @[ IoU=0.50     ] = 0.773
 Average recall     @[ IoU=0.50     ] = 0.825
 Average F1         @[ IoU=0.50     ] = 0.798
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.830
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.815
 Average positive_micro_precision @[ IoU=0.50     ] = 0.805
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20241/20241 [00:07<00:00, 2785.45it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.617
 Average precision  @[ IoU=0.50:0.95] = 0.650
 Average recall     @[ IoU=0.50:0.95] = 0.692
 Average F1         @[ IoU=0.50:0.95] = 0.670
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.731
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.684
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.677
 Average IL_precision                = 0.933
 Average IL_recall                   = 0.913
 Average IL_F1                       = 0.923
 Average IL_FPR                      = 0.018
 Average IL_MCC                      = 0.902
 Average cgF1       @[ IoU=0.50     ] = 0.730
 Average precision  @[ IoU=0.50     ] = 0.769
 Average recall     @[ IoU=0.50     ] = 0.818
 Average F1         @[ IoU=0.50     ] = 0.793
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.829
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.809
 Average positive_micro_precision @[ IoU=0.50     ] = 0.801
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13794/13794 [00:03<00:00, 3963.81it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.534
 Average precision  @[ IoU=0.50:0.95] = 0.734
 Average recall     @[ IoU=0.50:0.95] = 0.583
 Average F1         @[ IoU=0.50:0.95] = 0.650
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.825
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.673
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.795
 Average IL_precision                = 0.917
 Average IL_recall                   = 0.713
 Average IL_F1                       = 0.802
 Average IL_FPR                      = 0.006
 Average IL_MCC                      = 0.794
 Average cgF1       @[ IoU=0.50     ] = 0.582
 Average precision  @[ IoU=0.50     ] = 0.800
 Average recall     @[ IoU=0.50     ] = 0.635
 Average F1         @[ IoU=0.50     ] = 0.708
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.885
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.733
 Average positive_micro_precision @[ IoU=0.50     ] = 0.866
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13794/13794 [00:03<00:00, 4490.07it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.538
 Average precision  @[ IoU=0.50:0.95] = 0.737
 Average recall     @[ IoU=0.50:0.95] = 0.588
 Average F1         @[ IoU=0.50:0.95] = 0.654
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.859
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.676
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.797
 Average IL_precision                = 0.919
 Average IL_recall                   = 0.714
 Average IL_F1                       = 0.804
 Average IL_FPR                      = 0.006
 Average IL_MCC                      = 0.795
 Average cgF1       @[ IoU=0.50     ] = 0.583
 Average precision  @[ IoU=0.50     ] = 0.798
 Average recall     @[ IoU=0.50     ] = 0.637
 Average F1         @[ IoU=0.50     ] = 0.708
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.898
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.733
 Average positive_micro_precision @[ IoU=0.50     ] = 0.864
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12107/12107 [00:03<00:00, 3306.95it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.655
 Average precision  @[ IoU=0.50:0.95] = 0.733
 Average recall     @[ IoU=0.50:0.95] = 0.701
 Average F1         @[ IoU=0.50:0.95] = 0.717
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.840
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.738
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.778
 Average IL_precision                = 0.962
 Average IL_recall                   = 0.850
 Average IL_F1                       = 0.903
 Average IL_FPR                      = 0.006
 Average IL_MCC                      = 0.888
 Average cgF1       @[ IoU=0.50     ] = 0.737
 Average precision  @[ IoU=0.50     ] = 0.825
 Average recall     @[ IoU=0.50     ] = 0.789
 Average F1         @[ IoU=0.50     ] = 0.807
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.920
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.830
 Average positive_micro_precision @[ IoU=0.50     ] = 0.875
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12107/12107 [00:03<00:00, 3661.16it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.681
 Average precision  @[ IoU=0.50:0.95] = 0.760
 Average recall     @[ IoU=0.50:0.95] = 0.730
 Average F1         @[ IoU=0.50:0.95] = 0.745
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.879
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.766
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.806
 Average IL_precision                = 0.962
 Average IL_recall                   = 0.850
 Average IL_F1                       = 0.903
 Average IL_FPR                      = 0.006
 Average IL_MCC                      = 0.888
 Average cgF1       @[ IoU=0.50     ] = 0.735
 Average precision  @[ IoU=0.50     ] = 0.821
 Average recall     @[ IoU=0.50     ] = 0.788
 Average F1         @[ IoU=0.50     ] = 0.804
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.921
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.827
 Average positive_micro_precision @[ IoU=0.50     ] = 0.870
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9222/9222 [00:03<00:00, 2820.36it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.549
 Average precision  @[ IoU=0.50:0.95] = 0.643
 Average recall     @[ IoU=0.50:0.95] = 0.670
 Average F1         @[ IoU=0.50:0.95] = 0.656
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.872
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.720
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.778
 Average IL_precision                = 0.797
 Average IL_recall                   = 0.819
 Average IL_F1                       = 0.808
 Average IL_FPR                      = 0.048
 Average IL_MCC                      = 0.763
 Average cgF1       @[ IoU=0.50     ] = 0.600
 Average precision  @[ IoU=0.50     ] = 0.703
 Average recall     @[ IoU=0.50     ] = 0.733
 Average F1         @[ IoU=0.50     ] = 0.717
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.930
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.787
 Average positive_micro_precision @[ IoU=0.50     ] = 0.850
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9222/9222 [00:02<00:00, 3370.79it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.565
 Average precision  @[ IoU=0.50:0.95] = 0.660
 Average recall     @[ IoU=0.50:0.95] = 0.689
 Average F1         @[ IoU=0.50:0.95] = 0.674
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.901
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.739
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.798
 Average IL_precision                = 0.798
 Average IL_recall                   = 0.819
 Average IL_F1                       = 0.808
 Average IL_FPR                      = 0.048
 Average IL_MCC                      = 0.764
 Average cgF1       @[ IoU=0.50     ] = 0.602
 Average precision  @[ IoU=0.50     ] = 0.703
 Average recall     @[ IoU=0.50     ] = 0.734
 Average F1         @[ IoU=0.50     ] = 0.718
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.934
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.788
 Average positive_micro_precision @[ IoU=0.50     ] = 0.850
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 65452/65452 [00:11<00:00, 5775.55it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.425
 Average precision  @[ IoU=0.50:0.95] = 0.677
 Average recall     @[ IoU=0.50:0.95] = 0.509
 Average F1         @[ IoU=0.50:0.95] = 0.581
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.811
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.608
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.757
 Average IL_precision                = 0.822
 Average IL_recall                   = 0.607
 Average IL_F1                       = 0.698
 Average IL_FPR                      = 0.004
 Average IL_MCC                      = 0.699
 Average cgF1       @[ IoU=0.50     ] = 0.482
 Average precision  @[ IoU=0.50     ] = 0.767
 Average recall     @[ IoU=0.50     ] = 0.577
 Average F1         @[ IoU=0.50     ] = 0.658
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.905
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.689
 Average positive_micro_precision @[ IoU=0.50     ] = 0.857
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 65452/65452 [00:10<00:00, 6310.26it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.443
 Average precision  @[ IoU=0.50:0.95] = 0.706
 Average recall     @[ IoU=0.50:0.95] = 0.529
 Average F1         @[ IoU=0.50:0.95] = 0.605
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.865
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.633
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.788
 Average IL_precision                = 0.822
 Average IL_recall                   = 0.607
 Average IL_F1                       = 0.698
 Average IL_FPR                      = 0.004
 Average IL_MCC                      = 0.699
 Average cgF1       @[ IoU=0.50     ] = 0.483
 Average precision  @[ IoU=0.50     ] = 0.770
 Average recall     @[ IoU=0.50     ] = 0.578
 Average F1         @[ IoU=0.50     ] = 0.660
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.909
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.691
 Average positive_micro_precision @[ IoU=0.50     ] = 0.860
 Average cgF1       @[ IoU=

In [6]:
# Compute averages: the unweighted mean of every metric across the subsets,
# stored under an extra "Average" entry in both result dicts.
METRICS = ["cgf1", "il_mcc", "pmf1"]

# Leave out any "Average" entry written by an earlier run of this cell, so a
# re-run averages over the real subsets only (same divisor, bit-identical
# result) instead of folding the previous mean back into the sum.
segm_rows = [res for name, res in results_gold.items() if name != "Average"]
bbox_rows = [res for name, res in results_gold_bbox.items() if name != "Average"]

avg_stats, avg_stats_bbox = {}, {}
for key in METRICS:
    avg_stats[key] = sum(res[key] for res in segm_rows) / len(segm_rows)
    avg_stats_bbox[key] = sum(res[key] for res in bbox_rows) / len(bbox_rows)
results_gold["Average"] = avg_stats
results_gold_bbox["Average"] = avg_stats_bbox

In [7]:
# Pretty print segmentation results as an HTML table: a header row of subset
# names, a header row of metric names, and one body row of rounded values.
from IPython.display import HTML, display

# The metric-name header cells are identical for every subset, so build them once.
metric_header = (
    "<th style='border-left-style:solid;border-left-width:1px'>"
    + "</th><th>".join(METRICS)
    + "</th>"
)

# Subset names; each one spans the three metric columns beneath it.
row1 = "".join(
    f'<th colspan="3" style="text-align:center;border-left-style:solid;border-left-width:1px">{subset}</th>'
    for subset in results_gold
)
row2 = metric_header * len(results_gold)
# Metric values rounded to two decimals, in METRICS order.
row3 = "".join(
    "<td style='border-left-style:solid;border-left-width:1px'>"
    + "</td><td>".join([str(round(subset_metrics[k], 2)) for k in METRICS])
    + "</td>"
    for subset_metrics in results_gold.values()
)

display(HTML(
   f"<table><thead><tr>{row1}</tr><tr>{row2}</tr></thead><tbody><tr>{row3}</tr></tbody></table>"
))

metaclip_nps,metaclip_nps,metaclip_nps,sa1b_nps,sa1b_nps,sa1b_nps,crowded,crowded,crowded,fg_food,fg_food,fg_food,fg_sports_equipment,fg_sports_equipment,fg_sports_equipment,attributes,attributes,attributes,wiki_common,wiki_common,wiki_common,Average,Average,Average
cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1
47.26,0.81,58.58,53.69,0.86,62.55,61.08,0.9,67.73,53.41,0.79,67.28,65.52,0.89,73.75,54.93,0.76,72.0,42.53,0.7,60.85,54.06,0.82,66.11


In [8]:
# Pretty print bbox detection results as an HTML table: a header row of subset
# names, a header row of metric names, and one body row of rounded values.
from IPython.display import HTML, display

row1, row2, row3 = "", "", ""
# Iterate the bbox results themselves (not the segmentation dict), so the
# column headers always come from the same dict the values are read from.
for subset, bbox_metrics in results_gold_bbox.items():
    # Subset name, spanning the three metric columns beneath it.
    row1 += f'<th colspan="3" style="text-align:center;border-left-style:solid;border-left-width:1px">{subset}</th>'
    row2 += "<th style='border-left-style:solid;border-left-width:1px'>" + "</th><th>".join(METRICS) + "</th>"
    # Metric values rounded to two decimals, in METRICS order.
    row3 += "<td style='border-left-style:solid;border-left-width:1px'>" + "</td><td>".join([str(round(bbox_metrics[k], 2)) for k in METRICS])  + "</td>"

display(HTML(
   f"<table><thead><tr>{row1}</tr><tr>{row2}</tr></thead><tbody><tr>{row3}</tr></tbody></table>"
))

metaclip_nps,metaclip_nps,metaclip_nps,sa1b_nps,sa1b_nps,sa1b_nps,crowded,crowded,crowded,fg_food,fg_food,fg_food,fg_sports_equipment,fg_sports_equipment,fg_sports_equipment,attributes,attributes,attributes,wiki_common,wiki_common,wiki_common,Average,Average,Average
cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1
50.03,0.81,61.99,55.36,0.86,64.49,61.71,0.9,68.43,53.79,0.8,67.64,68.07,0.89,76.62,56.46,0.76,73.94,44.27,0.7,63.34,55.67,0.82,68.06


# SA-Co/Silver

In [9]:
# Update to the directories where the SA-Co/Silver GT annotation files and the
# prediction dumps live. This rebinds the GT_DIR / PRED_DIR names used for the
# Gold section above. The placeholders below are ordinary strings so that this
# cell parses on a fresh "Restart & Run All"; replace both of them with real
# paths before running the evaluation cells.
GT_DIR = "/path/to/saco_silver/gt-annotations"  # PUT YOUR PATH HERE
PRED_DIR = "/path/to/saco_silver/predictions"  # PUT YOUR PATH HERE

In [10]:
# Relative GT file name for each SA-Co/Silver subset (one file per subset).
saco_silver_gts = {
    subset: f"silver_{subset}_merged_test.json"
    for subset in (
        "bdd100k",
        "droid",
        "ego4d",
        "food_rec",
        "geode",
        "inaturalist",
        "nga_art",
        "sav",
        "yt1b",
    )
}
# fathomnet is the one subset whose file name has no "_merged" component.
saco_silver_gts["fathomnet"] = "silver_fathomnet_test.json"

In [11]:
# Per-subset metrics, keyed by subset name: one dict for mask (segm) IoU and
# one for box (bbox) IoU.
results_silver = {}
results_silver_bbox = {}

for subset_name, gt in saco_silver_gts.items():
    print("Processing subset: ", subset_name)
    gt_path = os.path.join(GT_DIR, gt)
    pred_path = os.path.join(PRED_DIR, f"silver_{subset_name}/dumps/silver_{subset_name}/coco_predictions_segm.json")

    # Score the same prediction file twice: first with mask IoU, then with
    # box IoU. Each pass writes into its own results dict.
    for iou_type, results_by_subset in (("segm", results_silver), ("bbox", results_silver_bbox)):
        evaluator = CGF1Evaluator(gt_path=gt_path, verbose=True, iou_type=iou_type)
        summary = evaluator.evaluate(pred_path)
        print(summary)

        # The summary keys read here all start with "cgF1_eval_<iou_type>_".
        key_prefix = f"cgF1_eval_{iou_type}_"
        cur_results = {
            # cgF1 and positive micro F1 are stored multiplied by 100;
            # IL_MCC is stored exactly as reported.
            "cgf1": summary[key_prefix + "cgF1"] * 100,
            "il_mcc": summary[key_prefix + "IL_MCC"],
            "pmf1": summary[key_prefix + "positive_micro_F1"] * 100,
        }
        results_by_subset[subset_name] = cur_results


Processing subset:  bdd100k
loading annotations into memory...
Done (t=0.12s)
creating index...
index created!
Loaded 31278 predictions


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5439/5439 [00:01<00:00, 3496.20it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.466
 Average precision  @[ IoU=0.50:0.95] = 0.514
 Average recall     @[ IoU=0.50:0.95] = 0.644
 Average F1         @[ IoU=0.50:0.95] = 0.572
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.669
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.601
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.564
 Average IL_precision                = 0.870
 Average IL_recall                   = 0.952
 Average IL_F1                       = 0.909
 Average IL_FPR                      = 0.196
 Average IL_MCC                      = 0.775
 Average cgF1       @[ IoU=0.50     ] = 0.563
 Average precision  @[ IoU=0.50     ] = 0.621
 Average recall     @[ IoU=0.50     ] = 0.779
 Average F1         @[ IoU=0.50     ] = 0.691
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.769
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.726
 Average positive_micro_precision @[ IoU=0.50     ] = 0.681
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5439/5439 [00:01<00:00, 4945.36it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.462
 Average precision  @[ IoU=0.50:0.95] = 0.510
 Average recall     @[ IoU=0.50:0.95] = 0.639
 Average F1         @[ IoU=0.50:0.95] = 0.567
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.673
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.596
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.559
 Average IL_precision                = 0.870
 Average IL_recall                   = 0.952
 Average IL_F1                       = 0.909
 Average IL_FPR                      = 0.196
 Average IL_MCC                      = 0.775
 Average cgF1       @[ IoU=0.50     ] = 0.562
 Average precision  @[ IoU=0.50     ] = 0.620
 Average recall     @[ IoU=0.50     ] = 0.777
 Average F1         @[ IoU=0.50     ] = 0.689
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.769
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.725
 Average positive_micro_precision @[ IoU=0.50     ] = 0.679
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9415/9415 [00:02<00:00, 4431.41it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.456
 Average precision  @[ IoU=0.50:0.95] = 0.501
 Average recall     @[ IoU=0.50:0.95] = 0.651
 Average F1         @[ IoU=0.50:0.95] = 0.566
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.717
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.603
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.562
 Average IL_precision                = 0.869
 Average IL_recall                   = 0.881
 Average IL_F1                       = 0.875
 Average IL_FPR                      = 0.125
 Average IL_MCC                      = 0.755
 Average cgF1       @[ IoU=0.50     ] = 0.517
 Average precision  @[ IoU=0.50     ] = 0.568
 Average recall     @[ IoU=0.50     ] = 0.739
 Average F1         @[ IoU=0.50     ] = 0.642
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.782
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.685
 Average positive_micro_precision @[ IoU=0.50     ] = 0.638
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9415/9415 [00:01<00:00, 5301.02it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.461
 Average precision  @[ IoU=0.50:0.95] = 0.506
 Average recall     @[ IoU=0.50:0.95] = 0.659
 Average F1         @[ IoU=0.50:0.95] = 0.573
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.726
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.611
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.569
 Average IL_precision                = 0.869
 Average IL_recall                   = 0.881
 Average IL_F1                       = 0.875
 Average IL_FPR                      = 0.125
 Average IL_MCC                      = 0.755
 Average cgF1       @[ IoU=0.50     ] = 0.516
 Average precision  @[ IoU=0.50     ] = 0.566
 Average recall     @[ IoU=0.50     ] = 0.737
 Average F1         @[ IoU=0.50     ] = 0.641
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.778
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.683
 Average positive_micro_precision @[ IoU=0.50     ] = 0.636
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12428/12428 [00:04<00:00, 2599.59it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.386
 Average precision  @[ IoU=0.50:0.95] = 0.521
 Average recall     @[ IoU=0.50:0.95] = 0.689
 Average F1         @[ IoU=0.50:0.95] = 0.594
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.765
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.626
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.573
 Average IL_precision                = 0.901
 Average IL_recall                   = 0.912
 Average IL_F1                       = 0.907
 Average IL_FPR                      = 0.303
 Average IL_MCC                      = 0.618
 Average cgF1       @[ IoU=0.50     ] = 0.438
 Average precision  @[ IoU=0.50     ] = 0.591
 Average recall     @[ IoU=0.50     ] = 0.782
 Average F1         @[ IoU=0.50     ] = 0.673
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.842
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.709
 Average positive_micro_precision @[ IoU=0.50     ] = 0.649
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12428/12428 [00:03<00:00, 4068.74it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.388
 Average precision  @[ IoU=0.50:0.95] = 0.523
 Average recall     @[ IoU=0.50:0.95] = 0.692
 Average F1         @[ IoU=0.50:0.95] = 0.596
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.778
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.628
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.575
 Average IL_precision                = 0.901
 Average IL_recall                   = 0.912
 Average IL_F1                       = 0.907
 Average IL_FPR                      = 0.303
 Average IL_MCC                      = 0.618
 Average cgF1       @[ IoU=0.50     ] = 0.437
 Average precision  @[ IoU=0.50     ] = 0.589
 Average recall     @[ IoU=0.50     ] = 0.779
 Average F1         @[ IoU=0.50     ] = 0.671
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.840
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.707
 Average positive_micro_precision @[ IoU=0.50     ] = 0.647
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20888/20888 [00:04<00:00, 4826.96it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.530
 Average precision  @[ IoU=0.50:0.95] = 0.598
 Average recall     @[ IoU=0.50:0.95] = 0.674
 Average F1         @[ IoU=0.50:0.95] = 0.634
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.839
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.672
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.670
 Average IL_precision                = 0.863
 Average IL_recall                   = 0.903
 Average IL_F1                       = 0.883
 Average IL_FPR                      = 0.112
 Average IL_MCC                      = 0.788
 Average cgF1       @[ IoU=0.50     ] = 0.576
 Average precision  @[ IoU=0.50     ] = 0.650
 Average recall     @[ IoU=0.50     ] = 0.733
 Average F1         @[ IoU=0.50     ] = 0.689
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.883
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.731
 Average positive_micro_precision @[ IoU=0.50     ] = 0.729
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20888/20888 [00:03<00:00, 6545.85it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.534
 Average precision  @[ IoU=0.50:0.95] = 0.602
 Average recall     @[ IoU=0.50:0.95] = 0.679
 Average F1         @[ IoU=0.50:0.95] = 0.638
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.869
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.677
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.675
 Average IL_precision                = 0.863
 Average IL_recall                   = 0.903
 Average IL_F1                       = 0.883
 Average IL_FPR                      = 0.112
 Average IL_MCC                      = 0.788
 Average cgF1       @[ IoU=0.50     ] = 0.577
 Average precision  @[ IoU=0.50     ] = 0.652
 Average recall     @[ IoU=0.50     ] = 0.735
 Average F1         @[ IoU=0.50     ] = 0.691
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.897
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.733
 Average positive_micro_precision @[ IoU=0.50     ] = 0.731
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14797/14797 [00:02<00:00, 5611.66it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.701
 Average precision  @[ IoU=0.50:0.95] = 0.672
 Average recall     @[ IoU=0.50:0.95] = 0.840
 Average F1         @[ IoU=0.50:0.95] = 0.747
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.857
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.787
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.741
 Average IL_precision                = 0.881
 Average IL_recall                   = 0.975
 Average IL_F1                       = 0.925
 Average IL_FPR                      = 0.062
 Average IL_MCC                      = 0.890
 Average cgF1       @[ IoU=0.50     ] = 0.745
 Average precision  @[ IoU=0.50     ] = 0.714
 Average recall     @[ IoU=0.50     ] = 0.893
 Average F1         @[ IoU=0.50     ] = 0.793
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.899
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.837
 Average positive_micro_precision @[ IoU=0.50     ] = 0.787
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14797/14797 [00:01<00:00, 7792.81it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.708
 Average precision  @[ IoU=0.50:0.95] = 0.679
 Average recall     @[ IoU=0.50:0.95] = 0.848
 Average F1         @[ IoU=0.50:0.95] = 0.754
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.872
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.795
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.748
 Average IL_precision                = 0.881
 Average IL_recall                   = 0.975
 Average IL_F1                       = 0.925
 Average IL_FPR                      = 0.062
 Average IL_MCC                      = 0.890
 Average cgF1       @[ IoU=0.50     ] = 0.744
 Average precision  @[ IoU=0.50     ] = 0.714
 Average recall     @[ IoU=0.50     ] = 0.892
 Average F1         @[ IoU=0.50     ] = 0.793
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.901
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.836
 Average positive_micro_precision @[ IoU=0.50     ] = 0.787
 Average cgF1       @[ IoU=

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1439027/1439027 [01:22<00:00, 17398.82it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.658
 Average precision  @[ IoU=0.50:0.95] = 0.776
 Average recall     @[ IoU=0.50:0.95] = 0.722
 Average F1         @[ IoU=0.50:0.95] = 0.748
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.935
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.807
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.915
 Average IL_precision                = 0.848
 Average IL_recall                   = 0.796
 Average IL_F1                       = 0.821
 Average IL_FPR                      = 0.005
 Average IL_MCC                      = 0.816
 Average cgF1       @[ IoU=0.50     ] = 0.692
 Average precision  @[ IoU=0.50     ] = 0.816
 Average recall     @[ IoU=0.50     ] = 0.759
 Average F1         @[ IoU=0.50     ] = 0.786
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.981
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.848
 Average positive_micro_precision @[ IoU=0.50     ] = 0.962
 Average cgF1       @[ IoU=

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1439027/1439027 [01:18<00:00, 18312.98it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.652
 Average precision  @[ IoU=0.50:0.95] = 0.769
 Average recall     @[ IoU=0.50:0.95] = 0.715
 Average F1         @[ IoU=0.50:0.95] = 0.741
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.926
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.800
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.907
 Average IL_precision                = 0.848
 Average IL_recall                   = 0.796
 Average IL_F1                       = 0.821
 Average IL_FPR                      = 0.005
 Average IL_MCC                      = 0.816
 Average cgF1       @[ IoU=0.50     ] = 0.691
 Average precision  @[ IoU=0.50     ] = 0.815
 Average recall     @[ IoU=0.50     ] = 0.758
 Average F1         @[ IoU=0.50     ] = 0.786
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.981
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.848
 Average positive_micro_precision @[ IoU=0.50     ] = 0.961
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22221/22221 [00:04<00:00, 5095.16it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.381
 Average precision  @[ IoU=0.50:0.95] = 0.523
 Average recall     @[ IoU=0.50:0.95] = 0.512
 Average F1         @[ IoU=0.50:0.95] = 0.517
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.754
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.576
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.659
 Average IL_precision                = 0.700
 Average IL_recall                   = 0.809
 Average IL_F1                       = 0.750
 Average IL_FPR                      = 0.118
 Average IL_MCC                      = 0.661
 Average cgF1       @[ IoU=0.50     ] = 0.435
 Average precision  @[ IoU=0.50     ] = 0.597
 Average recall     @[ IoU=0.50     ] = 0.585
 Average F1         @[ IoU=0.50     ] = 0.591
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.838
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.658
 Average positive_micro_precision @[ IoU=0.50     ] = 0.753
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22221/22221 [00:02<00:00, 7605.50it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.385
 Average precision  @[ IoU=0.50:0.95] = 0.528
 Average recall     @[ IoU=0.50:0.95] = 0.517
 Average F1         @[ IoU=0.50:0.95] = 0.523
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.775
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.582
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.666
 Average IL_precision                = 0.700
 Average IL_recall                   = 0.809
 Average IL_F1                       = 0.750
 Average IL_FPR                      = 0.118
 Average IL_MCC                      = 0.661
 Average cgF1       @[ IoU=0.50     ] = 0.432
 Average precision  @[ IoU=0.50     ] = 0.594
 Average recall     @[ IoU=0.50     ] = 0.582
 Average F1         @[ IoU=0.50     ] = 0.588
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.838
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.655
 Average positive_micro_precision @[ IoU=0.50     ] = 0.749
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18079/18079 [00:05<00:00, 3149.81it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.444
 Average precision  @[ IoU=0.50:0.95] = 0.587
 Average recall     @[ IoU=0.50:0.95] = 0.684
 Average F1         @[ IoU=0.50:0.95] = 0.632
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.768
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.660
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.639
 Average IL_precision                = 0.900
 Average IL_recall                   = 0.908
 Average IL_F1                       = 0.904
 Average IL_FPR                      = 0.241
 Average IL_MCC                      = 0.672
 Average cgF1       @[ IoU=0.50     ] = 0.503
 Average precision  @[ IoU=0.50     ] = 0.666
 Average recall     @[ IoU=0.50     ] = 0.776
 Average F1         @[ IoU=0.50     ] = 0.717
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.846
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.750
 Average positive_micro_precision @[ IoU=0.50     ] = 0.725
 Average cgF1       @[ IoU=

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18079/18079 [00:03<00:00, 4783.75it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.444
 Average precision  @[ IoU=0.50:0.95] = 0.588
 Average recall     @[ IoU=0.50:0.95] = 0.685
 Average F1         @[ IoU=0.50:0.95] = 0.632
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.776
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.661
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.639
 Average IL_precision                = 0.900
 Average IL_recall                   = 0.908
 Average IL_F1                       = 0.904
 Average IL_FPR                      = 0.241
 Average IL_MCC                      = 0.672
 Average cgF1       @[ IoU=0.50     ] = 0.503
 Average precision  @[ IoU=0.50     ] = 0.665
 Average recall     @[ IoU=0.50     ] = 0.775
 Average F1         @[ IoU=0.50     ] = 0.716
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.846
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.748
 Average positive_micro_precision @[ IoU=0.50     ] = 0.724
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7778/7778 [00:01<00:00, 4510.67it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.421
 Average precision  @[ IoU=0.50:0.95] = 0.545
 Average recall     @[ IoU=0.50:0.95] = 0.567
 Average F1         @[ IoU=0.50:0.95] = 0.556
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.720
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.584
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.601
 Average IL_precision                = 0.853
 Average IL_recall                   = 0.841
 Average IL_F1                       = 0.847
 Average IL_FPR                      = 0.121
 Average IL_MCC                      = 0.721
 Average cgF1       @[ IoU=0.50     ] = 0.505
 Average precision  @[ IoU=0.50     ] = 0.655
 Average recall     @[ IoU=0.50     ] = 0.681
 Average F1         @[ IoU=0.50     ] = 0.668
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.820
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.701
 Average positive_micro_precision @[ IoU=0.50     ] = 0.722
 Average cgF1       @[ IoU=

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7778/7778 [00:01<00:00, 6379.32it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.424
 Average precision  @[ IoU=0.50:0.95] = 0.549
 Average recall     @[ IoU=0.50:0.95] = 0.571
 Average F1         @[ IoU=0.50:0.95] = 0.560
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.732
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.588
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.605
 Average IL_precision                = 0.853
 Average IL_recall                   = 0.841
 Average IL_F1                       = 0.847
 Average IL_FPR                      = 0.121
 Average IL_MCC                      = 0.721
 Average cgF1       @[ IoU=0.50     ] = 0.502
 Average precision  @[ IoU=0.50     ] = 0.651
 Average recall     @[ IoU=0.50     ] = 0.677
 Average F1         @[ IoU=0.50     ] = 0.664
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.818
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.697
 Average positive_micro_precision @[ IoU=0.50     ] = 0.718
 Average cgF1       @[ IoU=

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 281205/281205 [00:14<00:00, 19028.75it/s]


Accumulating results
cgF1 metric, IoU type=segm
 Average cgF1       @[ IoU=0.50:0.95] = 0.515
 Average precision  @[ IoU=0.50:0.95] = 0.472
 Average recall     @[ IoU=0.50:0.95] = 0.711
 Average F1         @[ IoU=0.50:0.95] = 0.567
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.690
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.600
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.519
 Average IL_precision                = 0.839
 Average IL_recall                   = 0.885
 Average IL_F1                       = 0.861
 Average IL_FPR                      = 0.003
 Average IL_MCC                      = 0.859
 Average cgF1       @[ IoU=0.50     ] = 0.615
 Average precision  @[ IoU=0.50     ] = 0.564
 Average recall     @[ IoU=0.50     ] = 0.848
 Average F1         @[ IoU=0.50     ] = 0.677
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.828
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.716
 Average positive_micro_precision @[ IoU=0.50     ] = 0.619
 Average cgF1       @[ IoU=

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 281205/281205 [00:14<00:00, 19965.61it/s]


Accumulating results
cgF1 metric, IoU type=bbox
 Average cgF1       @[ IoU=0.50:0.95] = 0.540
 Average precision  @[ IoU=0.50:0.95] = 0.495
 Average recall     @[ IoU=0.50:0.95] = 0.745
 Average F1         @[ IoU=0.50:0.95] = 0.595
 Average positive_macro_F1 @[ IoU=0.50:0.95] = 0.747
 Average positive_micro_F1 @[ IoU=0.50:0.95] = 0.629
 Average positive_micro_precision @[ IoU=0.50:0.95] = 0.544
 Average IL_precision                = 0.839
 Average IL_recall                   = 0.885
 Average IL_F1                       = 0.861
 Average IL_FPR                      = 0.003
 Average IL_MCC                      = 0.859
 Average cgF1       @[ IoU=0.50     ] = 0.620
 Average precision  @[ IoU=0.50     ] = 0.568
 Average recall     @[ IoU=0.50     ] = 0.855
 Average F1         @[ IoU=0.50     ] = 0.683
 Average positive_macro_F1 @[ IoU=0.50     ] = 0.842
 Average positive_micro_F1 @[ IoU=0.50     ] = 0.722
 Average positive_micro_precision @[ IoU=0.50     ] = 0.625
 Average cgF1       @[ IoU=

In [12]:
# Unweighted mean of each reported metric over all evaluated subsets
# (segmentation and bbox results are averaged independently).
METRICS = ["cgf1", "il_mcc", "pmf1"]

avg_stats = {
    key: sum(subset_res[key] for subset_res in results_silver.values()) / len(results_silver)
    for key in METRICS
}
avg_stats_bbox = {
    key: sum(subset_res[key] for subset_res in results_silver_bbox.values()) / len(results_silver_bbox)
    for key in METRICS
}

# Store the averages under an extra "Average" entry alongside the per-subset results.
results_silver["Average"] = avg_stats
results_silver_bbox["Average"] = avg_stats_bbox

In [13]:
# Pretty print segmentation results as a one-row HTML table:
# one column group per entry of results_silver, one column per metric in METRICS.
from IPython.display import HTML, display

# Left border applied to the first cell of every subset group to visually separate groups.
group_border = "border-left-style:solid;border-left-width:1px"

# Each subset header must span exactly as many columns as there are metrics;
# deriving it from METRICS keeps the table aligned if the metric list changes.
n_metrics = len(METRICS)

# The metric-name header is identical for every subset group.
metric_header = f"<th style='{group_border}'>" + "</th><th>".join(METRICS) + "</th>"

row1, row2, row3 = "", "", ""
for subset, subset_stats in results_silver.items():
    # Row 1: subset name centered over its metric columns.
    row1 += f'<th colspan="{n_metrics}" style="text-align:center;{group_border}">{subset}</th>'
    # Row 2: metric names.
    row2 += metric_header
    # Row 3: metric values rounded to 2 decimals, in METRICS order.
    rounded_values = [str(round(subset_stats[k], 2)) for k in METRICS]
    row3 += f"<td style='{group_border}'>" + "</td><td>".join(rounded_values) + "</td>"

display(HTML(
    f"<table><thead><tr>{row1}</tr><tr>{row2}</tr></thead><tbody><tr>{row3}</tr></tbody></table>"
))

bdd100k,bdd100k,bdd100k,droid,droid,droid,ego4d,ego4d,ego4d,food_rec,food_rec,food_rec,geode,geode,geode,inaturalist,inaturalist,inaturalist,nga_art,nga_art,nga_art,sav,sav,sav,yt1b,yt1b,yt1b,fathomnet,fathomnet,fathomnet,Average,Average,Average
cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1
46.61,0.78,60.13,45.58,0.76,60.35,38.64,0.62,62.56,52.96,0.79,67.21,70.07,0.89,78.73,65.8,0.82,80.67,38.06,0.66,57.62,44.36,0.67,66.05,42.07,0.72,58.36,51.53,0.86,59.98,49.57,0.76,65.17


In [14]:
# Pretty print bbox detection results as a one-row HTML table:
# one column group per entry of results_silver_bbox, one column per metric in METRICS.
from IPython.display import HTML, display

# Left border applied to the first cell of every subset group to visually separate groups.
group_border = "border-left-style:solid;border-left-width:1px"

# Each subset header must span exactly as many columns as there are metrics;
# deriving it from METRICS keeps the table aligned if the metric list changes.
n_metrics = len(METRICS)

# The metric-name header is identical for every subset group.
metric_header = f"<th style='{group_border}'>" + "</th><th>".join(METRICS) + "</th>"

row1, row2, row3 = "", "", ""
for subset, subset_stats in results_silver_bbox.items():
    # Row 1: subset name centered over its metric columns.
    row1 += f'<th colspan="{n_metrics}" style="text-align:center;{group_border}">{subset}</th>'
    # Row 2: metric names.
    row2 += metric_header
    # Row 3: metric values rounded to 2 decimals, in METRICS order.
    rounded_values = [str(round(subset_stats[k], 2)) for k in METRICS]
    row3 += f"<td style='{group_border}'>" + "</td><td>".join(rounded_values) + "</td>"

display(HTML(
    f"<table><thead><tr>{row1}</tr><tr>{row2}</tr></thead><tbody><tr>{row3}</tr></tbody></table>"
))

bdd100k,bdd100k,bdd100k,droid,droid,droid,ego4d,ego4d,ego4d,food_rec,food_rec,food_rec,geode,geode,geode,inaturalist,inaturalist,inaturalist,nga_art,nga_art,nga_art,sav,sav,sav,yt1b,yt1b,yt1b,fathomnet,fathomnet,fathomnet,Average,Average,Average
cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1,cgf1,il_mcc,pmf1
46.21,0.78,59.62,46.12,0.76,61.07,38.79,0.62,62.8,53.35,0.79,67.71,70.76,0.89,79.51,65.24,0.82,79.99,38.47,0.66,58.23,44.41,0.67,66.13,42.36,0.72,58.76,54.04,0.86,62.91,49.98,0.76,65.67
