In [1]:
import os

In [2]:
# Evaluate the model's predictions on the test set.
# The benchmark helpers come from the directory_benchmark module of the eval package.

import sys
# "../../" is a relative entry, so it is resolved against the kernel's current
# working directory; the import below presumably relies on the notebook being
# run from its own folder, with the `eval` package two levels up — TODO confirm.
sys.path.append("../../")
from eval.directory_benchmark import benchmark_directories, benchmark_single_file

def benchmark(test_set_labels_dir: str, pred_labels_dir: str, iou_threshold: float = 0.0):
    """
    Benchmark predictions against a directory of ground-truth label files.

    Thin wrapper: the three arguments are forwarded positionally, unchanged,
    to ``benchmark_directories`` from ``eval.directory_benchmark``.

    Args:
        test_set_labels_dir: Directory containing ground truth JSON files
        pred_labels_dir: Directory containing prediction JSON files
        iou_threshold: IoU threshold for bounding box matching (default: 0.0).
            Note that this default differs from ``benchmark_from_file``,
            whose default is 0.5.

    Returns:
        Whatever ``benchmark_directories`` returns — expected to be a
        pandas.DataFrame of results with simplified columns.
    """
    return benchmark_directories(test_set_labels_dir, pred_labels_dir, iou_threshold)

def benchmark_from_file(test_data_file: str, pred_labels_dir: str, iou_threshold: float = 0.5):
    """
    Benchmark predictions against a single JSON file holding all ground truth.

    Thin wrapper: the three arguments are forwarded positionally, unchanged,
    to ``benchmark_single_file`` from ``eval.directory_benchmark``.

    Args:
        test_data_file: Path to JSON file containing all ground truth data in format:
                       [{"file_name": "...", "labels": [...]}, ...]
        pred_labels_dir: Directory containing prediction JSON files
        iou_threshold: IoU threshold for bounding box matching (default: 0.5).
            Note that this default differs from ``benchmark``, whose default
            is 0.0; pass the threshold explicitly when comparing the two.

    Returns:
        Whatever ``benchmark_single_file`` returns — expected to be a
        pandas.DataFrame of results with simplified columns.
    """
    return benchmark_single_file(test_data_file, pred_labels_dir, iou_threshold)

In [67]:
# Root of the data tree. Defaults to the original local volume; set the
# THESIS_DATA_ROOT environment variable to run the notebook on another machine.
data_root = os.environ.get("THESIS_DATA_ROOT", "/Volumes/MyDataDrive/thesis/code-2/new-data")

# Single JSON file with all ground-truth entries for the test split.
test_data_file = os.path.join(data_root, "IIT-CDIP/test/test-data-iit-cdip-840px.json")
# Directory of prediction JSON files for the checkpoint being evaluated.
pred_labels_dir = os.path.join(
    data_root,
    "benchmarks/trained_model/model_pred/any_annotator/r64_alpha128_train-data-mixed-840px-annotator-1_lr0_000042/checkpoint-94",
)

# The threshold is passed explicitly: benchmark_from_file defaults to 0.5, not 0.0.
results = benchmark_from_file(test_data_file, pred_labels_dir, iou_threshold=0.0)



Loading test data from: /Volumes/MyDataDrive/thesis/code-2/new-data/IIT-CDIP/test/test-data-iit-cdip-840px.json
Found 100 ground truth files to process
Files with predictions: 100
Files without predictions: 0 (will be treated as complete prediction failures)

Counting all ground truth entities...
  fhfb0066_page1: 3 entities
  fjcc0074_page4: 17 entities
  fgbf0249_page5: 31 entities
  fhbh0030_page1: 10 entities
  fgcg0076_page1: 5 entities
  fgdp0140_page1: 11 entities
  fgvl0222_page1: 25 entities
  fhvc0235_page2: 57 entities
  fhnm0001_page1: 86 entities
  fjgh0000_page1: 10 entities
  fhyg0141_page1: 15 entities
  fhyn0144_page1: 12 entities
  fgvd0138_page1: 16 entities
  fgvk0122_page3: 7 entities
  fgvg0019_page1: 6 entities
  fhhd0023_page2: 8 entities
  fhkm0113_page1: 6 entities
  fhxj0190_page1: 12 entities
  fhvb0035_page1: 16 entities
  fjbw0138_page1: 7 entities
  fgfy0251_page1: 17 entities
  fgmp0189_page3: 21 entities
  fjjc0186_page2: 9 entities
  fgxw0053_page1: 8 

In [59]:
# Plain-text dump of `results` (expected to be the DataFrame returned by
# benchmark_from_file); printing a long frame shows only its head and tail rows.
# NOTE(review): `results` is assigned in the benchmark cell above — run that cell
# first on a fresh kernel.
print(results)

         Document ID        Entity Type                  Ground Truth  \
0     fgbf0249_page1  Organization Name                                 
1     fgbf0249_page1  Organization Name  Mallinckrodt Pharmaceuticals   
2     fgbf0249_page1  Organization Name                  Mallinckrodt   
3     fgbf0249_page1  Organization Name                      McKesson   
4     fgbf0249_page1  Organization Name                       Enclara   
...              ...                ...                           ...   
2663  fjlf0106_page1        Person Name                    C. Koehler   
2664  fjlf0106_page1        Person Name                      D. Watts   
2665  fjln0143_page4  Organization Name                                 
2666  fjln0143_page4    Contract Number                                 
2667  fjln0143_page4    Contract Number                6761 5 5 5 5 9   

                                              Predicted is_correct  bbox_iou  \
0     Mallinckrodt Pharmaceuticals | Specia