In [None]:
#Imports
import json 
import csv

In [None]:
# Notebook configuration
# Annotator whose annotations are kept; 'any' disables the per-user filter.
user_filter = 'any'
# Path of the MedCAT export without extension: '.json' is appended when the
# export is read and '_concept_metrics.csv' when the metrics are written.
file = '../data/MedCAT_Export_NPH'

In [None]:
# Read the MedCAT export into memory.
# The encoding is passed explicitly: JSON text is UTF-8 by specification,
# whereas open() would otherwise fall back to the platform's locale encoding
# and could mis-decode non-ASCII annotation text.
with open(f'{file}.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [None]:
# Tally validated annotations into TP / FN / FP counts per (concept, user)
concept_stats = {}
keep_all_users = user_filter == 'any'

for project in data.get('projects', []):
    for doc in project.get('documents', []):
        for ann in doc.get('annotations', []):
            annotator = ann.get('user')

            # Honour the per-user filter unless every user is wanted
            if not keep_all_users and annotator != user_filter:
                continue

            # Unvalidated annotations are ignored entirely
            if not ann.get('validated', False):
                continue

            # Both a concept id and a text value ('value', else 'term') are required
            cui = ann.get('cui')
            value = ann.get('value') or ann.get('term')
            if not (cui and value):
                continue

            # One stats record per (concept, user) pair, created on first sight
            user_for_output = annotator if keep_all_users else user_filter
            entry = concept_stats.setdefault(
                (cui, user_for_output),
                {'TP': 0, 'FP': 0, 'FN': 0, 'Support': 0, 'Values': set()},
            )

            # Support counts every annotation that reaches this point
            entry['Support'] += 1
            entry['Values'].add(value)

            # 'correct' takes precedence over 'alternative'; anything else is an
            # FP unless it is flagged 'deleted', in which case no counter moves.
            # NOTE(review): such 'deleted' annotations still add to Support
            # without contributing to TP/FN/FP - confirm this is intended.
            if ann.get('correct', False):
                entry['TP'] += 1
            elif ann.get('alternative', False):
                entry['FN'] += 1
            elif not ann.get('deleted', False):
                entry['FP'] += 1

In [8]:
# Turn the raw counts into one precision / recall / F1 row per (concept, user)
results = []
for (concept, user), stats in concept_stats.items():
    tp, fp, fn = stats['TP'], stats['FP'], stats['FN']

    # No TP, FP or FN was recorded for this pair, so it gets no row
    if not (tp or fp or fn):
        continue

    # Each metric falls back to 0.0 when its denominator is empty
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted > 0 else 0.0
    recall = tp / actual if actual > 0 else 0.0

    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    row = {
        'user': user,
        'concept': concept,
        # All distinct text values seen for the concept, alphabetically joined
        'value': ', '.join(sorted(stats['Values'])),
        'support': stats['Support'],
        'precision': round(precision, 4),
        'recall': round(recall, 4),
        'f1': round(f1, 4),
    }
    results.append(row)

In [9]:
# Order rows from highest to lowest support (in place; ties keep their order)
results[:] = sorted(results, key=lambda row: row['support'], reverse=True)

In [10]:
# Write one row per (user, concept) pair to '<file>_concept_metrics.csv'.
# newline='' leaves line-ending handling to the csv module. The encoding is
# explicit because the 'value' column carries annotation text, which may hold
# non-ASCII characters that a narrow locale codec could fail to encode.
fieldnames = ['user', 'concept', 'value', 'support', 'precision', 'recall', 'f1']
with open(f'{file}_concept_metrics.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)

In [11]:
# Macro mean F1: unweighted average of the (already rounded) per-row F1 scores
if not results:
    print("No results to calculate macro mean F1.")
else:
    f1_total = sum(row['f1'] for row in results)
    macro_f1 = f1_total / len(results)
    print(f"Macro mean F1 across all concepts: {round(macro_f1, 4)}")

Macro mean F1 across all concepts: 0.8428
