In [1]:
import json

# Location of the JSON results file. Judging by how the entries are used
# further down, each value is a dict with 'true_label' and 'rag_label' keys.
file_path = '../rag_results_scotus.json'

# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform-dependent default text encoding of open().
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

data

{'0': {'true_label': 0, 'rag_label': 0},
 '1': {'true_label': 3, 'rag_label': 0},
 '2': {'true_label': 1, 'rag_label': 0},
 '3': {'true_label': 8, 'rag_label': 0},
 '4': {'true_label': 1, 'rag_label': 0},
 '5': {'true_label': 6, 'rag_label': 0},
 '6': {'true_label': 11, 'rag_label': -1},
 '7': {'true_label': 0, 'rag_label': -1},
 '8': {'true_label': 7, 'rag_label': 0},
 '9': {'true_label': 7, 'rag_label': 0},
 '10': {'true_label': 1, 'rag_label': -1},
 '11': {'true_label': 11, 'rag_label': 0},
 '12': {'true_label': 1, 'rag_label': 0},
 '13': {'true_label': 8, 'rag_label': 0},
 '14': {'true_label': 8, 'rag_label': 0},
 '15': {'true_label': 0, 'rag_label': -1},
 '16': {'true_label': 7, 'rag_label': 0},
 '17': {'true_label': 2, 'rag_label': 0},
 '18': {'true_label': 8, 'rag_label': 0},
 '19': {'true_label': 0, 'rag_label': 0},
 '20': {'true_label': 8, 'rag_label': 0},
 '21': {'true_label': 0, 'rag_label': 0},
 '22': {'true_label': 8, 'rag_label': -1},
 '23': {'true_label': 1, 'rag_label':

In [3]:
# Per-class precision / recall / F1 plus macro- and micro-averaged F1 for the
# predictions in `data` (each entry provides 'true_label' and 'rag_label').


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is 0."""
    return 0 if denominator == 0 else numerator / denominator


# Initialize counters for TP, FP, FN for each unique class.
# -1 is always included; in the results it shows up only as a predicted value
# (presumably "no valid answer" -- TODO confirm with the producer of the file).
class_labels = set([item['true_label'] for item in data.values()] + [-1])
# Also register every predicted label: a prediction that never occurs as a
# true label (other than -1) would otherwise raise KeyError on fp[...] below.
class_labels.update(item['rag_label'] for item in data.values())
tp = {label: 0 for label in class_labels}
fp = {label: 0 for label in class_labels}
fn = {label: 0 for label in class_labels}

# Process each instance: a match is a TP for that class; a mismatch is an FN
# for the true class and an FP for the predicted class.
for instance in data.values():
    true_label = instance['true_label']
    predicted_label = instance['rag_label']

    if true_label == predicted_label:
        tp[true_label] += 1
    else:
        fn[true_label] += 1
        fp[predicted_label] += 1

# Calculate precision, recall, and F1 for each class (0 when undefined, i.e.
# the class was never predicted / never occurs / has no true positives).
precision = {label: _ratio(tp[label], tp[label] + fp[label]) for label in class_labels}
recall = {label: _ratio(tp[label], tp[label] + fn[label]) for label in class_labels}
f1_scores = {
    label: _ratio(2 * (precision[label] * recall[label]), precision[label] + recall[label])
    for label in class_labels
}

# Calculate Macro-F1: unweighted mean over every label in class_labels,
# which includes -1.
macro_f1 = sum(f1_scores.values()) / len(f1_scores)

# Calculate Micro-F1 from counts pooled over all classes. Every mismatch adds
# exactly one FP and one FN, so micro precision == micro recall here.
sum_tp = sum(tp.values())
sum_fp = sum(fp.values())
sum_fn = sum(fn.values())
micro_precision = _ratio(sum_tp, sum_tp + sum_fp)
micro_recall = _ratio(sum_tp, sum_tp + sum_fn)
micro_f1 = _ratio(2 * (micro_precision * micro_recall), micro_precision + micro_recall)

precision, recall, f1_scores, macro_f1, micro_f1

({0: 0.25760135135135137,
  1: 0.2222222222222222,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  -1: 0.0},
 {0: 0.8198924731182796,
  1: 0.009009009009009009,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  -1: 0},
 {0: 0.3920308483290489,
  1: 0.017316017316017316,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0,
  10: 0,
  11: 0,
  12: 0,
  -1: 0},
 0.029239061831790446,
 0.21928571428571428)

In [4]:
# Per-class precision, tp / (tp + fp); 0 for labels that were never predicted.
precision

{0: 0.25760135135135137,
 1: 0.2222222222222222,
 2: 0,
 3: 0,
 4: 0,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 0,
 10: 0,
 11: 0,
 12: 0,
 -1: 0.0}

In [7]:
# Per-class recall, tp / (tp + fn); 0 for labels with no true instances.
recall

{0: 0.8198924731182796,
 1: 0.009009009009009009,
 2: 0.0,
 3: 0.0,
 4: 0.0,
 5: 0.0,
 6: 0.0,
 7: 0.0,
 8: 0.0,
 9: 0.0,
 10: 0.0,
 11: 0.0,
 12: 0.0,
 -1: 0}

In [8]:
# Per-class F1 (harmonic mean of precision and recall); 0 when both are 0.
f1_scores

{0: 0.3920308483290489,
 1: 0.017316017316017316,
 2: 0,
 3: 0,
 4: 0,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 0,
 10: 0,
 11: 0,
 12: 0,
 -1: 0}

In [5]:
# Macro-F1: unweighted mean of the per-class F1 scores over all of class_labels (including -1).
macro_f1

0.029239061831790446

In [6]:
# Micro-F1: F1 computed from the TP/FP/FN counts pooled over all classes.
micro_f1

0.21928571428571428