In [3]:
import json
import numpy as np
from collections import Counter

def calculate_entropy(probs):
    """Return the Shannon entropy (natural log, i.e. nats) of a distribution.

    Args:
        probs: Array-like of probabilities. Entries that are not strictly
            positive are ignored, which avoids evaluating ``log(0)``. The
            values are used as given; they are not re-normalised.

    Returns:
        ``-sum(p * log(p))`` over the strictly positive entries, as a NumPy
        float. An empty or one-hot distribution yields ``0.0``.
    """
    positive = np.asarray(probs, dtype=float)
    positive = positive[positive > 0]
    entropy = -np.sum(positive * np.log(positive))
    # Negating a zero sum produces -0.0 (which formats as "-0.0000");
    # adding +0.0 maps it to +0.0 and leaves every other value untouched.
    return entropy + 0.0

# Running tallies for the comparison log.
# NOTE: `type_counts` counts every entry that has a truthy `type`, while
# `matches` / `mismatches` only count match/mismatch entries that also carry a
# non-empty `predicted_probs` list.
matches = 0
mismatches = 0
type_counts = Counter()
match_entropies = []
mismatch_entropies = []
match_max_probs = []
mismatch_max_probs = []

path = "fine_tune_logs/meta-llama-Meta-Llama-3-8B-Instruct_2025-11-26-23-24-00_question_comparison.jsonl"

# Read as UTF-8 explicitly rather than with the platform-dependent default.
with open(path, 'r', encoding='utf-8') as f:
    for line in f:
        # Skip blank lines between records.
        if not line.strip():
            continue

        data = json.loads(line)
        entry_type = data.get('type')
        if entry_type:
            type_counts[entry_type] += 1

        # Only match/mismatch entries contribute entropy statistics.
        if entry_type not in ('model_answer_match', 'model_answer_mismatch'):
            continue

        predicted_probs = data.get('predicted_probs')
        if not predicted_probs:
            continue

        entropy = calculate_entropy(predicted_probs)
        max_prob = np.max(predicted_probs)
        if entry_type == 'model_answer_match':
            matches += 1
            match_entropies.append(entropy)
            match_max_probs.append(max_prob)
        else:
            # The guard above leaves 'model_answer_mismatch' as the only
            # other possibility here.
            mismatches += 1
            mismatch_entropies.append(entropy)
            mismatch_max_probs.append(max_prob)

def _mean_or_zero(values):
    """Return the mean of `values`, or 0 when the list is empty."""
    return np.mean(values) if values else 0


# Overall match rate among the counted match/mismatch entries.
total = matches + mismatches
if total > 0:
    match_percentage = matches / total * 100
else:
    match_percentage = 0

# Per-outcome averages of entropy and of the top predicted probability.
avg_match_entropy = _mean_or_zero(match_entropies)
avg_mismatch_entropy = _mean_or_zero(mismatch_entropies)
avg_match_max_prob = _mean_or_zero(match_max_probs)
avg_mismatch_max_prob = _mean_or_zero(mismatch_max_probs)

# Assemble the summary first, then emit it in one go.
report_lines = [
    f"Matches: {matches}",
    f"Mismatches: {mismatches}",
    f"Total: {total}",
    f"Match percentage: {match_percentage:.2f}%",
    f"\nAverage entropy for model_answer_match: {avg_match_entropy:.4f}",
    f"Average entropy for model_answer_mismatch: {avg_mismatch_entropy:.4f}",
    f"\nAverage max probability for model_answer_match: {avg_match_max_prob:.4f}",
    f"Average max probability for model_answer_mismatch: {avg_mismatch_max_prob:.4f}",
    "\nLog message type counts:",
]
print("\n".join(report_lines))

# One line per log message type, in alphabetical order.
for log_type in sorted(type_counts):
    count = type_counts[log_type]
    print(f"  {log_type}: {count}")


Matches: 3838
Mismatches: 6814
Total: 10652
Match percentage: 36.03%

Average entropy for model_answer_match: 1.0766
Average entropy for model_answer_mismatch: 1.2862

Average max probability for model_answer_match: 0.5090
Average max probability for model_answer_mismatch: 0.3745

Log message type counts:
  model_answer_match: 3838
  model_answer_mismatch: 6814
  verification_no_lookup_found: 760
  verification_passed: 10652
