# Permutation Test for BERTScore Similarity

This notebook performs a simple permutation test to compare the BERTScore similarities between matched clean–adversarial note pairs and randomly paired notes. It uses the BERTScore results generated by the `bertscore_similarity.ipynb` notebook.

In [None]:
import os
import numpy as np
import pandas as pd

# Load the per-pair BERTScore results written by the similarity notebook.
results_path = '../results/bertscore_results.csv'
berts_df = pd.read_csv(results_path)

# F1 scores read from the 'f1' column (one value per row of the CSV).
observed_f1 = berts_df['f1'].values

# Number of permutations
n_perm = 1000

# Mean F1 of each shuffled copy of the observed scores.
# NOTE(review): the mean does not depend on element order, so every value
# below equals the observed mean up to floating-point rounding. Shuffling the
# already-computed F1 values therefore does not produce a null distribution.
# A valid test presumably needs F1 scores computed on randomly re-paired
# notes, which this cell does not load -- TODO confirm whether such scores
# are available and compare against them instead.
perm_means = [
    np.mean(np.random.permutation(observed_f1)) for _ in range(n_perm)
]

# Observed mean
observed_mean = np.mean(observed_f1)

# p-value: proportion of permuted means greater than or equal to the
# observed mean.
p_value = np.mean(np.array(perm_means) >= observed_mean)

# Surface the degenerate case at runtime instead of silently reporting a
# p-value that is determined by rounding noise alone.
if np.allclose(perm_means, observed_mean):
    print(
        "WARNING: all permuted means equal the observed mean; the statistic "
        "is invariant to permutation, so the p-value below is not meaningful."
    )

print(f"Observed mean F1: {observed_mean:.4f}")
print(f"Permutation test p-value: {p_value:.4f}")
