In [1]:
import pandas as pd
from scipy.stats import pearsonr

# Per-language evaluation scores, laid out column-wise so the dict can be
# handed directly to pd.DataFrame: each key is a column, each list holds 21
# values, and position i in every list belongs to the language at
# "Language"[i]. Rows are ordered by descending "Mean".
# Presumably these are the sentence-level results (going by the variable
# name) — TODO confirm the source of the numbers.
data_sentence = {
    "Language": [
        "Indonesian", "Turkish", "Tagalog", "Portuguese", "Spanish", "Vietnamese",
        "Swahili", "Cantonese", "Hindi", "Mandarin", "Thai", "Japanese",
        "Urdu", "Mongolian", "Arabic", "Punjabi", "Bengali", "Persian", "Nepali",
        "Burmese", "Khmer"
    ],
    "chrF": [0.93, 0.90, 0.86, 0.85, 0.82, 0.76, 0.71, 0.78, 0.83, 0.79, 0.78, 0.70,
             0.76, 0.70, 0.78, 0.70, 0.88, 0.85, 0.81, 0.58, 0.55],
    # NOTE(review): the 0.00 entry (index 11) lines up with Japanese — confirm
    # this is a genuine score and not a placeholder for a missing/failed value.
    "BLEU": [0.85, 0.79, 0.75, 0.73, 0.64, 0.66, 0.58, 0.60, 0.72, 0.35, 0.36, 0.00,
             0.60, 0.49, 0.60, 0.53, 0.76, 0.74, 0.63, 0.17, 0.16],
    "ROUGE-1": [0.92, 0.92, 0.86, 0.85, 0.84, 0.84, 0.81, 0.66, 0.60, 0.66, 0.64, 0.83,
                0.55, 0.62, 0.47, 0.56, 0.16, 0.16, 0.14, 0.38, 0.36],
    "ROUGE-2": [0.85, 0.85, 0.74, 0.74, 0.73, 0.75, 0.78, 0.67, 0.48, 0.67, 0.63, 0.65,
                0.44, 0.50, 0.45, 0.48, 0.16, 0.16, 0.11, 0.27, 0.31],
    "ROUGE-L": [0.91, 0.91, 0.84, 0.84, 0.84, 0.81, 0.81, 0.67, 0.55, 0.67, 0.64, 0.82,
                0.50, 0.55, 0.48, 0.53, 0.16, 0.16, 0.14, 0.37, 0.37],
    # Appears to be the average of the five metric columns above, rounded to
    # two decimals (spot-checked on several rows, not recomputed for all).
    "Mean": [0.89, 0.87, 0.81, 0.80, 0.77, 0.76, 0.74, 0.68, 0.64, 0.63, 0.61, 0.60,
             0.57, 0.57, 0.56, 0.56, 0.42, 0.41, 0.37, 0.35, 0.35]
}


# Second per-language score table with the same columns and column-wise layout
# as data_sentence: 21 values per list, position i in every list belongs to
# "Language"[i], rows ordered by descending "Mean".
# The language order differs from data_sentence, so the two tables must be
# aligned by the "Language" key rather than by position.
# Presumably these are the paragraph-level results (going by the variable
# name) — TODO confirm the source of the numbers.
data_paragraph = {
    "Language": [
        "Swahili", "Indonesian", "Turkish", "Vietnamese", "Tagalog", "Portuguese",
        "Spanish", "Cantonese", "Hindi", "Mandarin", "Persian", "Thai", "Urdu",
        "Bengali", "Arabic", "Punjabi", "Mongolian", "Nepali", "Japanese", "Burmese",
        "Khmer"
    ],
    "chrF": [0.93, 0.92, 0.89, 0.77, 0.85, 0.84, 0.82, 0.77, 0.82, 0.70, 0.89, 0.77,
             0.74, 0.88, 0.77, 0.70, 0.67, 0.80, 0.66, 0.55, 0.53],
    # NOTE(review): the 0.00 entry (index 18) lines up with Japanese — confirm
    # this is a genuine score and not a placeholder for a missing/failed value.
    "BLEU": [0.88, 0.83, 0.81, 0.73, 0.74, 0.72, 0.63, 0.78, 0.69, 0.71, 0.79, 0.47,
             0.62, 0.76, 0.60, 0.57, 0.47, 0.63, 0.00, 0.20, 0.23],
    "ROUGE-1": [0.95, 0.94, 0.92, 0.87, 0.83, 0.83, 0.84, 0.40, 0.40, 0.35, 0.20, 0.38,
                0.36, 0.18, 0.32, 0.34, 0.39, 0.12, 0.40, 0.21, 0.21],
    "ROUGE-2": [0.83, 0.86, 0.83, 0.82, 0.70, 0.71, 0.66, 0.31, 0.17, 0.16, 0.07, 0.16,
                0.08, 0.08, 0.08, 0.15, 0.12, 0.04, 0.23, 0.11, 0.09],
    "ROUGE-L": [0.95, 0.94, 0.92, 0.87, 0.82, 0.83, 0.84, 0.40, 0.39, 0.35, 0.21, 0.39,
                0.35, 0.18, 0.33, 0.33, 0.38, 0.12, 0.39, 0.21, 0.21],
    # Appears to be the average of the five metric columns above, rounded to
    # two decimals (spot-checked on several rows, not recomputed for all).
    "Mean": [0.91, 0.90, 0.87, 0.81, 0.79, 0.79, 0.76, 0.53, 0.49, 0.45, 0.43, 0.43,
             0.43, 0.42, 0.42, 0.42, 0.41, 0.34, 0.34, 0.26, 0.25]
}


# Turn each column-wise dict into a DataFrame (one row per language).
df_sentence = pd.DataFrame(data_sentence)
df_paragraph = pd.DataFrame(data_paragraph)

# The join below is an inner join, which silently drops any language that is
# missing from (or spelled differently in) one of the tables. That would
# shrink the correlation sample without any warning, so fail loudly instead.
unmatched_languages = set(df_sentence["Language"]) ^ set(df_paragraph["Language"])
assert not unmatched_languages, (
    f"Languages present in only one table: {sorted(unmatched_languages)}"
)

# Align the two tables on "Language" (their row orders differ). Every metric
# column exists in both frames, so each one is emitted twice with the
# '_sent' / '_para' suffix. validate="one_to_one" makes pandas raise a
# MergeError if a language is duplicated in either table, which would
# otherwise multiply rows and distort the correlations.
merged_df = pd.merge(
    df_sentence,
    df_paragraph,
    on="Language",
    how="inner",
    suffixes=('_sent', '_para'),
    validate="one_to_one",
)

print("Merged DataFrame:")
print(merged_df)


# Base column names shared by both tables; the suffixed pair is correlated below.
metrics = ["chrF", "BLEU", "ROUGE-1", "ROUGE-2", "ROUGE-L", "Mean"]

print("\nPearson Correlation Coefficients:")
for metric in metrics:
    col_sent = metric + "_sent"
    col_para = metric + "_para"
    # Pearson r (and its two-sided p-value) between the '_sent' and '_para'
    # versions of the same metric, taken across the merged languages.
    r, p_value = pearsonr(merged_df[col_sent], merged_df[col_para])
    print(f"{metric:8s}: r = {r:.3f}, p-value = {p_value:.3e}")

Merged DataFrame:
      Language  chrF_sent  BLEU_sent  ROUGE-1_sent  ROUGE-2_sent  \
0   Indonesian       0.93       0.85          0.92          0.85   
1      Turkish       0.90       0.79          0.92          0.85   
2      Tagalog       0.86       0.75          0.86          0.74   
3   Portuguese       0.85       0.73          0.85          0.74   
4      Spanish       0.82       0.64          0.84          0.73   
5   Vietnamese       0.76       0.66          0.84          0.75   
6      Swahili       0.71       0.58          0.81          0.78   
7    Cantonese       0.78       0.60          0.66          0.67   
8        Hindi       0.83       0.72          0.60          0.48   
9     Mandarin       0.79       0.35          0.66          0.67   
10        Thai       0.78       0.36          0.64          0.63   
11    Japanese       0.70       0.00          0.83          0.65   
12        Urdu       0.76       0.60          0.55          0.44   
13   Mongolian       0.70     