In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Beginning

In [16]:
corpus = "clef_v2" 

full_corpus_name = {
    "2020": "misinfo-2020",
    "2021": "C4-2021",
    "2022": "C4-2022",
    "CLEF": "CLEF",
    "clef": "CLEF",
    "CLEF_v2": "CLEF_v2",
    "clef_v2": "CLEF_v2"
}

fields = {
    '2020': 'title',
    '2021': 'query',
    '2022': 'query',
    'CLEF': 'title',
    'clef': 'title',
    "CLEF_v2": "title",
    "clef_v2": "title"
}

In [17]:
scores = pd.read_csv(f"confidences_query_quality_classifier/confidences_{corpus.lower()}.csv")
scores.head()

Unnamed: 0,topic,confidence
0,101004,1.578599
1,101005,1.083756
2,101006,5.355753
3,102004,1.566572
4,102005,92.579406


In [18]:
scores.describe()

df = scores

In [19]:
comp_2020 = pd.read_csv('compatibility_2020_title.csv')
comp_2021 = pd.read_csv('compatibility_2021_title.csv')
comp_2022 = pd.read_csv('compatibility_2022_title.csv')
comp_clef = pd.read_csv('compatibility_clef_title.csv')
comp_clef_v2 = pd.read_csv('compatibility_clef_layman.csv')

run_name_bm25 = {
    "2020": "all_res_misinfo-2020_bm25_title.csv",
    "2021": "all_res_C4-2021_bm25_query.csv",
    "2022": "all_res_C4-2022_bm25_query.csv",
    "CLEF": "all_res_clueweb-b13_bm25.csv",
    "clef": "all_res_clueweb-b13_bm25.csv",
    "CLEF_v2": "all_res_clueweb-b13_bm25_layman.csv",
    "clef_v2": "all_res_clueweb-b13_bm25_layman.csv"
}

run_name_minilm = {
    "2020": "rerank_bm25-cross-encoder_cross-encoder-ms-marco-MiniLM-L-12-v2_title_cleanhtml.txt",
    "2021": "title-rerank_bm25-cross-encoder_cross-encoder-ms-marco-MiniLM-L-12-v2_title_queries_query_cleanhtml.txt",
    "2022": "rerank_bm25-cross-encoder_cross-encoder-ms-marco-MiniLM-L-12-v2_title_cleanhtml.txt",
    "CLEF": "rerank_MiniLM-L-12-v2_top100_cleanhtml.txt",
    "clef": "rerank_MiniLM-L-12-v2_top100_cleanhtml.txt",
    "CLEF_v2": "rerank_bm25-cross-encoder_cross-encoder-ms-marco-MiniLM-L-12-v2_top100_cleanhtml.txt",
    "clef_v2": "rerank_bm25-cross-encoder_cross-encoder-ms-marco-MiniLM-L-12-v2_top100_cleanhtml.txt"
}

comp_2020 = comp_2020[comp_2020["run"] == run_name_minilm["2020"]]
comp_2021 = comp_2021[comp_2021["run"] == run_name_minilm["2021"]]
comp_2022 = comp_2022[comp_2022["run"] == run_name_minilm["2022"]]
comp_clef = comp_clef[comp_clef["run"] == run_name_minilm["clef"]]
comp_clef_v2 = comp_clef_v2[comp_clef_v2["run"] == run_name_minilm["CLEF_v2"]]

comps = {"2020": comp_2020, "2021": comp_2021, "2022": comp_2022, "clef": comp_clef, "clef_v2": comp_clef_v2}

In [20]:
label_mapping = {
    "misinfo-qrels-graded.helpful-only": "helpful",
    "misinfo-qrels-graded.harmful-only": "harmful",
    "misinfo-qrels.graded-helpful-only": "helpful",
    "misinfo-qrels.graded-harmful-only": "harmful",
    "task1_qtrust_mapped_positive.tsv": "helpful",
    "task1_qtrust_mapped_negative.tsv": "harmful"
}

for comp in comps.values():
    comp["qrels"] = comp["qrels"].map(label_mapping)

comps[corpus]

Unnamed: 0,run,qrels,p,all,101004,101005,101006,102004,102005,102006,...,147006,148004,148005,148006,149004,149005,149006,150004,150005,150006
2,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,,0.95,0.1282,0.2565,0.5514,0.4375,0.0888,0.2691,0.1875,...,0.1661,0.0751,0.0003,0.1213,0.0557,0.2859,0.2968,0.2381,0.2164,0.3278
3,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,,0.95,0.1287,0.0004,0.0004,0.0003,0.217,0.1571,0.2224,...,0.0642,0.0139,0.0825,0.0083,0.1993,0.004,0.0588,0.245,0.2978,0.1394


In [21]:
for key, comp in comps.items():
    comp_diff = comp.loc[:, ~comp.columns.isin(['run', "qrels", "p", 'all'])]

    comp_diff = comp_diff.diff() * (-1)

    comp_diff["run"] = run_name_minilm[corpus]
    comp_diff["qrels"] = "diff"
    comp_diff["p"] = 0.95
    comp_diff["all"] = np.nan
    comp_diff = comp_diff.iloc[1, :]

    comps[key] = pd.concat([comp, comp_diff.to_frame().T])

comps[corpus]
    

  comps[key] = pd.concat([comp, comp_diff.to_frame().T])
  comps[key] = pd.concat([comp, comp_diff.to_frame().T])
  comps[key] = pd.concat([comp, comp_diff.to_frame().T])
  comps[key] = pd.concat([comp, comp_diff.to_frame().T])
  comps[key] = pd.concat([comp, comp_diff.to_frame().T])


Unnamed: 0,run,qrels,p,all,101004,101005,101006,102004,102005,102006,...,147006,148004,148005,148006,149004,149005,149006,150004,150005,150006
2,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,,0.95,0.1282,0.2565,0.5514,0.4375,0.0888,0.2691,0.1875,...,0.1661,0.0751,0.0003,0.1213,0.0557,0.2859,0.2968,0.2381,0.2164,0.3278
3,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,,0.95,0.1287,0.0004,0.0004,0.0003,0.217,0.1571,0.2224,...,0.0642,0.0139,0.0825,0.0083,0.1993,0.004,0.0588,0.245,0.2978,0.1394
3,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,diff,0.95,,0.2561,0.551,0.4372,-0.1282,0.112,-0.0349,...,0.1019,0.0612,-0.0822,0.113,-0.1436,0.2819,0.238,-0.0069,-0.0814,0.1884


In [22]:
new_row = {col: np.nan for col in comps[corpus].columns}
new_row["run"] = "pre_qpp_method"
new_row["qrels"] = "helpful"

for _, row in df.iterrows():
    col_name = str(int(row["topic"]))
    if col_name in new_row:
        new_row[col_name] = float(row["confidence"])
    else:
        print("ERROR")

data = pd.concat([comps[corpus], pd.DataFrame([new_row])], ignore_index=True)

In [23]:
data = data.T
data.head(10)

Unnamed: 0,0,1,2,3
run,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,rerank_bm25-cross-encoder_cross-encoder-ms-mar...,pre_qpp_method
qrels,,,diff,helpful
p,0.95,0.95,0.95,
all,0.1282,0.1287,,
101004,0.2565,0.0004,0.2561,1.578599
101005,0.5514,0.0004,0.551,1.083756
101006,0.4375,0.0003,0.4372,5.355753
102004,0.0888,0.217,-0.1282,1.566572
102005,0.2691,0.1571,0.112,92.579406
102006,0.1875,0.2224,-0.0349,55.845106


In [24]:
boxplot_data = data.iloc[4:, :].rename(columns={0: "helpful", 1: "harmful", 2: "diff", 3: "confidence"})
boxplot_data.head()

Unnamed: 0,helpful,harmful,diff,confidence
101004,0.2565,0.0004,0.2561,1.578599
101005,0.5514,0.0004,0.551,1.083756
101006,0.4375,0.0003,0.4372,5.355753
102004,0.0888,0.217,-0.1282,1.566572
102005,0.2691,0.1571,0.112,92.579406


In [25]:
boxplot_data.corr(method="pearson").round(3)   # Pearson correlation

Unnamed: 0,helpful,harmful,diff,confidence
helpful,1.0,-0.271,0.738,-0.002
harmful,-0.271,1.0,-0.85,-0.05
diff,0.738,-0.85,1.0,0.033
confidence,-0.002,-0.05,0.033,1.0


In [26]:
boxplot_data.corr(method="kendall").round(3)

Unnamed: 0,helpful,harmful,diff,confidence
helpful,1.0,-0.181,0.608,0.05
harmful,-0.181,1.0,-0.574,-0.036
diff,0.608,-0.574,1.0,0.062
confidence,0.05,-0.036,0.062,1.0


In [27]:
boxplot_data.corr(method="spearman").round(3)

Unnamed: 0,helpful,harmful,diff,confidence
helpful,1.0,-0.266,0.783,0.08
harmful,-0.266,1.0,-0.738,-0.065
diff,0.783,-0.738,1.0,0.1
confidence,0.08,-0.065,0.1,1.0


In [28]:
from scipy.stats import pearsonr
from scipy.stats import kendalltau
from scipy.stats import spearmanr

def corr_with_pvalues(df):
    cols = df.columns
    n = len(cols)
    
    # DataFrames to store results
    corr_matrix = pd.DataFrame(np.zeros((n, n)), columns=cols, index=cols)
    pval_matrix = pd.DataFrame(np.ones((n, n)), columns=cols, index=cols)
    
    for i in range(n):
        for j in range(n):
            if i <= j:  # compute only upper triangle and diagonal
                col1 = df.iloc[:, i]
                col2 = df.iloc[:, j]
                # drop NaNs
                valid = col1.notna() & col2.notna()
                if valid.sum() > 1:  # Need at least 2 valid points
                    r, p = pearsonr(col1[valid].astype(float), col2[valid].astype(float))
                    corr_matrix.iloc[i, j] = r
                    corr_matrix.iloc[j, i] = r
                    pval_matrix.iloc[i, j] = p
                    pval_matrix.iloc[j, i] = p
    
    return corr_matrix, pval_matrix


def kendall_corr_with_pvalues(df):
    cols = df.columns
    n = len(cols)

    corr_matrix = pd.DataFrame(np.zeros((n, n)), columns=cols, index=cols)
    pval_matrix = pd.DataFrame(np.ones((n, n)), columns=cols, index=cols)

    for i in range(n):
        for j in range(i, n):  # only upper triangle and diagonal
            col1 = pd.to_numeric(df.iloc[:, i], errors='coerce')
            col2 = pd.to_numeric(df.iloc[:, j], errors='coerce')
            valid = col1.notna() & col2.notna()
            if valid.sum() > 1:
                r, p = kendalltau(col1[valid].astype(float), col2[valid].astype(float))
                corr_matrix.iloc[i, j] = r
                corr_matrix.iloc[j, i] = r
                pval_matrix.iloc[i, j] = p
                pval_matrix.iloc[j, i] = p

    return corr_matrix, pval_matrix


def spearman_corr_with_pvalues(df):
    cols = df.columns
    n = len(cols)

    corr_matrix = pd.DataFrame(np.zeros((n, n)), columns=cols, index=cols)
    pval_matrix = pd.DataFrame(np.ones((n, n)), columns=cols, index=cols)

    for i in range(n):
        for j in range(i, n):  # only upper triangle and diagonal
            col1 = pd.to_numeric(df.iloc[:, i], errors='coerce')
            col2 = pd.to_numeric(df.iloc[:, j], errors='coerce')
            valid = col1.notna() & col2.notna()
            if valid.sum() > 1:
                r, p = spearmanr(col1[valid].astype(float), col2[valid].astype(float))
                corr_matrix.iloc[i, j] = r
                corr_matrix.iloc[j, i] = r
                pval_matrix.iloc[i, j] = p
                pval_matrix.iloc[j, i] = p

    return corr_matrix, pval_matrix

In [29]:
corr_matrix, pval_matrix = spearman_corr_with_pvalues(boxplot_data)

In [30]:
corr_matrix.round(3)

Unnamed: 0,helpful,harmful,diff,confidence
helpful,1.0,-0.266,0.783,0.08
harmful,-0.266,1.0,-0.738,-0.065
diff,0.783,-0.738,1.0,0.1
confidence,0.08,-0.065,0.1,1.0


In [31]:
alpha = 0.05
significant_mask = pval_matrix < alpha

# p-value < alpha => significant
significant_mask

Unnamed: 0,helpful,harmful,diff,confidence
helpful,True,True,True,False
harmful,True,True,True,False
diff,True,True,True,False
confidence,False,False,False,True


# p values

In [32]:
pval_matrix[["helpful", "harmful", "diff"]].round(3)

Unnamed: 0,helpful,harmful,diff
helpful,0.0,0.001,0.0
harmful,0.001,0.0,0.0
diff,0.0,0.0,0.0
confidence,0.329,0.43,0.224


In [33]:
ndcg_harm_2020 = pd.read_csv(f'ndcg_harmful_only_results/ndcg_harmful_only_output_minilm_2020.csv')
ndcg_harm_2021 = pd.read_csv(f'ndcg_harmful_only_results/ndcg_harmful_only_output_minilm_2021.csv')
ndcg_harm_2022 = pd.read_csv(f'ndcg_harmful_only_results/ndcg_harmful_only_output_minilm_2022.csv')
ndcg_harm_clef_v2 = pd.read_csv(f'ndcg_harmful_only_results/ndcg_harmful_only_output_minilm_CLEF.csv')

ndcg_harms = {"2020": ndcg_harm_2020, "2021": ndcg_harm_2021, "2022": ndcg_harm_2022, "clef_v2": ndcg_harm_clef_v2}

In [34]:
for k, ndcg_df in ndcg_harms.items():
    ndcg_harms[k] = ndcg_df[ndcg_df["topic"] != "all"]

ndcg_harms[corpus]

Unnamed: 0,run,topic,ndcg_cut_5,ndcg_cut_10,ndcg_cut_15,ndcg_cut_20,ndcg_cut_30,ndcg_cut_100,ndcg_cut_200,ndcg_cut_500,ndcg_cut_1000
0,minilm_layman_CLEF,101004,0.0656,0.0426,0.0543,0.0623,0.0478,0.0572,0.0989,0.1681,0.2032
1,minilm_layman_CLEF,101005,0.0000,0.0000,0.0000,0.0000,0.0116,0.0398,0.0447,0.1063,0.1893
2,minilm_layman_CLEF,101006,0.0730,0.0474,0.0367,0.0306,0.0357,0.0432,0.0651,0.1873,0.2297
3,minilm_layman_CLEF,102004,0.8614,0.7345,0.6162,0.5297,0.4535,0.3535,0.3258,0.2859,0.3142
4,minilm_layman_CLEF,102005,0.5104,0.3312,0.3036,0.2702,0.2554,0.1768,0.1326,0.1592,0.2076
...,...,...,...,...,...,...,...,...,...,...,...
145,minilm_layman_CLEF,149005,0.0656,0.0426,0.0548,0.0457,0.0576,0.1034,0.1071,0.1294,0.1534
146,minilm_layman_CLEF,149006,0.1312,0.1170,0.0907,0.0755,0.0691,0.0576,0.0540,0.0711,0.0750
147,minilm_layman_CLEF,150004,0.6726,0.4712,0.4338,0.3611,0.3822,0.4679,0.3553,0.3163,0.3544
148,minilm_layman_CLEF,150005,0.6040,0.4286,0.4017,0.4695,0.4669,0.4986,0.4023,0.4496,0.4877


In [35]:
ndcg_harms[corpus]["confidence"] = df["confidence"]
ndcg_harms[corpus]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndcg_harms[corpus]["confidence"] = df["confidence"]


Unnamed: 0,run,topic,ndcg_cut_5,ndcg_cut_10,ndcg_cut_15,ndcg_cut_20,ndcg_cut_30,ndcg_cut_100,ndcg_cut_200,ndcg_cut_500,ndcg_cut_1000,confidence
0,minilm_layman_CLEF,101004,0.0656,0.0426,0.0543,0.0623,0.0478,0.0572,0.0989,0.1681,0.2032,1.578599
1,minilm_layman_CLEF,101005,0.0000,0.0000,0.0000,0.0000,0.0116,0.0398,0.0447,0.1063,0.1893,1.083756
2,minilm_layman_CLEF,101006,0.0730,0.0474,0.0367,0.0306,0.0357,0.0432,0.0651,0.1873,0.2297,5.355753
3,minilm_layman_CLEF,102004,0.8614,0.7345,0.6162,0.5297,0.4535,0.3535,0.3258,0.2859,0.3142,1.566572
4,minilm_layman_CLEF,102005,0.5104,0.3312,0.3036,0.2702,0.2554,0.1768,0.1326,0.1592,0.2076,92.579406
...,...,...,...,...,...,...,...,...,...,...,...,...
145,minilm_layman_CLEF,149005,0.0656,0.0426,0.0548,0.0457,0.0576,0.1034,0.1071,0.1294,0.1534,2.155261
146,minilm_layman_CLEF,149006,0.1312,0.1170,0.0907,0.0755,0.0691,0.0576,0.0540,0.0711,0.0750,5.814468
147,minilm_layman_CLEF,150004,0.6726,0.4712,0.4338,0.3611,0.3822,0.4679,0.3553,0.3163,0.3544,4.088359
148,minilm_layman_CLEF,150005,0.6040,0.4286,0.4017,0.4695,0.4669,0.4986,0.4023,0.4496,0.4877,1.977308


In [36]:
cols= ndcg_harms[corpus].columns
cols = cols.drop(["run", "topic"])
cols

Index(['ndcg_cut_5', 'ndcg_cut_10', 'ndcg_cut_15', 'ndcg_cut_20',
       'ndcg_cut_30', 'ndcg_cut_100', 'ndcg_cut_200', 'ndcg_cut_500',
       'ndcg_cut_1000', 'confidence'],
      dtype='object')

In [37]:
ndcg_harms[corpus][cols].corr(method="pearson").round(3)

Unnamed: 0,ndcg_cut_5,ndcg_cut_10,ndcg_cut_15,ndcg_cut_20,ndcg_cut_30,ndcg_cut_100,ndcg_cut_200,ndcg_cut_500,ndcg_cut_1000,confidence
ndcg_cut_5,1.0,0.945,0.917,0.888,0.851,0.753,0.714,0.616,0.575,0.034
ndcg_cut_10,0.945,1.0,0.977,0.957,0.926,0.815,0.776,0.642,0.603,-0.009
ndcg_cut_15,0.917,0.977,1.0,0.988,0.967,0.866,0.823,0.691,0.651,-0.015
ndcg_cut_20,0.888,0.957,0.988,1.0,0.984,0.889,0.848,0.713,0.67,-0.01
ndcg_cut_30,0.851,0.926,0.967,0.984,1.0,0.931,0.89,0.759,0.717,-0.009
ndcg_cut_100,0.753,0.815,0.866,0.889,0.931,1.0,0.976,0.866,0.829,0.029
ndcg_cut_200,0.714,0.776,0.823,0.848,0.89,0.976,1.0,0.928,0.898,-0.0
ndcg_cut_500,0.616,0.642,0.691,0.713,0.759,0.866,0.928,1.0,0.985,-0.025
ndcg_cut_1000,0.575,0.603,0.651,0.67,0.717,0.829,0.898,0.985,1.0,-0.025
confidence,0.034,-0.009,-0.015,-0.01,-0.009,0.029,-0.0,-0.025,-0.025,1.0


In [38]:
ndcg_harms[corpus][cols].corr(method="kendall").round(3)

Unnamed: 0,ndcg_cut_5,ndcg_cut_10,ndcg_cut_15,ndcg_cut_20,ndcg_cut_30,ndcg_cut_100,ndcg_cut_200,ndcg_cut_500,ndcg_cut_1000,confidence
ndcg_cut_5,1.0,0.803,0.764,0.727,0.678,0.572,0.55,0.454,0.431,0.001
ndcg_cut_10,0.803,1.0,0.873,0.827,0.774,0.631,0.602,0.477,0.45,-0.031
ndcg_cut_15,0.764,0.873,1.0,0.913,0.848,0.691,0.649,0.512,0.476,-0.041
ndcg_cut_20,0.727,0.827,0.913,1.0,0.896,0.716,0.674,0.531,0.493,-0.04
ndcg_cut_30,0.678,0.774,0.848,0.896,1.0,0.772,0.721,0.572,0.531,-0.043
ndcg_cut_100,0.572,0.631,0.691,0.716,0.772,1.0,0.876,0.691,0.648,-0.024
ndcg_cut_200,0.55,0.602,0.649,0.674,0.721,0.876,1.0,0.779,0.73,-0.021
ndcg_cut_500,0.454,0.477,0.512,0.531,0.572,0.691,0.779,1.0,0.896,-0.026
ndcg_cut_1000,0.431,0.45,0.476,0.493,0.531,0.648,0.73,0.896,1.0,-0.018
confidence,0.001,-0.031,-0.041,-0.04,-0.043,-0.024,-0.021,-0.026,-0.018,1.0


In [39]:
ndcg_harms[corpus][cols].corr(method="spearman").round(3)

Unnamed: 0,ndcg_cut_5,ndcg_cut_10,ndcg_cut_15,ndcg_cut_20,ndcg_cut_30,ndcg_cut_100,ndcg_cut_200,ndcg_cut_500,ndcg_cut_1000,confidence
ndcg_cut_5,1.0,0.944,0.92,0.898,0.861,0.763,0.737,0.637,0.603,-0.006
ndcg_cut_10,0.944,1.0,0.977,0.959,0.931,0.822,0.795,0.665,0.631,-0.053
ndcg_cut_15,0.92,0.977,1.0,0.988,0.967,0.869,0.835,0.7,0.661,-0.066
ndcg_cut_20,0.898,0.959,0.988,1.0,0.983,0.887,0.855,0.719,0.678,-0.061
ndcg_cut_30,0.861,0.931,0.967,0.983,1.0,0.927,0.892,0.758,0.715,-0.068
ndcg_cut_100,0.763,0.822,0.869,0.887,0.927,1.0,0.979,0.869,0.833,-0.037
ndcg_cut_200,0.737,0.795,0.835,0.855,0.892,0.979,1.0,0.929,0.897,-0.031
ndcg_cut_500,0.637,0.665,0.7,0.719,0.758,0.869,0.929,1.0,0.983,-0.037
ndcg_cut_1000,0.603,0.631,0.661,0.678,0.715,0.833,0.897,0.983,1.0,-0.026
confidence,-0.006,-0.053,-0.066,-0.061,-0.068,-0.037,-0.031,-0.037,-0.026,1.0


In [40]:
k = 10

harmful_at_k_2020 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2020_bm25.csv')
harmful_at_k_2021 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2021_bm25.csv')
harmful_at_k_2022 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2022_bm25.csv')
harmful_at_k_clef = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_clef_bm25.csv')
harmful_at_k_clef_v2 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_clef_v2_bm25.csv')

# harmful_at_k_2020 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2020_minilm12.csv')
# harmful_at_k_2021 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2021_minilm12.csv')
# harmful_at_k_2022 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_2022_minilm12.csv')
# harmful_at_k_clef = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_clef_minilm12.csv')
# harmful_at_k_clef_v2 = pd.read_csv(f'harmful_at_k/harmful_at_{str(k)}_clef_v2_minilm12.csv')


harmfuls_at_k = {"2020": harmful_at_k_2020, "2021": harmful_at_k_2021, "2022": harmful_at_k_2022, "clef": harmful_at_k_clef, "clef_v2": harmful_at_k_clef_v2}

harmful_at_k_2020.head()

Unnamed: 0,topic,harmful_at_10
0,1,0.0
1,2,0.0
2,4,0.0
3,6,0.1
4,7,0.0


In [41]:
harmfuls_at_k[corpus]["confidence"] = df["confidence"]

In [42]:
harmfuls_at_k[corpus].corr(method="pearson").round(3)

Unnamed: 0,topic,harmful_at_10,confidence
topic,1.0,-0.019,-0.016
harmful_at_10,-0.019,1.0,0.03
confidence,-0.016,0.03,1.0


In [43]:
harmfuls_at_k[corpus].corr(method="kendall").round(3)

Unnamed: 0,topic,harmful_at_10,confidence
topic,1.0,0.001,0.046
harmful_at_10,0.001,1.0,-0.028
confidence,0.046,-0.028,1.0


In [44]:
harmfuls_at_k[corpus].corr(method="spearman").round(3)

Unnamed: 0,topic,harmful_at_10,confidence
topic,1.0,0.006,0.075
harmful_at_10,0.006,1.0,-0.037
confidence,0.075,-0.037,1.0
