# Table Axiom Distribution On Trec DL

In [1]:
import pandas as pd
from tqdm import tqdm


In [4]:
AXIOM_DISPLAY_NAMES = {
    "ArgUC_preference": 'ArgUC',
    "QTArg_preference": 'QTArg',
    "QTPArg_preference": 'QTPArg',
    "aSL_preference": 'aSL',
    "LNC1_preference": 'LNC1',
    "TF-LNC_preference": 'TF_LNC',
    "LB1_preference": 'LB1',
    "PROX1_preference": 'PROX1',
    "PROX2_preference": 'PROX2',
    "PROX3_preference": 'PROX3',
    "PROX4_preference": 'PROX4',
    "PROX5_preference": 'PROX5',
    "REG_preference": 'REG',
    "ANTI-REG_preference": 'ANTI_REG',
    "ASPECT-REG_preference": 'ASPECT_REG',
    "AND_preference": 'AND',
    "LEN-AND_preference": 'LEN_AND',
    "M-AND_preference": 'M_AND',
    "LEN-M-AND_preference": 'LEN_M_AND',
    "DIV_preference": 'DIV',
    "LEN-DIV_preference": 'LEN_DIV',
    "TFC1_preference": 'TFC1',
    "TFC3_preference": 'TFC3',
    "M-TDC_preference": 'M_TDC',
    "LEN-M-TDC_preference": 'LEN_M_TDC',
    "STMC1_preference": 'STMC1',
    "STMC2_preference": 'STMC2',
}

def load_approach(csv):
    df = pd.read_csv('/mnt/ceph/storage/data-in-progress/data-research/web-search/ir-axioms/preferences/2022-02-18/' + csv)
    df_analysis = []
    for _, doc_pair in tqdm(df.iterrows(), total=len(df)):
        if doc_pair['rank_a'] < doc_pair['rank_b']:
            row = {}
            for axiom, axiom_name in AXIOM_DISPLAY_NAMES.items():

                row[axiom_name + '_' + str(doc_pair[axiom])] = 1
            df_analysis += [row]
    df_analysis = pd.DataFrame(df_analysis)
    return df_analysis

In [7]:
def cells(axiom, df):
    return str(int(df.get(axiom + '_-1', 0))) + ' & ' + str(int(df.get(axiom + '_0', 0))) + ' & ' + str(int(df.get(axiom + '_1', 0)))

def report_row(axiom, p_2019, d_2019, p_2020, d_2020):
    return ('\\_'.join(axiom.split('_'))) + ' & ' +  cells(axiom, p_2019) + ' & ' + cells(axiom, d_2019) + ' & ' + cells(axiom, p_2020) + ' & ' +  cells(axiom, d_2020) + '\\\\'

def table(p_2019, d_2019, p_2020, d_2020):
    return '''
\\begin{table*}[bt]
    \\setlength{\\tabcolsep}{0.22em}
    \\caption{TBD.}
    \\label{table-axiom-distribution-on-trec-dl}
    \\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill}}l@{\\qquad}ccc@{\\quad}ccc@{\\quad}ccc@{\\quad}ccc@{}}

        \\toprule

        \\textbf{Axiom} & \\multicolumn{3}{@{}c@{\\quad}}{\\textbf{Passage@DL'19}} & \\multicolumn{3}{@{}c@{\\quad}}{\\textbf{Document@DL'19}} & \\multicolumn{3}{@{}c@{\\quad}}{\\textbf{Passage@DL'20}} & \\multicolumn{3}{@{}c@{\\quad}}{\\textbf{Document@DL'20}}\\\\

        \\cmidrule(r{.5em}){2-4} \\cmidrule(r{.5em}){5-7} \\cmidrule(r{.5em}){8-10} \\cmidrule(r{.5em}){11-13} 
        
        & -1 & 0 & 1 & -1 & 0 & 1 & -1 & 0 & 1 & -1 & 0 & 1 \\\\

        \\midrule

''' + ('\n'.join(report_row(ax, p_2019, d_2019, p_2020, d_2020) for ax in AXIOM_DISPLAY_NAMES.values())) + '''

        \\bottomrule

    \\end{tabular*}
\\end{table*}
'''

print(table( load_approach('trec-28-deep.passages-preferences-all-axioms-depth-10.csv').sum(),  load_approach('trec-28-deep.documents-preferences-all-axioms-depth-10.csv').sum(),  load_approach('trec-29-deep.passages-preferences-all-axioms-depth-10.csv').sum(),  load_approach('trec-29-deep.documents-preferences-all-axioms-depth-10.csv').sum()))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 132932/132932 [00:09<00:00, 14020.06it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 140367/140367 [00:09<00:00, 14133.60it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 268326/268326 [00:18<00:00, 14144.89it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 244195/244195 [00:17<00:00, 14057.01it/s]



\begin{table*}[bt]
    \setlength{\tabcolsep}{0.22em}
    \caption{TBD.}
    \label{table-axiom-distribution-on-trec-dl}
    \begin{tabular*}{\textwidth}{@{\extracolsep{\fill}}l@{\qquad}ccc@{\quad}ccc@{\quad}ccc@{\quad}ccc@{}}

        \toprule

        \textbf{Axiom} & \multicolumn{3}{@{}c@{\quad}}{\textbf{Passage@DL'19}} & \multicolumn{3}{@{}c@{\quad}}{\textbf{Document@DL'19}} & \multicolumn{3}{@{}c@{\quad}}{\textbf{Passage@DL'20}} & \multicolumn{3}{@{}c@{\quad}}{\textbf{Document@DL'20}}\\

        \cmidrule(r{.5em}){2-4} \cmidrule(r{.5em}){5-7} \cmidrule(r{.5em}){8-10} \cmidrule(r{.5em}){11-13} 
        
        & -1 & 0 & 1 & -1 & 0 & 1 & -1 & 0 & 1 & -1 & 0 & 1 \\

        \midrule

ArgUC & 3746 & 51316 & 4145 & 2124 & 58140 & 2357 & 8895 & 100771 & 9890 & 4375 & 100318 & 4141\\
QTArg & 2477 & 52538 & 4192 & 1512 & 58674 & 2435 & 6403 & 103448 & 9705 & 3083 & 100512 & 5239\\
QTPArg & 3496 & 50710 & 5001 & 1737 & 58428 & 2456 & 7859 & 99778 & 11919 & 3380 & 100434 & 5020\\
aSL & 2

In [25]:
df_analysis.sum()

ArgUC_0         51316.0
QTArg_0         52538.0
QTPArg_0        50710.0
aSL_0           54132.0
LNC1_0          57214.0
                 ...   
ANTI_REG_1      12976.0
M_TDC_-1          116.0
LEN_M_TDC_-1       44.0
TFC3_-1             6.0
TFC3_1             15.0
Length: 79, dtype: float64

In [32]:
str(int(df_analysis.sum()['ArgUC_0']))

'51316'

In [5]:
df[(df['qid'] == 156493) & (df['name'] == 'TUW19-p1-f')]

Unnamed: 0.1,Unnamed: 0,qid,query,docno_a,rank_a,score_a,name,docno_b,rank_b,score_b,...,CachedAxiom(axiom=RS_PL2(c=0.1))_preference,CachedAxiom(axiom=RS_QL(mu=1000))_preference,CachedAxiom(axiom=TFC1(margin_fraction=0.1))_preference,CachedAxiom(axiom=TFC3(margin_fraction=0.1))_preference,CachedAxiom(axiom=M_TDC())_preference,CachedAxiom(axiom=LEN_M_TDC(margin_fraction=0.1))_preference,CachedAxiom(axiom=STMC1())_preference,CachedAxiom(axiom=STMC1_fastText())_preference,CachedAxiom(axiom=STMC2())_preference,CachedAxiom(axiom=STMC2_fastText())_preference
0,0,156493,do goldfish grow,3288600,0,-1.176312,TUW19-p1-f,3288600,0,-1.176312,...,0,0,0,0,0,0,0,0,0,0
1,1,156493,do goldfish grow,3288600,0,-1.176312,TUW19-p1-f,3288597,1,-1.202135,...,1,1,0,0,0,0,-1,-1,0,0
2,2,156493,do goldfish grow,3288600,0,-1.176312,TUW19-p1-f,3288601,2,-1.268589,...,1,1,0,0,0,0,-1,-1,0,0
3,3,156493,do goldfish grow,3288600,0,-1.176312,TUW19-p1-f,8182159,3,-1.307820,...,1,1,0,0,0,0,-1,-1,0,0
4,4,156493,do goldfish grow,3288600,0,-1.176312,TUW19-p1-f,2612490,4,-1.355330,...,1,1,0,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,156493,do goldfish grow,6775071,9,-1.620916,TUW19-p1-f,1960255,5,-1.393404,...,-1,-1,-1,0,0,0,-1,1,-1,-1
96,96,156493,do goldfish grow,6775071,9,-1.620916,TUW19-p1-f,8182165,6,-1.449671,...,-1,-1,0,0,0,0,1,1,-1,-1
97,97,156493,do goldfish grow,6775071,9,-1.620916,TUW19-p1-f,95515,7,-1.495800,...,-1,-1,0,0,0,0,1,1,-1,-1
98,98,156493,do goldfish grow,6775071,9,-1.620916,TUW19-p1-f,1960261,8,-1.588573,...,-1,-1,0,0,0,0,1,1,-1,-1


In [4]:
df['ORIG_preference']

0         0
1         1
2         1
3         1
4         1
         ..
132927   -1
132928   -1
132929   -1
132930   -1
132931    0
Name: ORIG_preference, Length: 132932, dtype: int64

In [13]:
df['CachedAxiom(axiom=TFC1(margin_fraction=0.1))_preference'].unique()

array([ 0,  1, -1])

In [10]:
AXIOMS

["CachedAxiom(axiom=ArgUC(models=frozenset({'tag-ibm-fasttext'}), api_url='https://demo.webis.de/targer-api/', margin_fraction=0.1))_preference",
 "CachedAxiom(axiom=QTArg(models=frozenset({'tag-ibm-fasttext'}), api_url='https://demo.webis.de/targer-api/', normalize=True, margin_fraction=0.1))_preference",
 "CachedAxiom(axiom=QTPArg(models=frozenset({'tag-ibm-fasttext'}), api_url='https://demo.webis.de/targer-api/', normalize=True, penalty=10000000, margin_fraction=0.1))_preference",
 'CachedAxiom(axiom=aSL(min_sentence_length=12, max_sentence_length=20, margin_fraction=0.1))_preference',
 'CachedAxiom(axiom=LNC1())_preference',
 'CachedAxiom(axiom=TF_LNC())_preference',
 'CachedAxiom(axiom=LB1())_preference',
 'CachedAxiom(axiom=PROX1())_preference',
 'CachedAxiom(axiom=PROX2())_preference',
 'CachedAxiom(axiom=PROX3())_preference',
 'CachedAxiom(axiom=PROX4())_preference',
 'CachedAxiom(axiom=PROX5())_preference',
 'CachedAxiom(axiom=REG())_preference',
 'CachedAxiom(axiom=REG_fastTe