In [1]:
%%time
import sys
sys.path.append('../python/')

from trectools import TrecRun, TrecQrel
import pandas as pd
from tqdm import tqdm
from run_file_processing import load_all_runs, normalize_run
from evaluation_util import __adjust_qrels_to_pool
from probability_estimation import load_pool_task, CountProbabilityEstimator, RunIndependentCountProbabilityEstimator, PoissonEstimator

RUNS = list(load_all_runs('../resources/processed/normalized-runs/trec-system-runs/trec13/robust').keys())

class PredictAlways0:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:1, 1:0, 2:0, 3:0}

class PredictAlways1:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:0, 1:1, 2:0, 3:0}

def eval_on_x(run, qrels_incomplete, qrels_complete):
    run = normalize_run(run, 10).run_data
    ret = []
    
    estimators = [('always-0', PredictAlways0()),
                  ('cnt', CountProbabilityEstimator()),
                  ('run-indep', RunIndependentCountProbabilityEstimator()),
                  ('poisson', PoissonEstimator()),
                  ('always-1', PredictAlways1())
                 ]
    
    for topic in tqdm(run['query'].unique()):
        run_for_topic = TrecRun()
        
        run_for_topic.run_data = pd.merge(run, qrels_incomplete[["query","docid","rel"]], how="left")
        run_for_topic.run_data = run_for_topic.run_data[run_for_topic.run_data['query'] == topic]
        
        df = run_for_topic.run_data.copy()
        df = df[df["rel"].isnull()]
        del df['rel']
        df = pd.merge(df, qrels_complete[["query","docid","rel"]], how="left")
        df = df[~df["rel"].isnull()]
        
        if len(df) == 0:
            continue
        
        expected = df['rel'].value_counts().to_dict()
        sum_expected = sum(expected.values())
        expected = {i: expected.get(i, 0)/sum_expected for i in [0, 1, 2, 3]}

        tmp = {'system': df.iloc[0]['system'], 'query': df.iloc[0]['query'], 'expected': expected}
        for estimator_name, estimator in estimators:
            tmp[estimator_name] = estimator.estimate_probabilities(run_for_topic, qrels)

        ret += [tmp]

    return pd.DataFrame(ret)

Load runs: 


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 110/110 [00:05<00:00, 18.83it/s]

CPU times: user 6.14 s, sys: 791 ms, total: 6.93 s
Wall time: 6.57 s





In [2]:
tmp_ret = []

for run_name in RUNS:
    run = normalize_run(TrecRun(run_name), 10)
    qrels = TrecQrel('../resources/unprocessed/topics-and-qrels/qrels.robust04.txt')
    task = {
        'working_directory': '../resources/',
        'trec_identifier': 'trec-system-runs-trec13-robust',
        'run': run_name.replace('../resources', 'src/main/resources')
    }
    pool = load_pool_task(task)
    pool = {k:v for k,v in pool.items() if 'depth-10-pool-incomplete-for' in k}
    assert len(pool) == 1
    pool = pool[list(pool.keys())[0]]
    
    qrels_incomplete = __adjust_qrels_to_pool(qrels, pool)
    
    tmp_ret += [eval_on_x(run, qrels_incomplete.qrels_data.copy(), qrels.qrels_data.copy())]

    if len(tmp_ret) > 2:
        break
#tmp_ret = pd.concat(tmp_ret)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [03:33<00:00,  1.17it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [06:10<00:00,  1.48s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [02:14<00:00,  1.86it/s]


In [21]:




df = tmp_ret[0].copy()
for field in ['always-0', 'cnt', 'run-indep', 'poisson']:
    df[f'loss-{field}'] = df.apply(lambda i: jensenshannon_loss(i, field), axis=1)

df

Unnamed: 0,system,query,expected,always-0,cnt,run-indep,poisson,always-1,loss-always-0,loss-cnt,loss-run-indep,loss-poisson
0,apl04rsTDNw5,301,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 9.997000899730082e-05, 1: 0.99970008997300...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 2.7180100138671515e-32, 1: 1.0, 2: 2.71801...","{0: 0, 1: 1, 2: 0, 3: 0}",0.833,0.010,0.767,0.000
1,apl04rsTDNw5,306,"{0: 0.5, 1: 0.5, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.8748250349930015, 1: 0.1249750049990002,...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 0.9999837197954541, 1: 1.6280204545874084e...","{0: 0, 1: 1, 2: 0, 3: 0}",0.465,0.294,0.375,0.464
2,apl04rsTDNw5,307,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7498500299940012, 1: 0.2499500099980004,...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 0.9932609152250002, 1: 0.00673908477499984...","{0: 0, 1: 1, 2: 0, 3: 0}",0.833,0.617,0.767,0.820
3,apl04rsTDNw5,311,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.11108889333244462, 1: 0.888711146659557,...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 1.297372370556767e-07, 1: 0.99999987026276...","{0: 0, 1: 1, 2: 0, 3: 0}",0.833,0.201,0.767,0.000
4,apl04rsTDNw5,313,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 9.997000899730082e-05, 1: 0.99970008997300...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 2.718010013867145e-16, 1: 0.99999999999999...","{0: 0, 1: 1, 2: 0, 3: 0}",0.833,0.010,0.767,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...
97,apl04rsTDNw5,686,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7998400319936013, 1: 0.19996000799840033...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 0.9930878548582619, 1: 0.00691214514173723...","{0: 0, 1: 1, 2: 0, 3: 0}",0.000,0.274,0.141,0.049
98,apl04rsTDNw5,692,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.3749250149970006, 1: 9.998000399920017e-...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 0.05628495664862138, 1: 2.9371932040709627...","{0: 0, 1: 1, 2: 0, 3: 0}",0.000,0.539,0.141,0.764
99,apl04rsTDNw5,698,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.856971462850287, 1: 9.998000399920017e-0...","{0: 0.943992174752532, 1: 0.05259741839951709,...","{0: 0.9997422335837128, 1: 4.994596752892839e-...","{0: 0, 1: 1, 2: 0, 3: 0}",0.000,0.229,0.141,0.009
100,apl04rsTDNw5,699,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.5554444666622231, 1: 0.4443555733297785,...","{0: 0.9439921747525322, 1: 0.0525974183995171,...","{0: 0.7319633354719777, 1: 0.2680366645280222,...","{0: 0, 1: 1, 2: 0, 3: 0}",0.000,0.432,0.141,0.321


In [22]:
df['loss-cnt'].mean()

0.35159803921568633

In [23]:
df['loss-always-0'].mean()

0.1758627450980392

In [19]:
df['loss-run-indep'].mean()

0.19124000000000002

In [20]:
df['loss-poisson'].mean()

0.20808

In [62]:


jensenshannon(np.array([0.8, 0.2, 0, 0]), np.array([0.0,0.0,0,1]))

0.8325546111576977

In [1]:
print('asaas')

asaas


In [8]:
import sys
sys.path.append('../python/')

from trectools import TrecRun, TrecQrel
import pandas as pd
from tqdm import tqdm
from run_file_processing import load_all_runs, normalize_run
from evaluation_util import __adjust_qrels_to_pool
from probability_estimation import load_pool_task, CountProbabilityEstimator, RunIndependentCountProbabilityEstimator, PoissonEstimator
import json
from io import StringIO
entries = []
import pandas as pd
from trectools import TrecRun
from copy import deepcopy

class PredictAlways0:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:1, 1:0, 2:0, 3:0}

class PredictAlways1:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:0, 1:1, 2:0, 3:0}


class PredictAlways005:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:0.95, 1:0.05, 2:0, 3:0}

    
class PredictAlways01:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:0.9, 1:0.1, 2:0, 3:0}

class PredictAlways015:
    def estimate_probabilities(run, qrels, relevance_level):
        return {0:0.85, 1:0.15, 2:0, 3:0}
    
def eval_line(line):
    estimators = [('always-0', PredictAlways0()),
                  ('cnt', CountProbabilityEstimator()),
                  ('run-indep', RunIndependentCountProbabilityEstimator()),
                  ('poisson', PoissonEstimator()),
                  ('poisson-0', PoissonEstimator(0)),
                  ('poisson-prob-fixed-001', PoissonEstimator(0, 0.01)),
                  ('poisson-prob-fixed-002', PoissonEstimator(0, 0.02)),
                  ('poisson-prob-fixed-005', PoissonEstimator(0, 0.05)),
                  ('poisson-prob-fixed-015', PoissonEstimator(0, 0.15)),
                  
                  ('poisson-prob-between-0025-01', PoissonEstimator(0, 0.0, 0.025, 0.1)),
                  ('poisson-prob-between-0025-0075', PoissonEstimator(0, 0.0, 0.025, 0.075)),
                  ('poisson-prob-between-0025-015', PoissonEstimator(0, 0.0, 0.025, 0.15)),
                  ('always-1', PredictAlways1()),
                  ('always-01', PredictAlways01()),
                  ('always-015', PredictAlways015()),
                  ('always-005', PredictAlways005()),
                 ]
    
    run_for_topic = line['run'].run_data
    qrels_incomplete = TrecQrel()
    qrels_incomplete.qrels_data = line['qrels_incomplete']
    qrels_complete = line['qrels_complete']
    
    #print(run_for_topic)
    str(qrels_incomplete.qrels_data[["query","docid","rel"]])
    str(run_for_topic.columns)
    
    run_for_topic = pd.merge(run_for_topic.copy(), qrels_incomplete.qrels_data[["query","docid","rel"]], how="left")
        
    df = run_for_topic.copy()
    df = df[df["rel"].isnull()]
    del df['rel']
    df = pd.merge(df, qrels_complete[["query","docid","rel"]], how="left")
    df = df[~df["rel"].isnull()]
        
    if len(df) == 0:
        return None
        
    expected = df['rel'].value_counts().to_dict()
    sum_expected = sum(expected.values())
    expected = {i: expected.get(i, 0)/sum_expected for i in [0, 1, 2, 3]}
    
    ret = {'system': df.iloc[0]['system'], 'query': df.iloc[0]['query'], 'expected': expected}
    
    run = TrecRun()
    run.run_data = run_for_topic
    
    for estimator_name, estimator in estimators:
        ret[estimator_name] = estimator.estimate_probabilities(run, qrels_incomplete)

    return ret

df = []
df_raw = []


for i in tqdm(list(open('probability-estimation-playground.jsonl', 'r'))[:2500]):
#    try:
        i = json.loads(i)
        i['qrels_incomplete'] = pd.read_json(StringIO(i['qrels_incomplete']), lines=True)
        i['qrels_complete'] = pd.read_json(StringIO(i['qrels_complete']), lines=True)
        run = TrecRun()
        run.run_data = pd.read_json(StringIO(i['run']), lines=True)
        i['run'] = run
        
        df_raw += [deepcopy(i)]
        
        i = eval_line(i)
        df += [i]
        
        if len(df) > 10000:
            break
#    except:
#        pass

df = pd.DataFrame([i for i in df if i])
df

100%|██████████| 2500/2500 [03:20<00:00, 12.49it/s]


Unnamed: 0,system,query,expected,always-0,cnt,run-indep,poisson,poisson-0,poisson-prob-fixed-001,poisson-prob-fixed-002,poisson-prob-fixed-005,poisson-prob-fixed-015,poisson-prob-between-0025-01,poisson-prob-between-0025-0075,poisson-prob-between-0025-015,always-1,always-01,always-015,always-005
0,apl04rsTDNw5,301,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 9.997000899730082e-05, 1: 0.99970008997300...","{0: 0.6230637930384938, 1: 0.3767362469535079,...","{0: 0.999999999890898, 1: 1.0910190741714302e-...","{0: 0.9999999971046032, 1: 2.895396773762644e-...","{0: 0.9, 1: 0.09999999999999999, 2: 0, 3: 0}","{0: 0.8, 1: 0.19999999999999998, 2: 0, 3: 0}","{0: 0.5, 1: 0.5, 2: 0, 3: 0}","{0: -0.4999999999999998, 1: 1.4999999999999998...","{0: 0.46739130434782605, 1: 0.532608695652174,...","{0: 0.5615942028985508, 1: 0.4384057971014492,...","{0: 0.2789855072463767, 1: 0.7210144927536233,...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
1,apl04rsTDNw5,306,"{0: 0.5, 1: 0.5, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.8748250349930015, 1: 0.1249750049990002,...","{0: 0.5967392908852784, 1: 0.4030607491067231,...","{0: 0.9660622794788432, 1: 0.03393772052115688...","{0: 0.8316336462456893, 1: 0.16836635375431078...","{0: 0.99375, 1: 0.0062499999999999995, 2: 0, 3...","{0: 0.9875, 1: 0.012499999999999999, 2: 0, 3: 0}","{0: 0.96875, 1: 0.03125, 2: 0, 3: 0}","{0: 0.90625, 1: 0.09374999999999999, 2: 0, 3: 0}","{0: 0.9654777486910995, 1: 0.03452225130890053...","{0: 0.971776832460733, 1: 0.028223167539267013...","{0: 0.9528795811518325, 1: 0.04712041884816753...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
2,apl04rsTDNw5,307,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7498500299940012, 1: 0.2499500099980004,...","{0: 0.6465373591948277, 1: 0.3532626807971739,...","{0: 0.9935455108168427, 1: 0.00645448918315732...","{0: 0.945197733350551, 1: 0.05480226664944901,...","{0: 0.9875, 1: 0.012499999999999999, 2: 0, 3: 0}","{0: 0.975, 1: 0.024999999999999998, 2: 0, 3: 0}","{0: 0.9375, 1: 0.0625, 2: 0, 3: 0}","{0: 0.8125, 1: 0.18749999999999997, 2: 0, 3: 0}","{0: 0.935625, 1: 0.064375, 2: 0, 3: 0}","{0: 0.9466666666666667, 1: 0.05333333333333333...","{0: 0.9135416666666667, 1: 0.08645833333333333...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
3,apl04rsTDNw5,311,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.11108889333244462, 1: 0.888711146659557,...","{0: 0.6665333599946677, 1: 0.33326667999733384...","{0: 0.9999999991082804, 1: 8.917196253716013e-...","{0: 0.9999999759235702, 1: 2.407642988503324e-...","{0: 0.9111111111111111, 1: 0.08888888888888888...","{0: 0.8222222222222222, 1: 0.17777777777777776...","{0: 0.5555555555555556, 1: 0.4444444444444444,...","{0: -0.33333333333333326, 1: 1.333333333333333...","{0: 0.5555555555555556, 1: 0.4444444444444444,...","{0: 0.6296296296296297, 1: 0.37037037037037035...","{0: 0.40740740740740744, 1: 0.5925925925925926...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
4,apl04rsTDNw5,313,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 9.997000899730082e-05, 1: 0.99970008997300...","{0: 0.2592074177757041, 1: 0.7405926222162974,...","{0: 0.9999999346569103, 1: 6.534308970421387e-...","{0: 0.999999117868289, 1: 8.821317110068889e-0...","{0: 0.9, 1: 0.09999999999999999, 2: 0, 3: 0}","{0: 0.8, 1: 0.19999999999999998, 2: 0, 3: 0}","{0: 0.5, 1: 0.5, 2: 0, 3: 0}","{0: -0.4999999999999998, 1: 1.4999999999999998...","{0: 0.19444444444444442, 1: 0.8055555555555556...","{0: 0.37962962962962965, 1: 0.6203703703703703...","{0: -0.17592592592592582, 1: 1.175925925925925...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
958,polyudp6,700,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7499250074992501, 1: 0.12498750124987501...","{0: 0.6126147948585423, 1: 0.19716338225332394...","{0: 0.9900240591838904, 1: 0.00997594081610961...","{0: 0.8988154574366025, 1: 0.10118454256339754...","{0: 0.99375, 1: 0.0062499999999999995, 2: 0, 3...","{0: 0.9875, 1: 0.012499999999999999, 2: 0, 3: 0}","{0: 0.96875, 1: 0.03125, 2: 0, 3: 0}","{0: 0.90625, 1: 0.09374999999999999, 2: 0, 3: 0}","{0: 0.9751320422535211, 1: 0.02486795774647887...","{0: 0.9782130281690141, 1: 0.02178697183098591...","{0: 0.9689700704225352, 1: 0.03102992957746479...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
959,apl04rsTs,301,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.9997000899730083, 1: 9.997000899730082e-...","{0: 0.6230637930384938, 1: 0.3767362469535079,...","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 1.0, 1: 0.0, 2: 0, 3: 0}","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
960,apl04rsTs,304,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7776222533271124, 1: 0.22217778666488924...","{0: 0.7613861843016012, 1: 0.2384138556904004,...","{0: 0.9960433400214969, 1: 0.00395665997850316...","{0: 0.950222664786573, 1: 0.04977733521342698,...","{0: 0.9777777777777777, 1: 0.02222222222222222...","{0: 0.9555555555555556, 1: 0.04444444444444444...","{0: 0.8888888888888888, 1: 0.1111111111111111,...","{0: 0.6666666666666667, 1: 0.3333333333333333,...","{0: 0.9047008547008547, 1: 0.09529914529914531...","{0: 0.9179487179487179, 1: 0.08205128205128205...","{0: 0.8782051282051282, 1: 0.12179487179487178...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"
961,apl04rsTs,306,"{0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0}","{0: 1, 1: 0, 2: 0, 3: 0}","{0: 0.7141428857085724, 1: 0.28565715428342897...","{0: 0.5967392908852783, 1: 0.4030607491067231,...","{0: 0.9930505656640796, 1: 0.00694943433592043...","{0: 0.9482853782534753, 1: 0.05171462174652478...","{0: 0.9904761904761905, 1: 0.00952380952380952...","{0: 0.9809523809523809, 1: 0.01904761904761905...","{0: 0.9523809523809523, 1: 0.04761904761904761...","{0: 0.8571428571428572, 1: 0.14285714285714285...","{0: 0.9473946646721516, 1: 0.05260533532784842...","{0: 0.9569932685115932, 1: 0.04300673148840688...","{0: 0.9281974569932685, 1: 0.07180254300673149...","{0: 0, 1: 1, 2: 0, 3: 0}","{0: 0.9, 1: 0.1, 2: 0, 3: 0}","{0: 0.85, 1: 0.15, 2: 0, 3: 0}","{0: 0.95, 1: 0.05, 2: 0, 3: 0}"


In [9]:
from scipy.spatial.distance import jensenshannon

def jensenshannon_loss(data, field):
    fields = [0, 1, 2, 3]
    y_true = [data['expected'].get(i, 0) for i in fields]
    y_pred = [data[field].get(i, 0) for i in fields]
    
    return round(jensenshannon(y_true, y_pred), 3)

def mean(data, field):
    ret = 0
    
    for k in [0, 1, 2, 3]:
        ret += k * data[field].get(k, 0)
    
    return ret

for field in ['expected', 'always-0', 'cnt', 'run-indep', 'poisson', 'poisson-0', 'always-1', 'always-01', 'always-015', 'always-005', 'poisson-prob-fixed-001', 'poisson-prob-fixed-002', 'poisson-prob-fixed-005', 'poisson-prob-fixed-015', 'poisson-prob-between-0025-01', 'poisson-prob-between-0025-0075', 'poisson-prob-between-0025-015']:
    df[f'loss-{field}'] = df.apply(lambda i: jensenshannon_loss(i, field), axis=1)
    df[f'mean-{field}'] = df.apply(lambda i: mean(i, field), axis=1)

df.head(4)[['system', 'query', 'expected', 'poisson', 'loss-poisson']]

  return np.sqrt(js / 2.0)


Unnamed: 0,system,query,expected,poisson,loss-poisson
0,apl04rsTDNw5,301,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 0.999999999890898, 1: 1.0910190741714302e-...",0.833
1,apl04rsTDNw5,306,"{0: 0.5, 1: 0.5, 2: 0.0, 3: 0.0}","{0: 0.9660622794788432, 1: 0.03393772052115688...",0.399
2,apl04rsTDNw5,307,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 0.9935455108168427, 1: 0.00645448918315732...",0.821
3,apl04rsTDNw5,311,"{0: 0.0, 1: 1.0, 2: 0.0, 3: 0.0}","{0: 0.9999999991082804, 1: 8.917196253716013e-...",0.833


In [10]:
df_eval = []

for field in ['expected', 'always-0', 'cnt', 'run-indep', 'poisson',  'poisson-0', 'always-1', 'always-01', 'always-015', 'always-005', 'poisson-prob-fixed-001', 'poisson-prob-fixed-002', 'poisson-prob-fixed-005', 'poisson-prob-fixed-015', 'poisson-prob-between-0025-01', 'poisson-prob-between-0025-0075', 'poisson-prob-between-0025-015']:
    df_eval += [{'Method': field, 'JS Loss (Mean)': df[f'loss-{field}'].mean(), 'Mean Pred.': df[f'mean-{field}'].mean()}]

df_eval = pd.DataFrame(df_eval)
df_eval

Unnamed: 0,Method,JS Loss (Mean),Mean Pred.
0,expected,0.0,0.158769
1,always-0,0.129675,0.0
2,cnt,0.332568,0.456658
3,run-indep,0.280922,0.225078
4,poisson,0.136525,0.001439
5,poisson-0,0.15847,0.015532
6,always-1,0.721238,1.0
7,always-01,0.256796,0.1
8,always-015,0.284453,0.15
9,always-005,0.221108,0.05


In [None]:
P(Relevant|Ungejudged)

P(Rel=0|Ungejudged)
P(Rel=1|Ungejudged)
P(Rel=2|Ungejudged)