# Verification of F1 Metric

In [54]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as prf
from sklearn.preprocessing import MultiLabelBinarizer
from typing import MutableSequence
from itertools import chain

In [7]:
def f1(outputs_batch: MutableSequence,
       targets_batch: MutableSequence) -> dict:
    """Calculate per class and macro F1 between the given predictions
    and targets

    Parameters
    ----------
    outputs_batch : MutableSequence
        Predictions of a batch.
    targets_batch : MutableSequence
        Targets of the batch.

    Returns
    -------
    scores : dict
        Dictionary containing the metric values.

    """

    per_class_prec = []
    per_class_rec = []

    num_classes = targets_batch.shape[-1]

    for cls in range(num_classes):
        tp = np.dot(targets_batch[:, cls], outputs_batch[:, cls])
        pp = np.sum(outputs_batch[:, cls])
        p = np.sum(targets_batch[:, cls])
        prec = tp/pp if pp != 0 else 0
        rec = tp/p if p != 0 else 0

        per_class_prec.append(prec)
        per_class_rec.append(rec)

    den = [per_class_prec[i] + per_class_rec[i]
           for i in range(len(per_class_rec))]
    num = [2 * (per_class_prec[i] * per_class_rec[i])
           for i in range(len(per_class_rec))]

    per_class_f1 = [num_val * 1./den_val if den_val != 0 else 0
                    for num_val, den_val in zip(num, den)]

    macro_f1 = sum(per_class_f1) * 1./len(per_class_f1)

    # Converting metrics to dictionaries for easier understanding
    # per_class_prec = {
    #         k: per_class_prec[i] for i, k in enumerate(target_names)}
    # per_class_rec = {
    #         k: per_class_rec[i] for i, k in enumerate(target_names)}
    # per_class_f1 = {
    #         k: per_class_f1[i] for i, k in enumerate(target_names)}

    scores = {
        'precision': per_class_prec,
        'recall': per_class_rec,
        'f1': per_class_f1,
        'macro_f1': macro_f1,
        }

    return scores

## Trying on single class classification

In [104]:
preds = np.array([[1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 1, 0, 0, 0],
                  [0, 0, 0, 1, 0],
                  [1, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1],
                  [0, 0, 1, 0, 0],
                  [0, 1, 0, 0, 0]])

In [105]:
targets = np.array([[1, 0, 0, 0, 0],
                    [0, 1, 0, 0, 0],
                    [0, 0, 1, 0, 0],
                    [0, 0, 0, 1, 0],
                    [0, 0, 0, 0, 1],
                    [0, 0, 0, 0, 1],
                    [0, 0, 0, 1, 0],
                    [0, 0, 1, 0, 0]])

In [51]:
def convert_to_categorical(array):
    categories = []
    for item in array:
        print(item)
        pos = [val for val in np.where(item != 0)[0]]
        categories.append(pos)
        
    return categories

In [78]:
categorical = convert_to_categorical(preds)
cat_targets = convert_to_categorical(targets)

[1 0 0 0 0]
[0 0 1 0 0]
[0 1 0 0 0]
[0 0 0 1 0]
[1 0 0 0 0]
[0 0 0 0 1]
[0 0 1 0 0]
[0 1 0 0 0]
[1 0 0 0 0]
[0 1 0 0 0]
[0 0 1 0 0]
[0 0 0 1 0]
[0 0 0 0 1]
[0 0 0 0 1]
[0 0 0 1 0]
[0 0 1 0 0]


In [79]:
categorical = list(chain.from_iterable(categorical))
cat_targets = list(chain.from_iterable(cat_targets))

In [62]:
list(categorical)

[0, 2, 1, 3, 0, 4, 2, 1]

In [106]:
scores = f1(preds, targets)

In [107]:
scores

{'precision': [0.5, 0.0, 0.0, 1.0, 1.0],
 'recall': [1.0, 0.0, 0.0, 0.5, 0.5],
 'f1': [0.6666666666666666, 0, 0, 0.6666666666666666, 0.6666666666666666],
 'macro_f1': 0.4}

In [109]:
prf(targets, preds, average=None)

(array([0.5, 0. , 0. , 1. , 1. ]),
 array([1. , 0. , 0. , 0.5, 0.5]),
 array([0.66666667, 0.        , 0.        , 0.66666667, 0.66666667]),
 array([1, 1, 2, 2, 2]))

## Multi-Label Classification

In [88]:
preds = np.array([[1, 0, 1, 0, 0],
                  [0, 0, 1, 0, 0],
                  [0, 1, 0, 0, 0],
                  [0, 1, 0, 1, 1],
                  [1, 0, 1, 0, 0],
                  [0, 0, 1, 0, 1],
                  [0, 0, 1, 0, 0],
                  [0, 1, 1, 1, 1]])

In [89]:
targets = np.array([[1, 1, 0, 0, 0],
                    [0, 1, 1, 0, 0],
                    [0, 0, 1, 1, 0],
                    [0, 0, 0, 1, 1],
                    [0, 0, 0, 0, 1],
                    [0, 0, 0, 1, 1],
                    [0, 1, 1, 1, 0],
                    [1, 0, 1, 0, 0]])

In [90]:
scores = f1(preds, targets)

In [91]:
scores

{'precision': [0.5, 0.0, 0.5, 0.5, 0.6666666666666666],
 'recall': [0.5, 0.0, 0.75, 0.25, 0.6666666666666666],
 'f1': [0.5, 0, 0.6, 0.3333333333333333, 0.6666666666666666],
 'macro_f1': 0.42000000000000004}

In [95]:
categorical = convert_to_categorical(preds)
cat_targets = convert_to_categorical(targets)

[1 0 1 0 0]
[0 0 1 0 0]
[0 1 0 0 0]
[0 1 0 1 1]
[1 0 1 0 0]
[0 0 1 0 1]
[0 0 1 0 0]
[0 1 1 1 1]
[1 1 0 0 0]
[0 1 1 0 0]
[0 0 1 1 0]
[0 0 0 1 1]
[0 0 0 0 1]
[0 0 0 1 1]
[0 1 1 1 0]
[1 0 1 0 0]


In [96]:
categorical

[[0, 2], [2], [1], [1, 3, 4], [0, 2], [2, 4], [2], [1, 2, 3, 4]]

In [97]:
cat_targets

[[0, 1], [1, 2], [2, 3], [3, 4], [4], [3, 4], [1, 2, 3], [0, 2]]

In [100]:
m = MultiLabelBinarizer().fit(targets)

prf(m.transform(categorical),
    m.transform(cat_targets),
    average=None)



(array([0.5, 0. ]), array([0.5, 0. ]), array([0.5, 0. ]), array([2, 3]))

In [111]:
metrics = prf(targets, preds, average=None)

In [112]:
per_class_prec, per_class_rec, per_class_f1, per_class_support = metrics

In [113]:
per_class_prec

array([0.5, 0. , 0. , 1. , 1. ])

In [114]:
per_class_prec = {i: v for i, v in enumerate(per_class_prec)}

In [115]:
per_class_prec

{0: 0.5, 1: 0.0, 2: 0.0, 3: 1.0, 4: 1.0}

# Verification of mAP

In [117]:
import pytrec_eval
import os
import json

In [150]:
results_path = "/home/workboots/Results/advocate_recommendation/new/exp_3/cross_val/5_fold/fold_1/results/scores.json"

In [151]:
with open(results_path, 'r') as f:
    scores = json.load(f)

In [152]:
targets_path = "/home/workboots/Datasets/DHC/variations/new/var_1/targets/case_advs.json"

In [153]:
with open(targets_path, 'r') as f:
    targets = json.load(f)

In [154]:
targets = {k: {adv: 1 for adv in names} for k, names in targets.items() if k in scores.keys()}

In [177]:
evaluator = pytrec_eval.RelevanceEvaluator(
    targets, {'map', 'Rprec'})

In [178]:
results = evaluator.evaluate(scores)

In [179]:
results

{'633021': {'map': 0.11704168847025988, 'Rprec': 0.0},
 '177616015': {'map': 0.09090909090909091, 'Rprec': 0.0},
 '314844': {'map': 0.5476190476190477, 'Rprec': 0.5},
 '118511198': {'map': 0.07692307692307693, 'Rprec': 0.0},
 '34363167': {'map': 0.16666666666666666, 'Rprec': 0.0},
 '161399126': {'map': 0.029910714285714284, 'Rprec': 0.0},
 '146581536': {'map': 0.054093567251461985, 'Rprec': 0.0},
 '102124020': {'map': 0.05399719495091164, 'Rprec': 0.0},
 '45742581': {'map': 0.021739130434782608, 'Rprec': 0.0},
 '1438937': {'map': 0.022246272246272247, 'Rprec': 0.0},
 '27234590': {'map': 0.5, 'Rprec': 0.0},
 '39406078': {'map': 0.20238095238095238, 'Rprec': 0.0},
 '18574610': {'map': 0.16666666666666666, 'Rprec': 0.0},
 '65562438': {'map': 0.1, 'Rprec': 0.0},
 '80285629': {'map': 0.03617992177314211, 'Rprec': 0.0},
 '4749781': {'map': 0.2, 'Rprec': 0.0},
 '171371791': {'map': 0.01818181818181818, 'Rprec': 0.0},
 '160899534': {'map': 0.015151515151515152, 'Rprec': 0.0},
 '173787376': {'m

In [172]:
mAP = sum([value['map'] for k, value in results.items()])/len(results)

In [173]:
mAP

0.1833515095870315

In [174]:
rprec = sum([value['Rprec'] for k, value in results.items()])/len(results)

In [175]:
rprec

0.09497139764996906

In [176]:
recall = sum([value['recall'] for k, value in results.items()])/len(results)

KeyError: 'recall'