### Read ids and tag scores

In [None]:
import os
import time
import numpy as np

# Report the number of CPUs returned by os.cpu_count().
print(os.cpu_count(), "CPUs")
# Print NumPy arrays with two fixed decimals and without scientific notation.
np.set_printoptions(precision=2, suppress=True, floatmode='fixed')

# Root directory under which getIdsTagScores looks up id files and score arrays (machine-specific absolute path).
VisualSearch = "/home/mrim/deviv/Github_Repos_SoA/Hybrid_Space_Dual_Encoding/VisualSearch"

def getIdsTagScores(VisualSearch, split, con):
    """Load caption/video ids and pre-sigmoid tag scores for one configuration.

    Args:
        VisualSearch: root directory containing the ``results`` and ``experiments`` trees.
        split: split name used to build the file names (the notebook passes "val" or "test").
        con: configuration key, one of "hybrid_512", "jaccard_512", "jaccard_256",
            "cosine_512" or "cosine_256".

    Returns:
        ``((cap_ids, vid_ids, cap_tag_scores, vid_tag_scores), similarity)`` where the ids are
        lists of stripped lines, the scores are the arrays loaded from the ``.npy`` files and
        ``similarity`` is "jaccard" or "cosine".

    Raises:
        ValueError: if ``con`` is not a known configuration (checked before any file is read).
    """
    # Configuration key -> (run directory name, similarity measure).
    runs = {
        "hybrid_512": ("msrvtt10k_model_best.pth.tar", "jaccard"),
        "jaccard_512": ("concept_jaccard_512_runs_3.pth", "jaccard"),
        "jaccard_256": ("concept_jaccard_256_runs_5.pth", "jaccard"),
        "cosine_512": ("concept_cosine_512_runs_13.pth", "cosine"),
        "cosine_256": ("concept_cosine_256_runs_9.pth", "cosine"),
    }
    if con not in runs:
        raise ValueError("Unknown configuration %r, expected one of %s" % (con, sorted(runs)))
    run, similarity = runs[con]
    t0 = time.time()
    print("Read ", end="")
    # The id lists are always read from the same directory, whatever the configuration.
    ids_dir = os.path.join(VisualSearch, "results/save_scores/msrvtt10k_model_best.pth.tar")
    with open(os.path.join(ids_dir, split+"_caption_ids.txt"), 'r') as f:
        cap_ids = [line.strip() for line in f]
    with open(os.path.join(ids_dir, split+"_video_ids.txt"), 'r') as f:
        vid_ids = [line.strip() for line in f]
    scores_dir = os.path.join(VisualSearch, "experiments/pre_sig_feats/"+con+"/"+split+"/"+run)
    cap_tag_scores = np.load(os.path.join(scores_dir, "cap_pre_sigmoid_"+split+".npy"))
    vid_tag_scores = np.load(os.path.join(scores_dir, "vid_pre_sigmoid_"+split+".npy"))
    ncap, nvid, ntag = cap_tag_scores.shape[0], vid_tag_scores.shape[0], vid_tag_scores.shape[1]
    print("(%d captions, %d videos, %d tags): %.2f seconds." % (ncap, nvid, ntag, time.time()-t0))
    return (cap_ids, vid_ids, cap_tag_scores, vid_tag_scores), similarity

### Transform scores

In [None]:
from scipy.special import expit
from sklearn.preprocessing import normalize

def spower(x,p):
    """Signed power: raise the magnitude of ``x`` to the power ``p`` and restore the sign of ``x``."""
    magnitude = np.power(np.absolute(x), p)
    return np.sign(x)*magnitude

def simTransform(tag_scores, similarity, params):
    """Transform raw tag scores into the values compared by the similarity measure.

    ``params`` is indexed as ``[shift, scale, power]``.
    For "jaccard": ``expit(scale*(scores-shift))**power``.
    For any other similarity: the signed power of the shifted scores, L2-normalised per row
    (the scale entry is not used in that case).
    """
    shift, power = params[0], params[2]
    shifted = tag_scores-shift
    if similarity != "jaccard":
        return normalize(spower(shifted, power), axis=1, norm='l2')
    scale = params[1]
    return np.power(expit(scale*shifted), power)

def simTransforms(cap_tag_scores, vid_tag_scores, similarity, params):
    """Apply simTransform to the caption scores and to the video scores, printing the elapsed time.

    Returns the pair ``(cap_tag_vals, vid_tag_vals)``.
    """
    start = time.time()
    print("Transforms: ", end="")
    transformed = tuple(simTransform(scores, similarity, params)
                        for scores in (cap_tag_scores, vid_tag_scores))
    print("%.2f seconds." % (time.time()-start))
    cap_tag_vals, vid_tag_vals = transformed
    return cap_tag_vals, vid_tag_vals

### Similarity scores

In [None]:
import concurrent.futures

def getSimScoresJaccard(cap, vid, sel):
    """Weighted Jaccard similarity between every caption row and every video row.

    Args:
        cap: 2-D array, one row of tag values per caption.
        vid: 2-D array, one row of tag values per video (same number of columns as ``cap``).
        sel: ``None`` to use all tags, or an integer array of tag counts; for each count ``k``
            only the ``k`` tags with the largest ``min(cap, vid)`` contribution are used.

    Returns:
        Array of shape ``(len(cap), len(vid), 1)`` when ``sel`` is ``None``, otherwise
        ``(len(cap), len(vid), len(sel))``.
    """
    cap_exp, vid_exp = np.expand_dims(cap, axis=1), np.expand_dims(vid, axis=0)
    cap_bro, vid_bro = np.broadcast_arrays(cap_exp, vid_exp)
    min_bro = np.minimum(cap_bro, vid_bro)
    max_bro = np.maximum(cap_bro, vid_bro)
    if sel is None:
        return (np.sum(min_bro, axis=2, keepdims=True) /
                np.sum(max_bro, axis=2, keepdims=True))
    # Sort by decreasing contribution so that the first k tags are the top-k contributors,
    # consistently with getSimScoresCosine (the sort used to be ascending, which selected
    # the k *smallest* contributions instead).
    ind_bro = np.argsort(-min_bro, axis=2)
    min_bro = np.take_along_axis(min_bro, ind_bro, axis=2)
    max_bro = np.take_along_axis(max_bro, ind_bro, axis=2)
    # Cumulative sums give the similarity restricted to the first k sorted tags; index k-1 selects it.
    return (np.cumsum(min_bro, axis=2) / np.cumsum(max_bro, axis=2))[:,:,sel-1]

def getSimScoresCosine(cap, vid, sel):
    """Cosine similarity between every caption row and every video row.

    With ``sel`` set to ``None`` all tags are used and the result has a trailing axis of
    length 1. Otherwise ``sel`` is an integer array of tag counts: tags are ordered by
    decreasing absolute product and the similarity over the first ``k`` of them is returned
    for every ``k`` in ``sel`` (trailing axis of length ``len(sel)``).
    """
    cap_grid, vid_grid = np.broadcast_arrays(np.expand_dims(cap, axis=1),
                                             np.expand_dims(vid, axis=0))
    if sel is not None:
        # Largest absolute products first.
        order = np.argsort(-np.absolute(cap_grid*vid_grid), axis=2)
        cap_sorted = np.take_along_axis(cap_grid, order, axis=2)
        vid_sorted = np.take_along_axis(vid_grid, order, axis=2)
        dots = np.cumsum(cap_sorted*vid_sorted, axis=2)
        cap_energy = np.cumsum(np.square(cap_sorted), axis=2)
        vid_energy = np.cumsum(np.square(vid_sorted), axis=2)
        partial = dots / np.sqrt(cap_energy * vid_energy)
        return partial[:,:,sel-1]
    dots = np.sum(cap_grid*vid_grid, axis=2, keepdims=True)
    cap_energy = np.sum(np.square(cap_grid), axis=2, keepdims=True)
    vid_energy = np.sum(np.square(vid_grid), axis=2, keepdims=True)
    return dots / np.sqrt(cap_energy * vid_energy)

def getSimScoresBlock(cap, vid, similarity, sel):
    """Compute one block of similarities: Jaccard when ``similarity`` is "jaccard", cosine otherwise."""
    if similarity == "jaccard":
        return getSimScoresJaccard(cap, vid, sel)
    return getSimScoresCosine(cap, vid, sel)

def getSimScoresSel(cap, vid, similarity, sel, blockSize=16777216): # blockSize is in items, not in bytes
    """Compute all caption/video similarities block by block on a thread pool.

    Args:
        cap: 2-D array of caption tag values (one row per caption).
        vid: 2-D array of video tag values (one row per video).
        similarity: "jaccard" or anything else for cosine (see getSimScoresBlock).
        sel: ``None`` or an integer array of tag counts, forwarded to the block functions.
        blockSize: target number of items (not bytes) handled per block; it bounds the number
            of caption rows per block to ``blockSize // vid.size``.

    Returns:
        Array of shape ``(nsel, ncap, nvid)`` where ``nsel`` is 1 when ``sel`` is ``None``
        and ``len(sel)`` otherwise.
    """
    # Max number of caption rows per block; at least one row so that a large ``vid``
    # (vid.size > blockSize) or an empty one does not lead to a division by zero.
    nRows = max(1, blockSize//max(1, vid.size))
    # ceil(quotient); at least one block because np.array_split rejects 0 sections.
    nBlocks = max(1, -(-cap.shape[0]//nRows))
    print("Similarities (%d blocks): " % nBlocks, end="")
    blocks = np.array_split(cap, nBlocks)
    t0 = time.time()
    # The context manager shuts the pool down so that worker threads do not accumulate across calls.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(getSimScoresBlock, block, vid, similarity, sel) for block in blocks]
        concurrent.futures.wait(futures)
    print("%.2f seconds." % (time.time()-t0))
    t0 = time.time()
    print("Concatenate ", end="")
    # Blocks are stacked along the caption axis, then the selection axis is moved to the front.
    sim_scores = np.moveaxis(np.concatenate([future.result() for future in futures]), -1, 0)
    print(sim_scores.shape, end="")
    print(": %.2f seconds." % (time.time()-t0))
    return sim_scores

### Metrics

In [None]:
def blockMetrics(labels, scores):
    """Per-row retrieval metrics for a block of queries.

    Each row of ``labels`` marks the relevant candidates (non-zero) and the matching row of
    ``scores`` ranks them (higher is better). Returns, one value per row: recall at 1, 5 and
    10, average precision, and the 0-based rank of the first relevant candidate.
    """
    order = np.argsort(-scores, axis=1)
    hits = np.take_along_axis(labels, order, axis=1).astype(float)
    cum_hits = np.cumsum(hits, axis=1)
    positions = np.arange(1.0, hits.shape[1]+1.0)
    # Precision at each relevant position, summed and divided by the number of relevant items.
    ap = (hits*cum_hits/positions).sum(axis=1)/cum_hits[:,-1]
    r1 = hits[:,0]
    r5 = hits[:,:5].max(axis=1)
    r10 = hits[:,:10].max(axis=1)
    ranks = hits.argmax(axis=1)
    return r1, r5, r10, ap, ranks

def getMetrics(labels, scores):
    """Aggregate retrieval metrics over all rows, computed in parallel row blocks.

    Args:
        labels: 2-D array marking the relevant candidates of each query row.
        scores: 2-D array of the same shape with the similarity scores.

    Returns:
        List ``[R@1, R@5, R@10, mAP, median rank, mean rank, recall sum]`` where ranks are
        1-based and the recall sum is ``R@1 + R@5 + R@10``.
    """
    # os.cpu_count() may return None; fall back to a single block in that case.
    nThreads = os.cpu_count() or 1
    labels_blocks, scores_blocks = np.array_split(labels, nThreads), np.array_split(scores, nThreads)
    # The context manager shuts the pool down so that worker threads do not accumulate across calls.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(blockMetrics, labels_block, scores_block)
                   for labels_block, scores_block in zip(labels_blocks, scores_blocks)]
        result_blocks = [future.result() for future in futures]
    # Means of R@1, R@5, R@10 and average precision over all rows.
    metrics = [np.mean(np.concatenate([result_block[i] for result_block in result_blocks])) for i in range(4)]
    ranks = np.concatenate([result_block[4] for result_block in result_blocks])
    # Ranks from blockMetrics are 0-based; report them 1-based.
    metrics.append(np.floor(np.median(ranks))+1)
    metrics.append(np.mean(ranks)+1)
    metrics.append(metrics[0]+metrics[1]+metrics[2])
    return metrics

def allMetrics(labels, scores):
    """Metrics in both retrieval directions plus two global figures.

    Returns a 16-element list: the 7 text-to-video metrics (indices 0-6), the 7 video-to-text
    metrics computed on the transposed inputs (indices 7-13), the mean of the two mAP values
    (index 14) and the sum of the two recall sums (index 15).
    """
    text_to_video = getMetrics(labels, scores)
    video_to_text = getMetrics(np.transpose(labels), np.transpose(scores))
    metrics = text_to_video+video_to_text
    global_map = (metrics[3]+metrics[10])/2
    global_recall_sum = metrics[6]+metrics[13]
    metrics.extend([global_map, global_recall_sum])
    return metrics

def printMetrics(metrics):
    """Print the 16 metrics returned by allMetrics as a human-readable report."""
    # (heading, index of the first metric of that direction)
    directions = ((" * Text to Video:", 0), (" * Video to text:", 7))
    for heading, base in directions:
        print(heading)
        print(" * r_1_5_10, medr, meanr: [%.3f, %.3f, %.3f, %.1f, %.3f]" %
              (100*metrics[base], 100*metrics[base+1], 100*metrics[base+2],
               metrics[base+4], metrics[base+5]))
        print(" * recall sum: %.3f" % (100*metrics[base+6]))
        print(" * mAP: %.4f" % metrics[base+3])
        print(" * ----------")
    print(" * Global:")
    print(" * recall sum: %.3f" % (100*metrics[15]))
    print(" * mAP: %.4f" % metrics[14])
    print(" * ----------")

def printMetricsLatex(metrics):
    """Print the metrics returned by allMetrics as one LaTeX table row.

    Column order: text-to-video R@1, R@5, R@10, median rank, mAP; the same five values for
    video-to-text; then the global recall sum and the global mAP. Everything except the
    median ranks is multiplied by 100.
    """
    row_format = "  & %4.1f & %4.1f & %4.1f & %2d & %4.1f && %4.1f & %4.1f & %4.1f & %2d & %4.1f & %5.1f & %4.1f \\\\"
    text_to_video = (100*metrics[0], 100*metrics[1], 100*metrics[2], int(metrics[4]), 100*metrics[3])
    video_to_text = (100*metrics[7], 100*metrics[8], 100*metrics[9], int(metrics[11]), 100*metrics[10])
    overall = (100*metrics[15], 100*metrics[14])
    print(row_format % (text_to_video + video_to_text + overall))

def getMetricsSel(labels, sim_scores, print_metrics=False, print_metrics_latex=False):
    """Compute allMetrics for every score matrix in ``sim_scores``.

    Returns an array with one row of metrics per score matrix. When requested, the metrics
    of the last score matrix are printed as a report and/or as a LaTeX row.
    """
    start = time.time()
    print("Metrics ", end="")
    rows = []
    for scores in sim_scores:
        rows.append(allMetrics(labels, scores))
    metrics = np.array(rows)
    print(metrics.shape, end="")
    print(": %.2f seconds." % (time.time()-start))
    # Only the last entry is printed; it is looked up lazily, as in the report functions' callers.
    for wanted, printer in ((print_metrics, printMetrics), (print_metrics_latex, printMetricsLatex)):
        if wanted:
            printer(metrics[-1])
    return metrics

### Pairs for statistics

In [None]:
def matchedPairs(cap_ids, vid_ids):
    """Pair every caption with the video it belongs to.

    The video id of a caption is the part of the caption id before the first '#'.

    Args:
        cap_ids: sequence of caption ids.
        vid_ids: sequence of video ids.

    Returns:
        ``(cap_idx, vid_idx)``: ``cap_idx`` is ``arange(len(cap_ids))`` and ``vid_idx[i]`` is
        the position in ``vid_ids`` (first occurrence) of the video of caption ``i``.

    Raises:
        ValueError: if a caption refers to a video id that is not in ``vid_ids``.
    """
    # One dictionary lookup per caption instead of a linear list.index() scan.
    # setdefault keeps the first occurrence, which is what list.index() returns.
    first_index = {}
    for idx, vid_id in enumerate(vid_ids):
        first_index.setdefault(vid_id, idx)
    try:
        vid_idx = [first_index[cap_id.split('#', 1)[0]] for cap_id in cap_ids]
    except KeyError as err:
        raise ValueError("%s is not in vid_ids" % err) from None
    return np.arange(len(cap_ids)), np.array(vid_idx)

def randomPairs(ncap, nvid, num):
    """Draw ``num`` distinct (caption index, video index) pairs uniformly at random.

    Pairs are drawn without replacement from the ``ncap*nvid`` possible combinations and
    returned as two index arrays ``(cap_idx, vid_idx)``.
    """
    flat_idx = np.random.choice(ncap*nvid, num, replace=False)
    # Decode each flat index: remainder is the caption index, quotient the video index.
    cap_idx = flat_idx % ncap
    vid_idx = flat_idx // ncap
    return cap_idx, vid_idx

def getPairs(scores, n, matched_pairs, random_pairs):
    """Build the six sets of (caption index, video index) pairs used for the statistics.

    Args:
        scores: 2-D similarity matrix, captions along rows and videos along columns.
        n: number of top-ranked candidates to keep per query; it is clamped to the number of
            available candidates so that both index arrays of a pair set keep the same length.
        matched_pairs: pair of index arrays, passed through unchanged.
        random_pairs: pair of index arrays, passed through unchanged.

    Returns:
        ``[[ttv_top1, vtt_top1, matched_pairs], [ttv_topn, vtt_topn, random_pairs]]`` where
        each entry is a ``(cap_idx, vid_idx)`` tuple of index arrays.
    """
    t0 = time.time()
    ncap, nvid = scores.shape
    # Text to video: for each caption, videos by decreasing score.
    sort_idx = np.argsort(-scores, axis=1)
    n_vid = min(n, nvid)
    ttv_pairs_1 = np.arange(ncap), sort_idx[:,0].flatten()
    ttv_pairs_n = np.repeat(np.arange(ncap), n_vid), sort_idx[:,:n_vid].flatten()
    # Video to text: for each video, captions by decreasing score.
    sort_idx = np.argsort(-scores, axis=0)
    n_cap = min(n, ncap)
    vtt_pairs_1 = sort_idx[0,:].flatten(), np.arange(nvid)
    vtt_pairs_n = sort_idx[:n_cap,:].flatten(), np.tile(np.arange(nvid), n_cap)
    pairs = [[ttv_pairs_1, vtt_pairs_1, matched_pairs], [ttv_pairs_n, vtt_pairs_n, random_pairs]]
    print("Pairs: %.2f seconds." % (time.time()-t0))
    return pairs

### Lists for statistics

In [None]:
def transposeLists(l):
    """Transpose a list of sequences: element ``j`` of the result lists item ``j`` of every input."""
    return [list(column) for column in zip(*l)]

def getElems(cap_tag_scores, vid_tag_scores, similarity, matched_pairs, random_pairs, params, sel_step,
             get_metrics, get_pairs, print_metrics, print_metrics_latex):
    """Run the whole pipeline for one ``[shift, scale, power]`` parameter set.

    Transforms the scores, computes the similarities (for several numbers of selected tags
    when ``sel_step > 0``), then optionally the metrics and the pairs for statistics.

    Returns ``(cap_tag_vals, vid_tag_vals, metrics, pairs)``; ``metrics`` and ``pairs`` are
    ``None`` when not requested.
    """
    print("Params (shift=%.3f, scale=%.3f, power=%.3f)" % (params[0], params[1], params[2]))
    if sel_step > 0:
        # Tag counts on a geometric grid (sel_step values per octave, starting at 4),
        # limited to the number of available tags and de-duplicated.
        ntag = cap_tag_scores.shape[1]
        counts = [round(2**(i/sel_step)) for i in range(2*sel_step, 10*sel_step)]
        sel = np.unique(np.array([count for count in counts if count <= ntag]))
    else:
        sel = None
    cap_tag_vals, vid_tag_vals = simTransforms(cap_tag_scores, vid_tag_scores, similarity, params)
    # Ground truth: 1 where the caption and the video form a matched pair.
    labels = np.zeros((cap_tag_scores.shape[0], vid_tag_scores.shape[0]), dtype=int)
    labels[matched_pairs[0],matched_pairs[1]] = 1
    sim_scores = getSimScoresSel(cap_tag_vals, vid_tag_vals, similarity, sel)
    metrics = None
    if get_metrics:
        metrics = getMetricsSel(labels, sim_scores, print_metrics=print_metrics,
                                print_metrics_latex=print_metrics_latex)
    pairs = None
    if get_pairs:
        # Pairs are derived from the last score matrix only.
        pairs = getPairs(sim_scores[-1], 10, matched_pairs, random_pairs)
    return cap_tag_vals, vid_tag_vals, metrics, pairs

def getLists(ids_tag_scores, similarity, params_list=None, sel_step=0,
             get_metrics=True, get_pairs=True, print_metrics=False, print_metrics_latex=False):
    """Run getElems for every parameter set in ``params_list``.

    Args:
        ids_tag_scores: ``(cap_ids, vid_ids, cap_tag_scores, vid_tag_scores)`` as returned by
            getIdsTagScores.
        similarity: "jaccard" or "cosine".
        params_list: list of ``[shift, scale, power]`` triples; defaults to the single
            baseline ``[[0.0, 1.0, 1.0]]``.
        sel_step: forwarded to getElems (0 disables the tag-count selection).
        get_metrics, get_pairs, print_metrics, print_metrics_latex: forwarded to getElems.

    Returns:
        One ``(cap_tag_vals, vid_tag_vals, metrics, pairs)`` tuple per parameter set.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if params_list is None:
        params_list = [[0.0, 1.0, 1.0]]
    cap_ids, vid_ids, cap_tag_scores, vid_tag_scores = ids_tag_scores
    matched_pairs = matchedPairs(cap_ids, vid_ids)
    ncap, nvid = cap_tag_scores.shape[0], vid_tag_scores.shape[0]
    # Sampling is without replacement, so no more than ncap*nvid distinct pairs can be drawn.
    random_pairs = randomPairs(ncap, nvid, min(100000, ncap*nvid))
    return [getElems(cap_tag_scores, vid_tag_scores, similarity, matched_pairs, random_pairs, params, sel_step,
                     get_metrics=get_metrics, get_pairs=get_pairs, print_metrics=print_metrics,
                     print_metrics_latex=print_metrics_latex) for params in params_list]

### Contribution plots

In [None]:
def blockContribs(cap_tag_vals, vid_tag_vals, similarity, pairs, absolute):
    """Sorted per-tag contributions to the similarity for a block of pairs.

    Args:
        cap_tag_vals: 2-D array of transformed caption tag values.
        vid_tag_vals: 2-D array of transformed video tag values.
        similarity: "jaccard" (contribution is ``min(cap, vid)``) or anything else
            (contribution is ``|cap*vid|``).
        pairs: ``(cap_idx, vid_idx)`` index arrays selecting the rows to compare.
        absolute: 0 for relative individual and cumulative contributions, 1 for absolute
            individual and relative cumulative contributions, 2 for absolute individual and
            cumulative contributions. "Relative" means L1-normalised per pair.

    Returns:
        ``(ind_contribs, cum_contribs)``, each with one row per pair and contributions in
        decreasing order along the columns.

    Raises:
        ValueError: if ``absolute`` is not 0, 1 or 2.
    """
    if absolute not in (0, 1, 2):
        raise ValueError("absolute must be 0, 1 or 2, got %r" % (absolute,))
    cap, vid = cap_tag_vals[pairs[0]], vid_tag_vals[pairs[1]]
    if similarity == "jaccard":
        contribs = np.minimum(cap, vid)
    else:
        contribs = np.absolute(cap*vid)
    # Largest contributions first (sort of the negated values, negated back).
    sorted_contribs = -np.sort(-contribs, axis=1)
    if absolute == 2:
        return sorted_contribs, np.cumsum(sorted_contribs, axis=1)
    relative_contribs = normalize(sorted_contribs, axis=1, norm='l1')
    cum_contribs = np.cumsum(relative_contribs, axis=1)
    ind_contribs = relative_contribs if absolute == 0 else sorted_contribs
    return ind_contribs, cum_contribs

def statContribs(cap_tag_vals, vid_tag_vals, similarity, pairs, absolute=0):
    """Mean and standard deviation of the sorted tag contributions over a set of pairs.

    The pairs are split into blocks that are processed by blockContribs on a thread pool.

    Args:
        cap_tag_vals: 2-D array of transformed caption tag values.
        vid_tag_vals: 2-D array of transformed video tag values.
        similarity: "jaccard" or "cosine".
        pairs: ``(cap_idx, vid_idx)`` index arrays.
        absolute: contribution mode, forwarded to blockContribs.

    Returns:
        ``(ind_avg, ind_std, cum_avg, cum_std)``: per sorted-tag position, the average and the
        sample standard deviation (``ddof=1``) of the individual and cumulative contributions.
    """
    # os.cpu_count() may return None; fall back to a single block in that case.
    nThreads = os.cpu_count() or 1
    cap_blocks = np.array_split(pairs[0], nThreads)
    vid_blocks = np.array_split(pairs[1], nThreads)
    # The context manager shuts the pool down so that worker threads do not accumulate across calls.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(blockContribs, cap_tag_vals, vid_tag_vals, similarity, pairs_block, absolute)
                   for pairs_block in zip(cap_blocks, vid_blocks)]
        result_blocks = [future.result() for future in futures]
    ind_contribs, cum_contribs = [np.concatenate([result_block[i] for result_block in result_blocks]) for i in range(2)]
    return np.average(ind_contribs, axis=0), np.std(ind_contribs, axis=0, ddof=1), \
           np.average(cum_contribs, axis=0), np.std(cum_contribs, axis=0, ddof=1)

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
# Tick formatter rendering tick values with up to 16 significant digits ('{:.16g}');
# plotStatContribs installs it on the x-axis when the scale is "log".
formatter = FuncFormatter(lambda y, _: '{:.16g}'.format(y))

# Number of subplot rows and columns used by plotStatContribs.
h, w = 2, 3
# Subplot titles, indexed as titles[row][column]; the layout mirrors the pairs matrix built by getPairs.
titles = [["Text to Video Top-1", "Video to Text Top-1","Matched pairs"],
          ["Text to Video Top-10", "Video to Text Top-10", "Random pairs"]]

def margeLimits(dmin, dmax, margin=0.05):
    """Widen the interval ``[dmin, dmax]`` on both sides by ``margin`` times its length."""
    lower = dmin-margin*(dmax-dmin)
    upper = dmax+margin*(dmax-dmin)
    return lower, upper

def plotStatContribs(lists, similarity, titles, legends=None, scale="linear", stdDev=True, absolute=0, all_labels=False,
                     pdf="curves.pdf"):
    """Plot individual and cumulative sorted tag contributions on an h x w grid of subplots.

    Args:
        lists: list of ``(cap_tag_vals, vid_tag_vals, metrics, pairsMatrix)`` tuples as returned
            by getLists; ``pairsMatrix`` must be present (getLists called with get_pairs=True)
            and is indexed as ``pairsMatrix[row][column]``.
        similarity: "jaccard" or "cosine", forwarded to statContribs.
        titles: subplot titles, indexed as ``titles[row][column]``.
        legends: optional legend labels, only used when ``stdDev`` is False.
        scale: "log" for a base-2 logarithmic x-axis, anything else leaves the default scale.
        stdDev: if True, draw a band of one standard deviation around each mean curve.
        absolute: contribution mode forwarded to statContribs (0, 1 or 2).
        all_labels: if True, put axis labels on every subplot and enlarge the figure.
        pdf: name of the PDF file the figure is saved to.

    The function prints timings and a few values, saves the figure and shows it; it returns nothing.
    """
    t0 = time.time()
    # One statContribs result per (entry of lists, pairs row, pairs column).
    contribsList = [[[statContribs(cap_tag_vals, vid_tag_vals, similarity, pairs, absolute=absolute) \
    for pairs in pairsRow] for pairsRow in pairsMatrix] for (cap_tag_vals, vid_tag_vals, _, pairsMatrix) in lists]
    # Move the "entry of lists" axis after the row/column axes: contribsLists[row][column] iterates over entries.
    contribsLists = np.moveaxis(np.array(contribsList, dtype=object), 0, 2)
    print("Stats: %.2f seconds." % (time.time()-t0))
    t0 = time.time()
    # Fixed colours only when standard-deviation bands are drawn.
    if stdDev:
        col1, col2 = 'steelblue', 'red'
    else:
        col1, col2 = 'black', 'black'
    fig, axes = plt.subplots(h, w, figsize=(16+2*all_labels,8+all_labels))
    fig.tight_layout(h_pad = 3.0+all_labels*2.0, w_pad = 5.0+all_labels*4.0)
    # Largest individual mean contribution (plus one standard deviation when bands are drawn),
    # used below as the common upper y-limit when absolute == 0.
    if stdDev:
        ind_max = np.max([[[np.max(ind_avg+ind_std) for ind_avg, ind_std, _, _ in contribs]
                        for contribs in contribsRows] for contribsRows in contribsLists])
    else:
        ind_max = np.max([[[np.max(ind_avg) for ind_avg, _, _, _ in contribs]
                        for contribs in contribsRows] for contribsRows in contribsLists])
    print(ind_max)
    for j in range(h):
        for i in range(w):
            print(titles[j][i]+":")
            ax1 = axes[j][i]
            if scale=="log":
                ax1.set_xscale("log", base=2)
                ax1.xaxis.set_major_formatter(formatter)
                ax1.set_xticks([2**n for n in range(12)])
            if absolute > 0:
                ax1.set_ylim(margeLimits(0, 1))
            else:
                ax1.set_ylim(margeLimits(0, ind_max))
            # Second y-axis sharing the x-axis, used for the cumulative curves.
            ax2 = ax1.twinx()
            for ind_avg, ind_std, cum_avg, cum_std in contribsLists[j][i]:
                # The values come out of an object array; convert them back to float arrays.
                ind_avg, ind_std = np.array(ind_avg, dtype=float), np.array(ind_std, dtype=float)
                cum_avg, cum_std = np.array(cum_avg, dtype=float), np.array(cum_std, dtype=float)
                # Mean cumulative contribution of the first 10 and the first 30 sorted tags
                # (requires at least 30 tags).
                print("%.3f  %.3f" % (cum_avg[9], cum_avg[29]))
                # 1-based position of the sorted tags.
                x = 1+np.arange(ind_avg.shape[0])
                if stdDev:
                    ax1.plot(x, ind_avg, color=col1)
                    ax2.plot(x, cum_avg, color=col2)
                    ax1.fill_between(x, ind_avg-ind_std, np.minimum(1,ind_avg+ind_std), alpha=.3, linewidth=0, color=col1)
                    ax2.fill_between(x, cum_avg-cum_std, cum_avg+cum_std, alpha=.3, linewidth=0, color=col2)
                else:
                    ax1.plot(x, ind_avg)
                    ax2.plot(x, cum_avg)
                    # The legend is set again after every curve; the last call is the one that remains.
                    if legends is not None:
                        ax1.legend(legends, loc='upper center')
            # Axis labels only on the outer subplots unless all_labels is set.
            if (j+1 == h or  all_labels):
                ax1.set_xlabel('Sorted tags ', fontsize=16)
            if (i == 0 or all_labels):
                if absolute > 0:
                    ax1.set_ylabel('Absolute tag contribution', color=col1, fontsize=16)
                else:
                    ax1.set_ylabel('Relative tag contribution', color=col1, fontsize=16)
            if (i+1 == w or all_labels):
                if absolute > 1:
                    ax2.set_ylabel('Absolute cummulative sum', color=col2, fontsize=16)
                else:
                    ax2.set_ylabel('Relative cummulative sum', color=col2, fontsize=16)
            # NOTE(review): plt.title acts on the current axes, presumably the twin axes just created — confirm.
            plt.title(titles[j][i])
    plt.savefig(pdf, format="pdf", bbox_inches='tight')
    plt.show()
    print("Plots: %.2f seconds." % (time.time()-t0))

In [None]:
# Baseline parameters on the "val" split of hybrid_512: contribution curves with a linear and a log x-axis.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity)
plotStatContribs(lists, similarity, titles, scale="linear", pdf="causality_curve_1.pdf")
plotStatContribs(lists, similarity, titles, scale="log", pdf="causality_curve_log_1.pdf")

In [None]:
# Power sweep (1.0 to 3.0 in steps of 0.5) on val / hybrid_512; one curve per power value.
params = [[0.0, 1.0, 0.5*i] for i in range(2, 7)]
legends = ["p=%.1f" % (0.5*i) for i in range(2, 7)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_metrics=False)
plotStatContribs(lists, similarity, titles, legends, scale="log", stdDev=False, all_labels=True,
                 pdf="causality_power_log_5.pdf")

In [None]:
# Scale sweep (1.0 to 5.0 in steps of 1.0) on val / hybrid_512; one curve per scale value.
params = [[0.0, 1.0*i, 1.0] for i in range(1, 6)]
legends = ["a=%.1f" % (1.0*i) for i in range(1, 6)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_metrics=False)
plotStatContribs(lists, similarity, titles, legends, scale="log", stdDev=False, all_labels=True,
                 pdf="causality_scale_log_5.pdf")

In [None]:
# Shift sweep (-0.4 to 0.4 in steps of 0.2) at scale 3.0 on val / hybrid_512; one curve per shift value.
params = [[0.2*i, 3.0, 1.0] for i in range(-2, 3)]
legends = ["b=%.1f" % (0.2*i) for i in range(-2, 3)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_metrics=False)
plotStatContribs(lists, similarity, titles, legends, scale="log", stdDev=False, all_labels=True,
                 pdf="causality_shift_log_5.pdf")

In [None]:
# Power sweep (1.0 to 3.0 in steps of 0.5) on val / cosine_512; one curve per power value.
params = [[0.0, 1.0, 0.5*i] for i in range(2, 7)]
legends = ["p=%.1f" % (0.5*i) for i in range(2, 7)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "cosine_512")
lists = getLists(ids_tag_scores, similarity, params, get_metrics=False)
plotStatContribs(lists, similarity, titles, legends, scale="log", stdDev=False, all_labels=True,
                 pdf="causality_power_cosine_log_5.pdf")

In [None]:
# Shift sweep (-0.4 to 0.4 in steps of 0.2) at power 2.0 for the cosine model.
# The scores are loaded from "cosine_512" so that the data matches the cosine output file
# name; this cell used to load "hybrid_512", which getIdsTagScores maps to the Jaccard similarity.
params = [[0.2*i, 1.0, 2.0] for i in range(-2, 3)]
legends = ["b=%.1f" % (0.2*i) for i in range(-2, 3)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "cosine_512")
lists = getLists(ids_tag_scores, similarity, params, get_metrics=False)
plotStatContribs(lists, similarity, titles, legends, scale="log", stdDev=False, all_labels=True,
                 pdf="causality_shift_cosine_log_5.pdf")

In [None]:
# Baseline and one scaled setting for jaccard_512 and for jaccard_256 (val), plotted together.
# `similarity` is the value returned by the last getIdsTagScores call; no pdf name is given,
# so plotStatContribs saves to its default "curves.pdf".
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists512 = getLists(ids_tag_scores, similarity, [[0.0, 1.0, 1.0], [0.0, 2.2, 1.0]], get_metrics=False)
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists256 = getLists(ids_tag_scores, similarity, [[0.0, 1.0, 1.0], [0.0, 1.8, 1.0]], get_metrics=False)
plotStatContribs(lists512+lists256, similarity, titles, scale="log", stdDev=False)

In [None]:
# Contribution curves on val / hybrid_512 for four parameter settings.
# [shift, scale, power]
params_list = [[0.0, 1.0, 1.0], # Baseline
               [0.0, 1.0, 2.6], # power 2.6 / 2.5
               [0.0, 3.6, 1.0], # scale 3.6 / 3.5
               [-0.1, 3.6, 1.0]] # shift / -0.1
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params_list, get_metrics=False)
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="hybrid_512_curve_log_power.pdf")

In [None]:
# Contribution curves on test / hybrid_512 for four [shift, scale, power] settings.
# The figure gets its own file name: it used to be saved as "jaccard_256_curve_log_power.pdf",
# a name that does not match the data and that later cells of this notebook write to as well.
params_list = [[0.0, 1.0, 1.0], # Baseline
               [0.0, 1.0, 2.6], # power 2.6 / 2.5
               [0.0, 3.6, 1.0], # scale 3.6 / 3.5
               [-0.1, 3.5, 1.0]] # +shift / -0.1
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params_list, get_metrics=False)
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="hybrid_512_test_curve_log_power.pdf")

In [None]:
# Contribution curves on val / jaccard_512 for four [shift, scale, power] settings.
# The figure is saved under a jaccard_512 name: it used to be saved as
# "jaccard_256_curve_log_power.pdf", which the jaccard_256 cell of this notebook also writes.
params_list = [[0.0, 1.0, 1.0], # Baseline
               [0.0, 1.0, 2.1], # power 3.3 / 2.1
               [0.0, 2.7, 1.0], # scale 3.0 / 2.7
               [-0.4, 2.7, 1.0]] # +shift / -0.4
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params_list, get_metrics=False)
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="jaccard_512_curve_log_power.pdf")

In [None]:
# Contribution curves on val / jaccard_256 for four [shift, scale, power] settings.
params_list = [[0.0, 1.0, 1.0], # Baseline
               [0.0, 1.0, 1.6], # power 1.8 / 1.6
               [0.0, 2.0, 1.0], # scale 2.2 / 2.0
               [-0.3, 2.0, 1.0]] # +shift / -0.4
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params_list, get_metrics=False)
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="jaccard_256_curve_log_power.pdf")

In [None]:
# Contribution curves on val / cosine_512 for three [shift, scale, power] settings.
params_list = [[0.0, 1.0, 1.0], # Baseline
               [0.0, 1.0, 1.2], # power 1.25 / 1.2
               [-0.3, 1.0, 1.0]] # +shift / -0.3
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "cosine_512")
lists = getLists(ids_tag_scores, similarity, params_list, get_metrics=False)
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="cosine_512_curve_log_power.pdf")

In [None]:
# Baseline metrics on val / jaccard_512 using all tags; prints the report, then the
# global mAP and global recall sum (metric indices 14 and 15) times 100.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics=True)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])

In [None]:
# Same as the all-tags baseline on val / jaccard_512, but with sel_step=5 so that metrics are
# computed for a range of selected tag counts; the printed report is for the last count.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, sel_step=5, get_pairs=False, print_metrics=True)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])

In [None]:
# Baseline metrics on test / hybrid_512, printed as a LaTeX table row.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])

In [None]:
# Baseline metrics on test / jaccard_512, printed as a LaTeX table row.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])

In [None]:
# Baseline metrics on test / jaccard_256, printed as a LaTeX table row.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])

In [None]:
# Baseline metrics on test / cosine_512, printed as a LaTeX table row.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "cosine_512")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])

In [None]:
# Baseline metrics on test / cosine_256, printed as a LaTeX table row.
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "cosine_256")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])

In [None]:
# Sanity check: results from original software (v8):
# The string below is a bare expression used as a note (it has no effect when the cell runs);
# it is followed by the baseline metrics on val / cosine_512 for comparison.
"""
Concept Space Evaluation
 * Text to Video:
 * r_1_5_10, medr, meanr: [26.026, 55.03, 66.177, 4.0, 28.409]
 * recall sum: 147.233
 * mAP: 0.3942
 * ----------
 * Video to text:
 * r_1_5_10, medr, meanr: [44.064, 76.056, 86.72, 2.0, 9.559]
 * recall sum: 206.841
 * mAP: 0.2615
 * ----------
"""
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "cosine_512")
lists = getLists(ids_tag_scores, similarity, get_pairs=False, print_metrics=True, print_metrics_latex=True)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])

### (Manual) search for optimal (shift, scale) parameters for Hybrid_512 on the validation set

In [None]:
# Scale search (2.7 to 3.7 in steps of 0.1) at shift -0.1 on val / hybrid_512;
# prints (scale, 100 * global mAP) pairs.
params = [[-0.1, 2.7+0.1*i, 1.0] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[1] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Shift search (0.2 down to -0.3 in steps of 0.05) at scale 3.2 on val / hybrid_512;
# prints (shift, 100 * global mAP) pairs.
params = [[0.2-0.05*i, 3.2, 1.0] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[0] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Compare baseline, scale-only and shift+scale settings on val / hybrid_512:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 3.20, 1.0], [-0.1, 3.2, 1.0]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Compare baseline, scale-only and shift+scale settings on test / hybrid_512:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 3.20, 1.0], [-0.1, 3.2, 1.0]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal (shift, scale) parameters for Jaccard_512 on the validation set

In [None]:
# Scale search (3.0 to 4.0 in steps of 0.1) at shift -0.45 on val / jaccard_512;
# prints (scale, 100 * global mAP) pairs.
params = [[-0.45, 3.0+0.1*i, 1.0] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[1] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Shift search (-0.2 down to -0.7 in steps of 0.05) at scale 3.6 on val / jaccard_512;
# prints (shift, 100 * global mAP) pairs.
params = [[-0.2-0.05*i, 3.6, 1.0] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[0] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Compare baseline, scale-only and shift+scale settings on val / jaccard_512:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 2.9, 1.0], [-0.45, 3.6, 1.0]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Compare baseline, scale-only and shift+scale settings on test / jaccard_512:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 2.9, 1.0], [-0.45, 3.6, 1.0]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal (shift, scale, power) parameters for Jaccard_256 on the validation set

In [None]:
# Scale search (2.0 to 3.0 in steps of 0.1) at power 0.7 on val / jaccard_256;
# prints (scale, 100 * global mAP) pairs.
params = [[0.0, 2.0+0.1*i, 0.7] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[1] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Shift search (-0.2 down to -0.7 in steps of 0.05) at scale 3.1 on val / jaccard_256;
# prints (shift, 100 * global mAP) pairs.
params = [[-0.2-0.05*i, 3.1, 1.0] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[0] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Power search (0.5 to 1.0 in steps of 0.05) at scale 2.7 on val / jaccard_256;
# prints (power, 100 * global mAP) pairs.
params = [[0.0, 2.7, 0.5+0.05*i] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[2] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Compare baseline, scale-only, shift+scale and scale+power settings on val / jaccard_256:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 2.95, 1.0], [-0.50, 3.1, 1.0], [0.0, 2.7, 0.7]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Compare baseline, scale-only and shift+scale settings on test / jaccard_256:
# prints 100 * (global mAP, global recall sum) and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 2.95, 1.0], [-0.50, 3.1, 1.0]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "jaccard_256")
lists = getLists(ids_tag_scores, similarity, params, print_metrics=False)
# lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal (shift, power) parameters for Hybrid_512 on the validation set

In [None]:
# Power search (7.9 to 8.9 in steps of 0.1) at shift -1.55 on val / hybrid_512;
# prints (power, 100 * global mAP) pairs.
params = [[-1.55, 1.0, 7.9+0.1*i] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[2] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Shift search (-1.1 down to -2.1 in steps of 0.1) at power 8.60 on val / hybrid_512;
# prints (shift, 100 * global mAP) pairs.
params = [[-1.1-0.1*i, 1.0, 8.60] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[0] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Compare baseline, power-only and shift+power settings on val / hybrid_512 with sel_step=5
# (metrics for a range of selected tag counts); prints 100 * (global mAP, global recall sum)
# and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 1.0, 2.70], [-1.55, 1.0, 6.60]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Compare baseline, power-only and shift+power settings on test / hybrid_512 with sel_step=5
# (metrics for a range of selected tag counts); prints 100 * (global mAP, global recall sum)
# and plots the contribution curves.
params = [[0.0, 1.0, 1.0], [0.0, 1.0, 2.70], [-1.55, 1.0, 6.60]]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "hybrid_512")
lists = getLists(ids_tag_scores, similarity, params, sel_step=5, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal (shift, power) parameters for Jaccard_512 on the validation set

In [None]:
# Power search (6.2 to 7.2 in steps of 0.1) at shift -1.60 on val / jaccard_512;
# prints (power, 100 * global mAP) pairs.
params = [[-1.60, 1.0, 6.2+0.1*i] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[2] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Shift search (-1.2 down to -2.2 in steps of 0.1) at power 6.80 on val / jaccard_512;
# prints (shift, 100 * global mAP) pairs.
params = [[-1.2-0.1*i, 1.0, 6.80] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "val", "jaccard_512")
lists = getLists(ids_tag_scores, similarity, params, get_pairs=False, print_metrics=False)
metrics = np.array([elems[2] for elems in lists])
print(np.array(list(zip([param[0] for param in params], 100*metrics[:,0,14]))))

In [None]:
# Validation comparison for jaccard_512 of three [shift, scale, power] settings:
# the baseline, power only, and shift + power.
params = [
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 2.20],
    [-1.60, 1.0, 6.80],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="jaccard_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Same three jaccard_512 settings as the validation comparison, evaluated on
# the "test" split.
params = [
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 2.20],
    [-1.60, 1.0, 6.80],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="test", con="jaccard_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal (shift, power) parameters for Jaccard_256 on the validation set

In [None]:
# Validation sweep for jaccard_256 over params[2] (the power of the "jaccard"
# transform): 6.0 up to 7.0 in steps of 0.1, with shift -1.80 and scale 1.0 fixed.
params = [[-1.80, 1.0, 6.0+0.1*i] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="jaccard_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [power, 100 * metrics[setting, 0, 14]].
print(np.array([(p[2], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation sweep for jaccard_256 over params[0] (the shift): -1.2 down to
# -2.2 in steps of 0.1, with scale 1.0 and power 6.60 held fixed.
params = [[-1.2-0.1*i, 1.0, 6.60] for i in range(11)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="jaccard_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [shift, 100 * metrics[setting, 0, 14]].
print(np.array([(p[0], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation comparison for jaccard_256 of three [shift, scale, power] settings:
# the baseline, power only, and shift + power.
params = [
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 1.80],
    [-1.80, 1.0, 6.60],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="jaccard_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Same three jaccard_256 settings as the validation comparison, evaluated on
# the "test" split.
params = [
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 1.80],
    [-1.80, 1.0, 6.60],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="test", con="jaccard_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal parameters for cosine_512 on the validation set

In [None]:
# Validation sweep for cosine_512 over params[2]: 1.0 up to 1.3 in steps of
# 0.05, with params[0] = -0.25 and params[1] = 1.0 held fixed.
# (params[2] is presumably the power exponent, as in the Jaccard transform — confirm.)
params = [[-0.25, 1.0, 1.0+0.05*i] for i in range(7)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [params[2], 100 * metrics[setting, 0, 14]].
print(np.array([(p[2], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation sweep for cosine_512 over params[0] (the shift): -0.10 down to
# -0.45 in steps of 0.05, with params[1] = 1.0 and params[2] = 1.07 held fixed.
params = [[-0.1-0.05*i, 1.0, 1.07] for i in range(8)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [params[0], 100 * metrics[setting, 0, 14]].
print(np.array([(p[0], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation comparison for cosine_512: the baseline against the selected
# setting [-0.25, 1.0, 1.07].
params = [
    [0.0, 1.0, 1.0],
    [-0.25, 1.0, 1.07],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Same two cosine_512 settings as the validation comparison, evaluated on the
# "test" split.
params = [
    [0.0, 1.0, 1.0],
    [-0.25, 1.0, 1.07],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="test", con="cosine_512")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

### (Manual) search for optimal parameters for cosine_256 on the validation set

In [None]:
# Validation sweep for cosine_256 over params[2]: 0.80 up to 1.15 in steps of
# 0.05, with params[0] = -0.24 and params[1] = 1.0 held fixed.
# (params[2] is presumably the power exponent, as in the Jaccard transform — confirm.)
params = [[-0.24, 1.0, 0.80+0.05*i] for i in range(8)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [params[2], 100 * metrics[setting, 0, 14]].
print(np.array([(p[2], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation sweep for cosine_256 over params[0] (the shift): -0.10 down to
# -0.45 in steps of 0.05, with params[1] = 1.0 and params[2] = 0.98 held fixed.
params = [[-0.1-0.05*i, 1.0, 0.98] for i in range(8)]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    get_pairs=False, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# One row per setting: [params[0], 100 * metrics[setting, 0, 14]].
print(np.array([(p[0], score) for p, score in zip(params, 100*metrics[:,0,14])]))

In [None]:
# Validation comparison for cosine_256: the baseline against the selected
# setting [-0.24, 1.0, 0.98].
params = [
    [0.0, 1.0, 1.0],
    [-0.24, 1.0, 0.98],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="cosine_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Same two cosine_256 settings as the validation comparison, evaluated on the
# "test" split.
params = [
    [0.0, 1.0, 1.0],
    [-0.24, 1.0, 0.98],
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="test", con="cosine_256")
lists = getLists(
    ids_tag_scores, similarity, params,
    sel_step=5, print_metrics=False,
)
# Stack the third element of every getLists entry into a single array.
metrics = np.array([entry[2] for entry in lists])
# Columns 14 and 15 of every row, for every setting, scaled by 100.
print(metrics[:, :, 14:16] * 100)
# `titles` must already exist in the notebook namespace.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False)

In [None]:
# Test-split comparison for cosine_256: the baseline against a shift of -0.25
# applied on its own (scale and third parameter left at 1.0).
params_list = [[0.0, 1.0, 1.0],    # Baseline
               [-0.25, 1.0, 1.0]]  # shift -0.25
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, "test", "cosine_256")
lists = getLists(ids_tag_scores, similarity, params_list)
# getLists yields one entry per parameter set, and the metrics sit at index 2
# of each entry (this is how every other cell reads them). Indexing lists[2]
# directly would ask for a third entry, which a two-element params_list does
# not produce.
metrics = np.array([elems[2] for elems in lists])
print(100*metrics[:,:,14:16])
# NOTE(review): the PDF name says "power" although only the shift differs
# between the two settings — confirm the intended file name.
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="cosine_256_curve_log_power.pdf")

In [None]:
# Validation plots for hybrid_512: the baseline against scale 3.5 alone.
# The same result is rendered four times (linear, log, and log with
# absolute=1 / absolute=2), each written to its own PDF.
params_list = [
    [0.0, 1.0, 1.0],  # Baseline
    [0.0, 3.5, 1.0],  # scale 3.5
]
ids_tag_scores, similarity = getIdsTagScores(VisualSearch, split="val", con="hybrid_512")
lists = getLists(ids_tag_scores, similarity, params_list)
# `titles` must already exist in the notebook namespace.
for plot_kwargs in (
    dict(scale="linear", pdf="causality_curve_2.pdf"),
    dict(scale="log", pdf="causality_curve_log_2.pdf"),
    dict(scale="log", absolute=1, pdf="causality_curve_log_abs_2.pdf"),
    dict(scale="log", absolute=2, pdf="causality_curve_log_abs_abs_2.pdf"),
):
    plotStatContribs(lists, similarity, titles, **plot_kwargs)

In [None]:
# Power series: shift 0.0 and scale 1.0 with the third parameter running over
# 1.0, 2.0, 3.0, 4.0, 5.0 (the first entry is the baseline).
# This cell does not load data itself: it reuses whatever `ids_tag_scores` and
# `similarity` an earlier cell left in the notebook namespace.
params_list = [[0.0, 1.0, float(power)] for power in range(1, 6)]
lists = getLists(ids_tag_scores, similarity, params_list)
for axis_scale, pdf_name in (
    ("linear", "causality_curve_power.pdf"),
    ("log", "causality_curve_log_power.pdf"),
):
    plotStatContribs(lists, similarity, titles, scale=axis_scale, stdDev=False, pdf=pdf_name)

In [None]:
# Scale series: shift 0.0 and third parameter 1.0 with the scale running over
# 2**0.0, 2**0.5, 2**1.0, 2**1.5, 2**2.0 (the first entry is the baseline).
# This cell does not load data itself: it reuses whatever `ids_tag_scores` and
# `similarity` an earlier cell left in the notebook namespace.
params_list = [[0.0, 2**(0.5*half_step), 1.0] for half_step in range(5)]
lists = getLists(ids_tag_scores, similarity, params_list)
for axis_scale, pdf_name in (
    ("linear", "causality_curve_scale.pdf"),
    ("log", "causality_curve_log_scale.pdf"),
):
    plotStatContribs(lists, similarity, titles, scale=axis_scale, stdDev=False, pdf=pdf_name)

In [None]:
# Shift series: the shift runs over -0.4, -0.2, 0.0, 0.2, 0.4. The scale is
# 3.5 on every row so that the shift is the only parameter that varies between
# curves.
# This cell does not load data itself: it reuses whatever `ids_tag_scores` and
# `similarity` an earlier cell left in the notebook namespace.
params_list = [[-0.4, 3.5, 1.0],
               [-0.2, 3.5, 1.0],
               [ 0.0, 3.5, 1.0],
               [ 0.2, 3.5, 1.0],
               [ 0.4, 3.5, 1.0]]
lists = getLists(ids_tag_scores, similarity, params_list)
plotStatContribs(lists, similarity, titles, scale="linear", stdDev=False, pdf="causality_curve_shift.pdf")
plotStatContribs(lists, similarity, titles, scale="log", stdDev=False, pdf="causality_curve_log_shift.pdf")

In [None]:
# Side-by-side plots of the baseline, power 3.0 alone, and scale 3.5 alone.
# This cell does not load data itself: it reuses whatever `ids_tag_scores` and
# `similarity` an earlier cell left in the notebook namespace.
params_list = [
    [0.0, 1.0, 1.0],  # Baseline
    [0.0, 1.0, 3.0],  # Power 3.0
    [0.0, 3.5, 1.0],  # Scale 3.5
]
lists = getLists(ids_tag_scores, similarity, params_list)
for axis_scale, pdf_name in (
    ("linear", "causality_curve_power_scale.pdf"),
    ("log", "causality_curve_log_power_scale.pdf"),
):
    plotStatContribs(lists, similarity, titles, scale=axis_scale, stdDev=False, pdf=pdf_name)