In [5]:
import numpy as np
import json

In [12]:
def i2t(sims, npts=None, return_ranks=False, caps_per_image=5):
    """
    Images->Text (Image Annotation)

    For every evaluated image (a row of ``sims``) the captions are sorted by
    descending similarity, and the best 0-based position reached by any of
    the image's own captions is recorded as that image's rank.  Image ``k``
    owns the consecutive caption columns
    ``[caps_per_image * k, caps_per_image * (k + 1))``.

    Args:
        sims: 2-D array of image-caption similarities, images on axis 0 and
            captions on axis 1 -- (N, 5N) in the standard setup.
        npts: number of leading images to evaluate.  ``None`` (the default)
            evaluates every row of ``sims``.  Captions are always ranked
            against all columns of ``sims``.
        return_ranks: when true, also return ``(ranks, top1)``.
        caps_per_image: number of consecutive caption columns per image.

    Returns:
        ``(r1, r5, r10, medr, meanr)``: recall@1/5/10 in percent followed by
        the 1-based median and mean rank.  With ``return_ranks`` the result
        is ``((r1, r5, r10, medr, meanr), (ranks, top1))`` where ``ranks``
        holds the 0-based rank per image and ``top1`` the index of the
        highest-scoring caption per image.

    Raises:
        IndexError: if ``sims`` has fewer than ``caps_per_image * npts``
            caption columns.
    """
    if npts is None:
        npts = sims.shape[0]
    if sims.shape[1] < caps_per_image * npts:
        raise IndexError(
            "sims has %d caption columns but %d images x %d captions were requested"
            % (sims.shape[1], npts, caps_per_image)
        )

    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    for index in range(npts):
        # Caption indices ordered from most to least similar to this image.
        inds = np.argsort(sims[index])[::-1]
        # np.where returns ascending positions, so the first hit is the best
        # rank among this image's ground-truth captions.
        first_cap = caps_per_image * index
        is_gt = (inds >= first_cap) & (inds < first_cap + caps_per_image)
        ranks[index] = np.where(is_gt)[0][0]
        top1[index] = inds[0]

    # Compute metrics (ranks are 0-based, hence the strict "<" and the "+ 1").
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    return (r1, r5, r10, medr, meanr)

def t2i(sims, npts=None, return_ranks=False, caps_per_image=5):
    """
    Text->Images (Image Search)

    For every evaluated caption the images are sorted by descending
    similarity and the 0-based position of the caption's own image is
    recorded as that caption's rank.  Caption ``c`` belongs to image
    ``c // caps_per_image``.

    Args:
        sims: 2-D array of image-caption similarities, images on axis 0 and
            captions on axis 1 -- (N, 5N) in the standard setup.
        npts: number of leading images whose captions are evaluated.
            ``None`` (the default) uses every row of ``sims``.  Each caption
            is always ranked against all images (rows) of ``sims``.
        return_ranks: when true, also return ``(ranks, top1)``.
        caps_per_image: number of consecutive caption columns per image.

    Returns:
        ``(r1, r5, r10, medr, meanr)``: recall@1/5/10 in percent followed by
        the 1-based median and mean rank.  With ``return_ranks`` the result
        is ``((r1, r5, r10, medr, meanr), (ranks, top1))`` where ``ranks``
        holds the 0-based rank per caption and ``top1`` the index of the
        highest-scoring image per caption.

    Raises:
        IndexError: if ``sims`` has fewer than ``caps_per_image * npts``
            caption columns.
    """
    if npts is None:
        npts = sims.shape[0]
    n_queries = caps_per_image * npts
    if sims.shape[1] < n_queries:
        raise IndexError(
            "sims has %d caption columns but %d images x %d captions were requested"
            % (sims.shape[1], npts, caps_per_image)
        )

    ranks = np.zeros(n_queries)
    top1 = np.zeros(n_queries)

    # --> (5N(caption), N(image))
    cap_sims = sims.T

    for query in range(n_queries):
        # Image indices ordered from most to least similar to this caption.
        inds = np.argsort(cap_sims[query])[::-1]
        ranks[query] = np.where(inds == query // caps_per_image)[0][0]
        top1[query] = inds[0]

    # Compute metrics (ranks are 0-based, hence the strict "<" and the "+ 1").
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr), (ranks, top1)
    return (r1, r5, r10, medr, meanr)

In [8]:
def get_gt_ranks(ranks, ans_ind):
    """Pick, for every row, the rank stored at that row's ground-truth column.

    Args:
        ranks: 2-D array indexed as ``ranks[row, column]``.
        ans_ind: per-row ground-truth column indices; entries are cast with
            ``int`` before indexing.

    Returns:
        A new array with the shape and dtype of ``ans_ind`` (the input is
        copied, not modified) holding ``int(ranks[row, ans_ind[row]])``.
    """
    gt_ranks = ans_ind.copy()
    for row, target in enumerate(ans_ind):
        gt_ranks[row] = int(ranks[row, int(target)])
    return gt_ranks
    
def score_to_ranks(scores):
    """Convert a 2-D score matrix into per-row 1-based ranks.

    Within each row the largest score receives rank 1, the next largest
    rank 2, and so on.  Ordering among equal scores follows
    ``np.argsort`` (reversed).

    Args:
        scores: 2-D array; ranking is done independently along axis 1.

    Returns:
        Integer array with the same shape as ``scores`` where entry
        ``[i, j]`` is the rank of ``scores[i, j]`` within row ``i``.
    """
    # Column indices of each row ordered from highest to lowest score.
    ranked_idx = np.flip(np.argsort(scores, axis=1), axis=1)
    ranks = np.zeros_like(ranked_idx)
    # Scatter positions 1..n back to the columns they came from; this is the
    # vectorised form of ``ranks[i][ranked_idx[i][j]] = j + 1`` for all i, j.
    positions = np.arange(1, ranked_idx.shape[1] + 1, dtype=ranks.dtype)
    np.put_along_axis(ranks, ranked_idx, positions, axis=1)
    return ranks

def RecallK(output, expected, k=1):
    """Print and return recall@1, recall@5 and recall@10 for a score matrix.

    Args:
        output: 2-D score matrix, one query per row and one candidate per
            column; higher scores rank better.
        expected: per-row index of the ground-truth column.  Only the first
            ``len(expected)`` rows of ``output`` are scored.
        k: unused; kept so existing calls that pass it keep working.  The
            cut-offs are always 1, 5 and 10.

    Returns:
        ``(recall_1, recall_5, recall_10)`` as fractions in ``[0, 1]``.
        Each value is also printed, in that order.
    """
    # 1-based rank of every candidate, then the rank of each ground truth.
    ranks = score_to_ranks(output)
    gt_ranks = get_gt_ranks(ranks, expected).astype(float)

    n_queries = gt_ranks.shape[0]
    recalls = []
    for cutoff in (1, 5, 10):
        recall = float(np.sum(np.less_equal(gt_ranks, cutoff))) / n_queries
        print(recall)
        recalls.append(recall)
    return tuple(recalls)
    
#     ranks = process_ranks(gt_ranks)
    
# Sanity check on a 3x3 toy score matrix: row i's ground-truth column is expected[i].
output = np.array([[1,2,3],
                  [2,1,3],
                  [4,6,2]
                  ])
expected = np.array([2,1,0],dtype=np.int_)
# Only row 0 scores its ground-truth column highest, so recall@1 is 1/3;
# with three candidates every ground truth is within the top 5 and top 10.
RecallK(output, expected)

0.3333333333333333
1.0
1.0


In [None]:
# Load the prediction report to evaluate.
# Exactly one `result_path` assignment is active; the commented-out paths are
# alternative runs, each labelled by the comment above it.
import json
#region_tag+sentence_image
#result_path = "/fsx/zmykevin/experiments/mmf_exp/itm_flickr30k_visual_bert_35549945/reports/itm_flickr30k_run_test_2021-10-23T18:51:07.json"

#no pretrain
#result_path = "/fsx/zmykevin/experiments/mmf_exp/itm_flickr30k_visual_bert_12992184/reports/itm_flickr30k_run_test_2021-10-23T19:05:42.json"

#paired pretrain
#result_path = "/fsx/zmykevin/experiments/mmf_exp/itm_flickr30k_visual_bert_12590222/reports/itm_flickr30k_run_test_2021-10-26T03:06:33.json"

#using logits
result_path = "/fsx/zmykevin/experiments/sweep_jobs/visualbert_paired_vinvl_itm_zs_test_0..ngpu1/itm_flickr30k_visual_bert_2619040/reports/itm_flickr30k_run_test_2021-10-26T14:53:56.json"
# prediction_raw is the parsed JSON report; later cells iterate over it and
# read the 'answer' field of each entry.
with open(result_path, "r") as f:
    prediction_raw = json.load(f)

In [None]:
# Number of entries in the loaded report; the reshaping cell writes one matrix
# element per entry.
print(len(prediction_raw))

In [7]:
# Reshape the flat report into an (images, captions) score matrix.
# The report is consumed in order: every `break_point` consecutive entries
# form one row.  Entries beyond 200 * break_point raise IndexError; if there
# are fewer, the remaining cells stay 0.
break_point = 5000
prediction = np.zeros((200, break_point))

for i, x in enumerate(prediction_raw):
    # Flat position -> (row, column) using integer arithmetic only.
    row_idx, col_idx = divmod(i, break_point)
    prediction[row_idx, col_idx] = x['answer']

In [None]:
# Peek at ten raw report entries (positions 100-109) to inspect their structure.
for x in prediction_raw[100:110]:
    print(x)

In [8]:
# Ground-truth image index for each caption row: five consecutive captions
# per image, for the first 20 images -> [0, 0, 0, 0, 0, 1, 1, ...].
t2i_expected = np.repeat(np.arange(20, dtype=np.int_), 5)
print(t2i_expected)

[ 0  0  0  0  0  1  1  1  1  1  2  2  2  2  2  3  3  3  3  3  4  4  4  4
  4  5  5  5  5  5  6  6  6  6  6  7  7  7  7  7  8  8  8  8  8  9  9  9
  9  9 10 10 10 10 10 11 11 11 11 11 12 12 12 12 12 13 13 13 13 13 14 14
 14 14 14 15 15 15 15 15 16 16 16 16 16 17 17 17 17 17 18 18 18 18 18 19
 19 19 19 19]


In [38]:
# import random
# prediction = np.ones((100,100))
# prediction = np.diag(np.diag(prediction))
# prob = 0.5
# for i in range(prediction.shape[0]):
#     if random.random() < prob:
#         prediction[i] = np.random.rand(1,100)


In [9]:
# Text->image recall: transposing puts captions on rows and images on columns.
# Only the first len(t2i_expected) caption rows are scored, because
# get_gt_ranks iterates over the expected array.
RecallK(prediction.T,t2i_expected)

0.35
0.66
0.77


In [None]:
#i to t: 0.08, 0.33, 0.46
#t to i: 0.02, 0.34, 0.46

#(92.0, 97.0, 98.0, 1.0, 1.5699999999999998)
#(82.4, 95.6, 98.8, 1.0, 1.65)

In [8]:
# Previously recorded numbers (their provenance is not stated in this notebook):
#   i to t: 0.01, 0.04, 0.09
#   t to i: 0.0, 0.05, 0.1
# Each printed tuple is (R@1, R@5, R@10, median rank, mean rank); the recalls
# are percentages.
print(i2t(prediction))
print(t2i(prediction))

(67.5, 90.0, 95.5, 1.0, 4.34)
(76.8, 94.6, 97.7, 1.0, 1.967)


In [None]:
# Load the five per-split test reports; a later cell concatenates them.
result_path_0 = "/fsx/zmykevin/experiments/mmf_exp/itm_flickr30k_visual_bert_19648373/reports/itm_flickr30k_run_test_2021-10-26T07:35:33.json"
result_path_1 = "/fsx/zmykevin/experiments/sweep_jobs/visualbert_paired_vinvl_itm_zs_test_1..ngpu1/itm_flickr30k_visual_bert_2902261/reports/itm_flickr30k_run_test_2021-10-26T15:27:37.json"
# NOTE(review): split 2 has the same run id (2619040) and timestamp as the
# single-report path loaded earlier from the "zs_test_0" sweep directory --
# confirm this is the intended file.
result_path_2 = "/fsx/zmykevin/experiments/sweep_jobs/visualbert_paired_vinvl_itm_zs_test_2..ngpu1/itm_flickr30k_visual_bert_2619040/reports/itm_flickr30k_run_test_2021-10-26T14:53:56.json"
result_path_3 = "/fsx/zmykevin/experiments/mmf_exp/itm_flickr30k_visual_bert_19660505/reports/itm_flickr30k_run_test_2021-10-26T07:36:05.json"
result_path_4 = "/fsx/zmykevin/experiments/sweep_jobs/visualbert_paired_vinvl_itm_zs_test_4..ngpu1/itm_flickr30k_visual_bert_1319889/reports/itm_flickr30k_run_test_2021-10-26T15:06:28.json"


def _load_report(path):
    """Return the parsed JSON content of the report file at ``path``."""
    with open(path, "r") as report_file:
        return json.load(report_file)


prediction_raw_0 = _load_report(result_path_0)
prediction_raw_1 = _load_report(result_path_1)
prediction_raw_2 = _load_report(result_path_2)
prediction_raw_3 = _load_report(result_path_3)
prediction_raw_4 = _load_report(result_path_4)

# Entry count of each split, in split order.
for _split in (prediction_raw_0, prediction_raw_1, prediction_raw_2,
               prediction_raw_3, prediction_raw_4):
    print(len(_split))

In [10]:
# Combine the five split reports into one flat sequence, in split order 0..4.
# This rebinds `prediction_raw`, replacing any previously loaded single report.
prediction_raw = prediction_raw_0 + prediction_raw_1 + prediction_raw_2 + prediction_raw_3 + prediction_raw_4
# Single-split alternative kept for debugging:
#prediction_raw = prediction_raw_0

# prediction = np.zeros((200,5000))
# col_idx = 0
# #top_5000 = []
# for i, x in enumerate(prediction_raw_0):
#     row_idx = int(i/5000)
#     if i% 5000 == 0:
#         col_idx = 0
#     prediction[row_idx, col_idx] = x["answer"]
#     col_idx += 1
#     if i > 44999:
#         print(x)
#     if i == 149999:
#        break

# top_5000 = np.array(top_5000)
# top_5000_argmax = np.argmax(top_5000)
# print(top_5000_argmax)
#print(t2i(prediction))

In [26]:
# Reshape the combined flat report into an (images, captions) score matrix.
# The report is consumed in order: every `break_point` consecutive entries
# form one row.  Entries beyond 1000 * break_point raise IndexError; if there
# are fewer, the remaining cells stay 0.
break_point = 5000
prediction = np.zeros((1000, break_point))

for i, x in enumerate(prediction_raw):
    # Flat position -> (row, column) using integer arithmetic only.
    row_idx, col_idx = divmod(i, break_point)
    prediction[row_idx, col_idx] = x['answer']

In [12]:
# Image->text, then text->image retrieval metrics for the current `prediction`.
# Each tuple is (R@1, R@5, R@10, median rank, mean rank); recalls are percentages.
print(i2t(prediction))
print(t2i(prediction))

(67.5, 90.0, 95.5, 1.0, 4.34)
(76.8, 94.6, 97.7, 1.0, 1.967)


In [16]:
# Ground-truth image index for each caption row: five consecutive captions
# per image, for 1000 images -> [0, 0, 0, 0, 0, 1, 1, ...].
t2i_expected = np.repeat(np.arange(1000, dtype=np.int_), 5)


In [17]:
# RecallK(prediction.T,t2i_expected)

IndexError: index 200 is out of bounds for axis 1 with size 200

In [2]:
# Scratch check: flooring a non-negative index by the block size gives the
# block number (both operands are below 10, so both lines print 0).
print(1 // 10)
print(9 // 10)

0
0
