In [None]:
import os
import numpy as np
import torch
from collections import OrderedDict
import json
from models.mlp import MLP_MoCo
from tqdm import tqdm

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')

In [None]:
query_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A'
gallery_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A'
train_feature_dir = '/nfs3-p1/zsxm/naic/preliminary/train/train_feature'

In [None]:
# Read every raw little-endian float32 file in the query directory (sorted by
# file name) and stack the vectors into one tensor, one row per file.
query_feature_A = torch.stack([
    torch.from_numpy(np.fromfile(os.path.join(query_feature_A_dir, fname), dtype='<f4'))
    for fname in sorted(os.listdir(query_feature_A_dir))
])

In [None]:
# Read every raw little-endian float32 file in the gallery directory (sorted by
# file name) and stack the vectors into one tensor, one row per file.
gallery_feature_A = torch.stack([
    torch.from_numpy(np.fromfile(os.path.join(gallery_feature_A_dir, fname), dtype='<f4'))
    for fname in sorted(os.listdir(gallery_feature_A_dir))
])

In [None]:
print(query_feature_A.shape, query_feature_A)
print(gallery_feature_A.shape, gallery_feature_A)
torch.save(query_feature_A, '/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A.pt')
torch.save(gallery_feature_A, '/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A.pt')

In [None]:
# Read every raw little-endian float32 training feature file (sorted by file
# name), stack them into one tensor and cache the result as a .pt file.
train_feature = torch.stack([
    torch.from_numpy(np.fromfile(os.path.join(train_feature_dir, fname), dtype='<f4'))
    for fname in sorted(os.listdir(train_feature_dir))
])
print(train_feature.shape)
torch.save(train_feature, '/nfs3-p1/zsxm/naic/preliminary/train/train_feature.pt')

In [None]:
query_feature_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A.pt')
gallery_feature_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A.pt')
train_feature = torch.load('/nfs3-p1/zsxm/naic/preliminary/train/train_feature.pt')
print(query_feature_A.shape, query_feature_A)
print(gallery_feature_A.shape, gallery_feature_A)
print(train_feature.shape, train_feature)

In [None]:
query_feature_A = query_feature_A.to(device)
gallery_feature_A = gallery_feature_A.to(device)
train_feature = train_feature.to(device)

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    The matrix of dot products is divided element-wise by the outer product of
    the row norms. If the computation raises ``RuntimeError`` on the tensors'
    current device, both inputs are moved to the CPU and it is repeated there.

    Args:
        q: 2-D tensor, one vector per row.
        k: 2-D tensor with the same number of columns as ``q``.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    def _pairwise(a, b):
        dots = torch.mm(a, b.T)
        a_norm = torch.linalg.vector_norm(a, dim=1, keepdim=True)
        b_norm = torch.linalg.vector_norm(b, dim=1, keepdim=True)
        return dots / torch.mm(a_norm, b_norm.T)

    try:
        return _pairwise(q, k)
    except RuntimeError:
        # Retry on the CPU (presumably the failure was a GPU memory error).
        return _pairwise(q.cpu(), k.cpu())

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    Rows are L2-normalised and multiplied as matrices. On ``RuntimeError`` the
    current ``q`` and ``k`` are moved to the CPU and the computation is redone.

    Args:
        q: 2-D tensor, one vector per row.
        k: 2-D tensor with the same number of columns as ``q``.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    l2_normalize = torch.nn.functional.normalize
    try:
        q = l2_normalize(q, dim=1)
        k = l2_normalize(k, dim=1)
        return torch.mm(q, k.T)
    except RuntimeError:
        # Retry on the CPU (presumably the failure was a GPU memory error).
        q, k = q.cpu(), k.cpu()
        return torch.mm(l2_normalize(q, dim=1), l2_normalize(k, dim=1).T)

In [None]:
@torch.no_grad()
def batch_cos(q, k, batch_size=2048):
    """Cosine similarity of every row of ``q`` against all rows of ``k``.

    ``q`` is processed in slices of ``batch_size`` rows (with a tqdm progress
    bar) and the per-slice results are concatenated along the first axis.
    """
    batch_starts = range(0, q.shape[0], batch_size)
    return torch.cat([cos_similarity(q[s:s + batch_size], k) for s in tqdm(batch_starts)])

In [None]:
res = batch_cos(query_feature_A, gallery_feature_A)
print(res.shape)

In [None]:
net = MLP_MoCo()
net.load_state_dict(torch.load('.details/checkpoints/MLP/01-11_11:48:10/Net_best.pth', map_location=device))
net.to(device)
net = net.encoder_q#.encoder
net.eval()
print('')

In [None]:
@torch.no_grad()
def encode(net, data, batch_size=2048):
    """Run ``net`` over ``data`` in slices of ``batch_size`` rows.

    ``net`` must return a pair per slice. The concatenated *second* outputs are
    returned first and the concatenated *first* outputs second (the caller
    unpacks them as ``code, recons``).
    """
    second_outputs, first_outputs = [], []
    for start in tqdm(range(0, data.shape[0], batch_size)):
        batch_out = net(data[start:start + batch_size])
        first, second = batch_out
        second_outputs.append(second)
        first_outputs.append(first)
    return torch.cat(second_outputs), torch.cat(first_outputs)

In [None]:
query_code_A, query_recons_A = encode(net, query_feature_A)
gallery_code_A, gallery_recons_A = encode(net, gallery_feature_A)

In [None]:
print(query_code_A.shape, query_recons_A.shape)
print(gallery_code_A.shape, gallery_recons_A.shape)

In [None]:
# Cosine similarity between the encoded query and gallery codes.
res2 = batch_cos(query_code_A, gallery_code_A)
# Report the shape of the matrix just computed (previously printed `res`).
print(res2.shape)

In [None]:
def print_res(res, print_json=False):
    """Rank the gallery for every query and optionally write the top 100 to JSON.

    Row ``i`` of ``res`` is sorted in descending order; the 100 best gallery
    file names are stored under the i-th query file name. File names come from
    the sorted listings of the module-level query/gallery directories.

    Args:
        res: Score matrix indexed as ``res[query_index]``; higher is better.
        print_json: When true, dump the mapping to ``./sub_a.json``.
    """
    query_names = sorted(os.listdir(query_feature_A_dir))
    gallery_names = sorted(os.listdir(gallery_feature_A_dir))
    res_dict = {}
    for row, name in enumerate(tqdm(query_names)):
        order = torch.argsort(res[row], dim=-1, descending=True)
        res_dict[name] = [gallery_names[order[rank]] for rank in range(100)]
    if not print_json:
        return
    with open('./sub_a.json', 'w') as f:
        json.dump(res_dict, f)

In [None]:
def compare_res(res1, res2, topk=100):
    """Mean IoU of the top-``topk`` index sets of two score matrices.

    For every row, the indices of the ``topk`` highest scores in ``res1`` and
    in ``res2`` are compared as sets (intersection over union); the per-row
    value is shown on the progress bar and the average over rows is returned.
    """
    assert res1.shape[0] == res2.shape[0]
    num_rows = res1.shape[0]
    total = 0
    progress = tqdm(range(num_rows))
    for row in progress:
        top1 = set(torch.argsort(res1[row], dim=-1, descending=True)[:topk].tolist())
        top2 = set(torch.argsort(res2[row], dim=-1, descending=True)[:topk].tolist())
        iou = len(top1.intersection(top2)) / len(top1.union(top2))
        progress.set_postfix(IoU=f'{iou:.4f}')
        total += iou
    return total / num_rows

In [None]:
ave = compare_res(res, res2)
print(ave)

# 我的ReRanking

In [None]:
import os
import numpy as np
import torch
import json
from tqdm import tqdm

In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
query_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_reshape_A.pt').to(device)
gallery_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_reshape_A.pt').to(device)

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    Rows are L2-normalised and multiplied as matrices. On ``RuntimeError`` the
    current ``q`` and ``k`` are moved to the CPU and the computation is redone.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    l2_normalize = torch.nn.functional.normalize
    try:
        q = l2_normalize(q, dim=1)
        k = l2_normalize(k, dim=1)
        return torch.mm(q, k.T)
    except RuntimeError:
        # Retry on the CPU (presumably the failure was a GPU memory error).
        q, k = q.cpu(), k.cpu()
        return torch.mm(l2_normalize(q, dim=1), l2_normalize(k, dim=1).T)

@torch.no_grad()
def batch_cos(q, k, batch_size=2048):
    """Cosine similarity of every row of ``q`` against all rows of ``k``.

    ``q`` is processed in slices of ``batch_size`` rows (with a tqdm progress
    bar) and the per-slice results are concatenated along the first axis.
    """
    batch_starts = range(0, q.shape[0], batch_size)
    return torch.cat([cos_similarity(q[s:s + batch_size], k) for s in tqdm(batch_starts)])

def batch_pearson(q, k, batch_size=1024):
    """Pearson correlation of every row of ``q`` against all rows of ``k``.

    Each row is centred by subtracting its own mean, after which the cosine
    similarity of the centred rows is computed in slices of ``batch_size``
    query rows. Every slice result is moved to the CPU before concatenation.
    """
    k_centered = k - k.mean(dim=-1, keepdim=True)
    q_centered = q - q.mean(dim=-1, keepdim=True)
    chunks = [
        cos_similarity(q_centered[s:s + batch_size], k_centered).cpu()
        for s in tqdm(range(0, q_centered.shape[0], batch_size))
    ]
    return torch.cat(chunks)

In [None]:
#res = batch_cos(query_reshape_A, gallery_reshape_A)
res = batch_pearson(query_reshape_A, gallery_reshape_A)

In [None]:
def batch_sort(dist, batch_size=2048):
    """Return, for every row of ``dist``, the indices of its 10 largest entries.

    Rows are sorted in descending order in slices of ``batch_size`` rows.

    Args:
        dist: 2-D score matrix; larger values rank first.
        batch_size: Number of rows sorted per step.

    Returns:
        Integer tensor with one row per row of ``dist`` and up to 10 columns.
    """
    label = []
    for i in tqdm(range(0, dist.shape[0], batch_size)):
        blabel = torch.argsort(dist[i:i+batch_size], descending=True)
        # A plain slice is a view that keeps the whole argsort result alive
        # even after `del blabel`; clone so only the top-10 columns are kept.
        label.append(blabel[:, :10].clone())
        del blabel
    return torch.cat(label)

In [None]:
label = batch_sort(res)

In [None]:
print(label.shape)
print(res.shape)

In [None]:
# Query expansion: replace every query by a weighted mean of itself (weight
# 0.8**0) and its 5 best-ranked gallery vectors (weights 0.8**1 .. 0.8**5).
weight_sum = 0
for power in range(6):
    weight_sum += 0.8**power
avg_query_reshape_A = []
for q_idx in tqdm(range(query_reshape_A.shape[0])):
    avg = query_reshape_A[q_idx].clone()
    for rank in range(5):
        neighbor = gallery_reshape_A[label[q_idx, rank]]
        avg += 0.8**(rank+1)*neighbor
    avg /= weight_sum
    # Alternative tried earlier: 0.7 * top-1 gallery vector + 0.3 * top-2 gallery vector.
    avg_query_reshape_A.append(avg)
avg_query_reshape_A = torch.stack(avg_query_reshape_A)
print(avg_query_reshape_A.shape)

In [None]:
# res_rerank = batch_cos(avg_query_reshape_A, gallery_reshape_A)
res_rerank = batch_pearson(avg_query_reshape_A, gallery_reshape_A)

In [None]:
def print_res(res, print_json=False):
    """Rank the gallery for every query and optionally write the top 100 to JSON.

    Row ``i`` of ``res`` is sorted in descending order; the 100 best gallery
    file names are stored under the i-th query file name. File names come from
    the sorted listings of the test_A query/gallery feature directories.

    Args:
        res: Score matrix indexed as ``res[query_index]``; higher is better.
        print_json: When true, dump the mapping to ``./sub_a.json``.
    """
    query_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A'
    gallery_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A'
    query_names = sorted(os.listdir(query_feature_A_dir))
    gallery_names = sorted(os.listdir(gallery_feature_A_dir))
    res_dict = {}
    for row, name in enumerate(tqdm(query_names)):
        order = torch.argsort(res[row], dim=-1, descending=True)
        res_dict[name] = [gallery_names[order[rank]] for rank in range(100)]
    if not print_json:
        return
    with open('./sub_a.json', 'w') as f:
        json.dump(res_dict, f)
        
#print_res(res_rerank, True)
print_res(res, True)

# 马氏距离计算

In [None]:
# Find the feature dimensions whose column sum is non-zero in the query,
# gallery and training sets, and check that the three index sets agree.
# The index range is built on the same device as the mask (a CPU `arange`
# cannot be indexed with a mask that lives on the GPU) and sized from the
# actual feature width instead of a hard-coded 2048; the result is moved back
# to the CPU so it can be saved and compared as before.
qfs = query_feature_A.sum(dim=0)
nqfs = torch.arange(qfs.shape[0], device=qfs.device)[qfs!=0].cpu()
print(nqfs.shape, nqfs)
gfs = gallery_feature_A.sum(dim=0)
ngfs = torch.arange(gfs.shape[0], device=gfs.device)[gfs!=0].cpu()
print(ngfs.shape, ngfs)
print(nqfs.equal(ngfs))
tfs = train_feature.sum(dim=0)
ntfs = torch.arange(tfs.shape[0], device=tfs.device)[tfs!=0].cpu()
print(ntfs.shape, ntfs)
print(nqfs.equal(ntfs))

In [None]:
not_zero_dim = nqfs
torch.save(not_zero_dim, '/nfs3-p1/zsxm/naic/preliminary/train/not_zero_dim.pt')

In [None]:
query_reshape_A = query_feature_A[:, qfs!=0]
gallery_reshape_A = gallery_feature_A[:, gfs!=0]
print(query_reshape_A.shape, gallery_reshape_A.shape)

In [None]:
print(query_reshape_A.abs().mean(), gallery_reshape_A.abs().mean())
print(query_reshape_A.abs().max(), gallery_reshape_A.abs().max())

In [None]:
del query_feature_A, gallery_feature_A

In [None]:
def Mahalanobis(q, k):
    """Rank the gallery for each query by Mahalanobis distance.

    Both inputs are L2-normalised per row. The covariance of the normalised
    gallery rows is computed, inverted on the CPU, and used as the metric
    ``(q_i - k_j) @ icov @ (q_i - k_j)``. For each query the 100 gallery file
    names with the smallest distance are collected.

    Args:
        q: 2-D tensor of query vectors (one per row).
        k: 2-D tensor of gallery vectors (one per row).

    Returns:
        Dict mapping each query file name to its list of 100 gallery file
        names, in the sorted-listing order of the module-level directories.
    """
    query_names = sorted(os.listdir(query_feature_A_dir))
    gallery_names = sorted(os.listdir(gallery_feature_A_dir))
    q = torch.nn.functional.normalize(q, dim=1)
    k = torch.nn.functional.normalize(k, dim=1)

    # Sample covariance of the gallery (unbiased, divides by N - 1).
    centered = k - k.mean(dim=0, keepdim=True)
    cov = torch.mm(centered.T, centered)/(centered.shape[0]-1)
    print(cov.shape, cov)
    # The inverse is computed on the CPU and then moved back to `device`.
    cov = cov.to(torch.device('cpu'))
    icov = torch.linalg.inv(cov)
    print(icov.shape, icov)
    del cov
    icov = icov.to(device)

    res_dict = {}
    for i in tqdm(range(q.shape[0])):
        diff = q[i] - k
        dist = torch.einsum('bi,bi->b', torch.mm(diff, icov), diff)
        assert dist.shape == (k.shape[0],)
        order = torch.argsort(dist, descending=False)
        res_dict[query_names[i]] = [gallery_names[order[j]] for j in range(100)]
    return res_dict

In [None]:
res3 = Mahalanobis(query_reshape_A, gallery_reshape_A)

In [None]:
with open('./sub_a.json', 'w') as f:
    json.dump(res3, f)

# ReRanking

In [None]:
import os
import numpy as np
from collections import OrderedDict
import json
from tqdm import tqdm
from scipy.spatial.distance import cdist

query_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A'
gallery_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A'

In [None]:
# Load the raw little-endian float32 query and gallery features (sorted by
# file name), one row per file.
query_feature_A = []
for data in sorted(os.listdir(query_feature_A_dir)):
    query_feature_A.append(np.fromfile(os.path.join(query_feature_A_dir, data), dtype='<f4'))
query_feature_A = np.stack(query_feature_A)
gallery_feature_A = []
for data in sorted(os.listdir(gallery_feature_A_dir)):
    gallery_feature_A.append(np.fromfile(os.path.join(gallery_feature_A_dir, data), dtype='<f4'))
gallery_feature_A = np.stack(gallery_feature_A)
print(query_feature_A.shape, gallery_feature_A.shape)
# Indices of the feature dimensions whose column sum is non-zero; the index
# range is sized from the data instead of a hard-coded 2048.
qfs = query_feature_A.sum(axis=0)
nqfs = np.arange(qfs.shape[0])[qfs!=0]
print(nqfs.shape, nqfs)
gfs = gallery_feature_A.sum(axis=0)
ngfs = np.arange(gfs.shape[0])[gfs!=0]
print(ngfs.shape, ngfs)
# Print whether both sets keep the same dimensions. The previous
# `(nqfs==ngfs).all` printed the bound method object instead of calling it;
# `np.array_equal` also copes with index arrays of different length.
print(np.array_equal(nqfs, ngfs))
query_reshape_A = query_feature_A[:, qfs!=0]
gallery_reshape_A = gallery_feature_A[:, gfs!=0]
print(query_reshape_A.shape, gallery_reshape_A.shape)
np.save('/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A.npy', query_feature_A)
np.save('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A.npy', gallery_feature_A)
np.save('/nfs3-p2/zsxm/naic/preliminary/test_A/query_reshape_A.npy', query_reshape_A)
np.save('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_reshape_A.npy', gallery_reshape_A)

In [None]:
query_reshape_A = np.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_reshape_A.npy')
gallery_reshape_A = np.load('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_reshape_A.npy')

In [None]:
print(query_reshape_A.shape, gallery_reshape_A.shape)

In [None]:
from re_ranking.re_ranking_pytable import re_ranking

In [None]:
res = re_ranking(20000, 428794, 100, 30, 0.3, 1000)

In [None]:
print(res)

In [None]:
print(res)

In [None]:
import os
import numpy as np
import tables
from tqdm import tqdm
dis_path = '/nfs3-p2/zsxm/naic/preliminary/test_A/dis'

In [None]:
hdf5_path = os.path.join(dis_path, 'reranking.hdf5')
hdf5_file = tables.open_file(hdf5_path, mode='w')

In [None]:
filters = tables.Filters()

In [None]:
temp = np.load(os.path.join(dis_path, 'original_dist-0.npy'))

In [None]:
all_num = temp.shape[1]
print(all_num)

In [None]:
original_dist = hdf5_file.create_earray(hdf5_file.root, 
                                        'original_dist', 
                                        tables.Atom.from_dtype(temp.dtype), 
                                        shape=(0, temp.shape[1]), 
                                        filters=filters, 
                                        expectedrows=temp.shape[1])

In [None]:
# Append the per-chunk distance files (numbered 0, 1, 2, ... — one file per
# step of 1000 over `all_num`) to the extendable HDF5 array.
chunk_starts = range(0, all_num, 1000)
for t in tqdm(range(len(chunk_starts))):
    original_dist.append(np.load(os.path.join(dis_path, f'original_dist-{t}.npy')))

In [None]:
temp = np.load(os.path.join(dis_path, 'initial_rank-0.npy'))

In [None]:
initial_rank = hdf5_file.create_earray(hdf5_file.root, 
                                        'initial_rank', 
                                        tables.Atom.from_dtype(temp.dtype), 
                                        shape=(0, 101), 
                                        filters=filters, 
                                        expectedrows=all_num)

In [None]:
# Append the first 101 columns of every per-chunk rank file (numbered
# 0, 1, 2, ... — one file per step of 1000 over `all_num`).
chunk_starts = range(0, all_num, 1000)
for t in tqdm(range(len(chunk_starts))):
    rank_chunk = np.load(os.path.join(dis_path, f'initial_rank-{t}.npy'))
    initial_rank.append(rank_chunk[:, :101])

In [None]:
hdf5_file.close()

In [None]:
V = hdf5_file.create_carray(hdf5_file.root, 
                            'V', 
                            tables.Atom.from_dtype(np.zeros(1, dtype=np.float32).dtype), 
                            shape=(all_num, all_num), 
                            filters=filters)

In [None]:
hdf5_file.root

In [None]:
type(V)

In [None]:
hdf5_file.remove_node(hdf5_file.root, 'V')

In [None]:
hdf5_file = tables.open_file(hdf5_path, mode='r')

In [None]:
hdf5_file.root

In [None]:
original_dist = hdf5_file.root.original_dist

In [None]:
original_dist[400000]

In [None]:
hdf5_file.close()

In [None]:
from datasets.preliminary_dataset import PreliminaryDataset, PreliminaryBatchSampler, preliminary_collate_fn
import random
from torch.utils.data import DataLoader

In [None]:
dataset = PreliminaryDataset('/nfs3-p1/zsxm/naic/preliminary/train', False)

In [None]:
batchsampler = PreliminaryBatchSampler(dataset, 150)

In [None]:
dataloader = DataLoader(dataset, batch_sampler=batchsampler, num_workers=8, collate_fn=preliminary_collate_fn, pin_memory=True)

In [None]:
# Stress test for the batch sampler: repeatedly iterate it and check its
# invariants. NOTE: the loop has no exit condition; it runs until an assertion
# fails or the cell is interrupted manually.
random.seed(2)#2078
test_count = 0
while True:
    batchs = []
    t_batch = []
    for i, b in enumerate(batchsampler):
        #print(i, b)
        batchs.append(b)
        t_batch.extend(b)
        # Sum of the per-index lengths in this batch; must be positive and
        # must not exceed the sampler's batch_size.
        b_len = 0
        for idx in b:
            b_len += dataset.idx2len[idx]
        assert 0 < b_len <= batchsampler.batch_size, str(b_len)+str(b)+str(i)
    # One full pass must yield exactly 15000 indices ...
    assert len(t_batch) == 15000, len(t_batch)
    t_batch_set = set(t_batch)
    # ... with no index yielded twice.
    assert len(t_batch) == len(t_batch_set), len(t_batch_set)
    test_count +=1
    print(test_count)

In [None]:
count = 0
for q, k, q_label, k_label in dataloader:
    print(count)
    print(q.shape)
    print(k.shape)
    print(q_label.shape)
    print(k_label.shape)
    count += 1

# KISSME

In [None]:
import os
import numpy as np
import torch
from tqdm import tqdm

In [None]:
train_reshape = torch.load('/nfs3-p1/zsxm/naic/preliminary/train/train_reshape.pt')
train_label = torch.load('/nfs3-p1/zsxm/naic/preliminary/train/train_label.pt')
print(train_reshape.shape, train_label.shape)

In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_reshape = train_reshape.to(device)
train_label = train_label.to(device)

In [None]:
# Naive pairwise accumulation: for every unordered pair (i, j) the outer
# product of the feature difference is added (times 2) to sigma1 when the two
# labels are equal and to sigma0 otherwise.
sigma0 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)
sigma1 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)
for i in tqdm(range(train_reshape.shape[0])):
    for j in range(i+1, train_reshape.shape[0]):
        xij = (train_reshape[i] - train_reshape[j]).unsqueeze(-1)
        mij = torch.mm(xij, xij.T)
        same_label = bool(train_label[i] == train_label[j])
        if not same_label:
            sigma0 += 2 * mij
        else:
            sigma1 += 2 * mij

In [None]:
# Pairwise accumulation, one anchor row at a time: the differences between
# row i and all later rows are split by label agreement, and the sum of their
# outer products is added to sigma1 (same label) or sigma0 (different label).
# The sum of outer products of the rows of X equals X.T @ X, so a single
# matrix product per group replaces materialising a (batch, d, d) tensor of
# outer products and summing it.
sigma0 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)
sigma1 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)
BATCH_SIZE = 128  # no longer used by the loop below; kept so the name stays defined
for i in tqdm(range(train_reshape.shape[0]-1)):
    xij = train_reshape[i] - train_reshape[i+1:]
    flag_0 = train_label[i+1:].ne(train_label[i])
    flag_1 = train_label[i+1:].eq(train_label[i])
    x_diff = xij[flag_0]
    x_same = xij[flag_1]
    # An empty selection gives a (0, d) matrix whose product is all zeros.
    sigma0 += x_diff.T @ x_diff
    sigma1 += x_same.T @ x_same
    del x_diff, x_same

In [None]:
from math import sqrt
# For y = 1..8, print the smaller root x of x**2 - (2n+1)*x + y*n*(n+1)/8 = 0
# (truncated to int) and its increment over the previous value.
# NOTE(review): this looks like a split of the triangular pair loop over n
# rows into 8 chunks of roughly equal work — confirm.
n = 259449
pre = 0
tnp1 = 2*n+1
for y in range(1, 9):
    discriminant = tnp1**2-y*(n**2+n)/2
    sq = sqrt(discriminant)
    this = int((tnp1-sq)/2)
    step = this - pre
    print(this, step)
    pre = this

In [None]:
import os
import numpy as np
import torch
import json
from tqdm import tqdm

In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
train_reshape = torch.load('/nfs3-p1/zsxm/naic/preliminary/train/train_reshape.pt')
sigma0 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)
sigma1 = torch.zeros(train_reshape.shape[1], train_reshape.shape[1], dtype=torch.float, device=device)

In [None]:
# Sum the partial matrices stored in `input_dir`: files whose names start with
# 'sigma0_' are added to sigma0, files starting with 'sigma1_' to sigma1.
input_dir = '/nfs3-p2/zsxm/naic/preliminary/train/'
for file in sorted(os.listdir(input_dir)):
    part_path = os.path.join(input_dir, file)
    if file.startswith('sigma0_'):
        sigma0 += torch.load(part_path)
    elif file.startswith('sigma1_'):
        sigma1 += torch.load(part_path)

In [None]:
# Difference of the inverted same-label and different-label matrices, both
# inverted on the CPU.
# NOTE(review): sigma0 / sigma1 are used as loaded; if they are raw sums over
# pairs they presumably need dividing by the respective pair counts before
# inversion — verify against whatever wrote the sigma*_ files.
Mp = torch.linalg.inv(sigma1.cpu()) - torch.linalg.inv(sigma0.cpu())

In [None]:
del sigma1, sigma0

In [None]:
eigenvalues, eigenvectors = torch.linalg.eigh(Mp, 'L')
print(eigenvalues)

In [None]:
eigenvalues = torch.nn.functional.relu(eigenvalues, inplace=True)
print(eigenvalues)

In [None]:
# Rebuild the matrix from its eigen-decomposition using the current
# `eigenvalues`: M = V @ diag(eigenvalues) @ V.T.
ev = torch.zeros_like(eigenvectors)
diag_idx = range(len(ev))
ev[diag_idx, diag_idx] = eigenvalues
M = eigenvectors @ ev @ eigenvectors.T

In [None]:
print(ev)

In [None]:
print(Mp)
print(M)

In [None]:
query_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A'
gallery_feature_A_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_feature_A'
def KISSME(q, k, M):
    """Rank the gallery for each query under the metric matrix ``M``.

    The distance between query ``q[i]`` and gallery row ``k[j]`` is
    ``(q[i] - k[j]) @ M @ (q[i] - k[j])``; for each query the 100 gallery file
    names with the smallest distance are collected.

    Args:
        q: 2-D tensor of query vectors (one per row).
        k: 2-D tensor of gallery vectors (one per row).
        M: Square matrix matching the feature width of ``q`` and ``k``.

    Returns:
        Dict mapping each query file name to its list of 100 gallery file
        names, in the sorted-listing order of the module-level directories.
    """
    query_names = sorted(os.listdir(query_feature_A_dir))
    gallery_names = sorted(os.listdir(gallery_feature_A_dir))

    res_dict = {}
    for i in tqdm(range(q.shape[0])):
        diff = q[i] - k
        dist = torch.einsum('bi,bi->b', torch.mm(diff, M), diff)
        assert dist.shape == (k.shape[0],)
        order = torch.argsort(dist, descending=False)
        res_dict[query_names[i]] = [gallery_names[order[j]] for j in range(100)]
    return res_dict

In [None]:
query_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_reshape_A.pt').to(device)
gallery_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_reshape_A.pt').to(device)

In [None]:
device1 = device#torch.device('cpu')
query_reshape_A = query_reshape_A.to(device1)
gallery_reshape_A = gallery_reshape_A.to(device1)
Mp = Mp.to(device1)

In [None]:
res = KISSME(query_reshape_A, gallery_reshape_A, M.to(device))

In [None]:
with open('./sub_a.json', 'w') as f:
    json.dump(res, f)

# k-reciprocal

In [None]:
import os
import numpy as np
import torch
import json
from tqdm import tqdm
import tables

In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
work_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/k_reciprocal/'

In [None]:
query_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_reshape_A.pt').to(device)
gallery_reshape_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/gallery_reshape_A.pt').to(device)

In [None]:
all_reshape_A = torch.cat([query_reshape_A, gallery_reshape_A])
print(all_reshape_A.shape)

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    Rows are L2-normalised and multiplied as matrices. On ``RuntimeError`` a
    message is printed, the current ``q`` and ``k`` are moved to the CPU and
    the computation is redone there.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    l2_normalize = torch.nn.functional.normalize
    try:
        q = l2_normalize(q, dim=1)
        k = l2_normalize(k, dim=1)
        return torch.mm(q, k.T)
    except RuntimeError:
        print('out of GPU memory!')
        q, k = q.cpu(), k.cpu()
        return torch.mm(l2_normalize(q, dim=1), l2_normalize(k, dim=1).T)

def batch_pearson(k, h5earray, batch_size=1024):
    """Append the all-pairs Pearson similarity of ``k``'s rows to ``h5earray``.

    Rows are centred by their own mean; the cosine similarity of each slice of
    ``batch_size`` rows against all rows is mapped from [-1, 1] to [0, 1] via
    ``(x + 1) / 2`` and appended to ``h5earray`` as a NumPy array.
    """
    centered = k - k.mean(dim=-1, keepdim=True)
    for start in tqdm(range(0, centered.shape[0], batch_size)):
        sim = cos_similarity(centered[start:start + batch_size], centered)
        h5earray.append(((sim + 1) / 2).cpu().numpy())

In [None]:
# Write the all-pairs similarity matrix to an extendable HDF5 array. The
# `with` block closes the file even if the computation raises, so a failed run
# does not leave the file handle open in the kernel.
with tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='w') as dist_hdf5_file:
    original_dist = dist_hdf5_file.create_earray(dist_hdf5_file.root,
                                                 'original_dist',
                                                 tables.Float32Atom(),
                                                 shape=(0, all_reshape_A.shape[0]),
                                                 filters=tables.Filters(),
                                                 expectedrows=all_reshape_A.shape[0])
    batch_pearson(all_reshape_A, original_dist)

In [None]:
dist_hdf5_file.close()

In [None]:
dist_hdf5_file = tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='r')
original_dist = dist_hdf5_file.root.original_dist
print(original_dist.shape)
dist_hdf5_file.close()

In [None]:
def batch_rank(k, dist_earray, rank_earry, batch_size=256, descending=True):
    """Append the first 101 sorted column indices of each row to ``rank_earry``.

    Rows of ``dist_earray`` are read in slices of ``batch_size``, sorted on
    ``device`` (descending by default), converted to int32 and the leading 101
    indices per row are appended. ``k`` is only used for its row count.
    """
    for start in tqdm(range(0, k.shape[0], batch_size)):
        dist = torch.from_numpy(dist_earray[start:start + batch_size]).to(device)
        order = torch.argsort(dist, dim=-1, descending=descending)
        rank_earry.append(order.int().cpu().numpy()[:, :101])

In [None]:
rank_hdf5_file = tables.open_file(os.path.join(work_dir, 'initial_rank_pearson.hdf5'), mode='w')
initial_rank = rank_hdf5_file.create_earray(rank_hdf5_file.root, 
                                            'initial_rank', 
                                            tables.Int32Atom(), 
                                            shape=(0, 101), 
                                            filters=tables.Filters(), 
                                            expectedrows=all_reshape_A.shape[0])
dist_hdf5_file = tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='r')
original_dist = dist_hdf5_file.root.original_dist
batch_rank(all_reshape_A, original_dist, initial_rank)

In [None]:
rank_hdf5_file.close()
dist_hdf5_file.close()

In [None]:
import os
import numpy as np
import torch
import tables
from tqdm import tqdm

ALL_NUM = 448794
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
work_dir = '/nfs3-p2/zsxm/naic/preliminary/test_A/k_reciprocal/'

def calc_V(num, start, end, k1=100):
    """Write the k-reciprocal neighbour rows of samples ``start..end-1`` to ``V_{num}.hdf5``.

    For each sample ``i`` the k1-reciprocal neighbour set is built from the
    stored ``initial_rank`` table. It is expanded with the (k1/2)-reciprocal
    set of each member whenever more than 2/3 of that set overlaps the
    original set. The matching entries of ``original_dist`` are then copied
    into ``V``; all other entries of the row are left untouched.

    Args:
        num: Suffix of the output file ``V_{num}.hdf5`` inside ``work_dir``.
        start: First sample index handled by this call (inclusive).
        end: Last sample index handled by this call (exclusive).
        k1: Neighbourhood size; columns ``0..k1`` of ``initial_rank`` are used.
    """
    half_k1 = int(np.around(k1/2))
    # Context managers close all three files even if the loop raises.
    with tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='r') as dist_hdf5_file, \
         tables.open_file(os.path.join(work_dir, 'initial_rank_pearson.hdf5'), mode='r') as rank_hdf5_file, \
         tables.open_file(os.path.join(work_dir, f'V_{num}.hdf5'), mode='w') as v_hdf5_file:
        original_dist = dist_hdf5_file.root.original_dist
        initial_rank = rank_hdf5_file.root.initial_rank
        V = v_hdf5_file.create_carray(v_hdf5_file.root, 'V', tables.Float32Atom(), shape=(end-start, ALL_NUM), filters=tables.Filters())

        for i in tqdm(range(start, end), desc='calculate V'):
            # Neighbours of i that also list i among their own neighbours.
            forward_k_neigh_index = initial_rank[i,:k1+1]
            backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1]
            fi = np.where(backward_k_neigh_index==i)[0]
            k_reciprocal_index = forward_k_neigh_index[fi]
            k_reciprocal_expansion_index = k_reciprocal_index
            for j in range(len(k_reciprocal_index)):
                candidate = k_reciprocal_index[j]
                candidate_forward_k_neigh_index = initial_rank[candidate,:half_k1+1]
                candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:half_k1+1]
                fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
                candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
                # Merge the candidate's set when it overlaps enough with i's set.
                if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2/3*len(candidate_k_reciprocal_index):
                    k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index)

            k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
            # Alternative weighting (disabled): exp(-dist) normalised to sum 1.
            # V only holds rows start..end-1, so sample i lives in row i - start;
            # indexing with the absolute i is out of range whenever start > 0.
            V[i - start, k_reciprocal_expansion_index] = original_dist[i,k_reciprocal_expansion_index]

In [None]:
calc_V(0, 0, 80000)

# B榜

## 处理数据以及普通rerank

In [1]:
import os
import numpy as np
import torch
from collections import OrderedDict
import json
from tqdm import tqdm

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# `device` must be defined here: later cells in this section call
# `.to(device)`, and on a fresh kernel nothing else defines it.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
query_feature_B_dir = '/nfs3-p2/zsxm/naic/preliminary/test_B/query_feature_B'
gallery_feature_B_dir = '/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_feature_B'

In [None]:
# Read every raw little-endian float32 file in the test_B query directory
# (sorted by file name) and stack the vectors into one tensor, one row per file.
query_feature_B = torch.stack([
    torch.from_numpy(np.fromfile(os.path.join(query_feature_B_dir, fname), dtype='<f4'))
    for fname in sorted(os.listdir(query_feature_B_dir))
])

In [None]:
# Read every raw little-endian float32 file in the test_B gallery directory
# (sorted by file name) and stack the vectors into one tensor, one row per file.
gallery_feature_B = torch.stack([
    torch.from_numpy(np.fromfile(os.path.join(gallery_feature_B_dir, fname), dtype='<f4'))
    for fname in sorted(os.listdir(gallery_feature_B_dir))
])

In [None]:
print(query_feature_B.shape, query_feature_B)
print(gallery_feature_B.shape, gallery_feature_B)
torch.save(query_feature_B, '/nfs3-p2/zsxm/naic/preliminary/test_B/query_feature_B.pt')
torch.save(gallery_feature_B, '/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_feature_B.pt')

In [None]:
# Find the feature dimensions whose column sum is non-zero in the test_B query
# and gallery sets and check that they agree with each other and with the
# test_A query set. The index range is sized from the actual feature width
# (instead of a hard-coded 2048) and built on the same device as the mask.
qfs = query_feature_B.sum(dim=0)
nqfs = torch.arange(qfs.shape[0], device=qfs.device)[qfs!=0].cpu()
print(nqfs.shape, nqfs)
gfs = gallery_feature_B.sum(dim=0)
ngfs = torch.arange(gfs.shape[0], device=gfs.device)[gfs!=0].cpu()
print(ngfs.shape, ngfs)
print(nqfs.equal(ngfs))

query_feature_A = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_A/query_feature_A.pt')
tfs = query_feature_A.sum(dim=0)
ntfs = torch.arange(tfs.shape[0], device=tfs.device)[tfs!=0].cpu()
print(ntfs.shape, ntfs)
print(nqfs.equal(ntfs))
# The test_A tensor was only needed for this comparison.
del query_feature_A

In [None]:
query_reshape_B = query_feature_B[:, nqfs]
gallery_reshape_B = gallery_feature_B[:, nqfs]
print(query_reshape_B.shape, gallery_reshape_B.shape)

In [None]:
torch.save(query_reshape_B, '/nfs3-p2/zsxm/naic/preliminary/test_B/query_reshape_B.pt')
torch.save(gallery_reshape_B, '/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_reshape_B.pt')

In [3]:
query_reshape_B = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_B/query_reshape_B.pt').to(device)
gallery_reshape_B = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_reshape_B.pt').to(device)

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    Rows are L2-normalised and multiplied as matrices. On ``RuntimeError`` the
    current ``q`` and ``k`` are moved to the CPU and the computation is redone.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    l2_normalize = torch.nn.functional.normalize
    try:
        q = l2_normalize(q, dim=1)
        k = l2_normalize(k, dim=1)
        return torch.mm(q, k.T)
    except RuntimeError:
        # Retry on the CPU (presumably the failure was a GPU memory error).
        q, k = q.cpu(), k.cpu()
        return torch.mm(l2_normalize(q, dim=1), l2_normalize(k, dim=1).T)

@torch.no_grad()
def batch_cos(q, k, batch_size=2048):
    """Cosine similarity of every row of ``q`` against all rows of ``k``.

    ``q`` is processed in slices of ``batch_size`` rows (with a tqdm progress
    bar) and the per-slice results are concatenated along the first axis.
    """
    batch_starts = range(0, q.shape[0], batch_size)
    return torch.cat([cos_similarity(q[s:s + batch_size], k) for s in tqdm(batch_starts)])

def batch_pearson(q, k, batch_size=1024):
    """Pearson correlation of every row of ``q`` against all rows of ``k``.

    Each row is centred by subtracting its own mean, after which the cosine
    similarity of the centred rows is computed in slices of ``batch_size``
    query rows. Every slice result is moved to the CPU before concatenation.
    """
    k_centered = k - k.mean(dim=-1, keepdim=True)
    q_centered = q - q.mean(dim=-1, keepdim=True)
    chunks = [
        cos_similarity(q_centered[s:s + batch_size], k_centered).cpu()
        for s in tqdm(range(0, q_centered.shape[0], batch_size))
    ]
    return torch.cat(chunks)

def batch_sort(dist, batch_size=500):
    """Return, for every row of ``dist``, the indices of its 10 largest entries.

    Slices of ``batch_size`` rows are moved to ``device``, sorted in descending
    order there, and the leading 10 indices per row are brought back to the
    CPU and concatenated.
    """
    top_labels = []
    for start in tqdm(range(0, dist.shape[0], batch_size)):
        chunk = dist[start:start + batch_size].to(device)
        order = torch.argsort(chunk, descending=True)
        top_labels.append(order[:, :10].cpu())
        del order, chunk
    return torch.cat(top_labels)

def print_res(res, print_json=False):
    """Rank the gallery for every query and optionally write the top 100 to JSON.

    Row ``i`` of ``res`` is sorted in descending order; the 100 best gallery
    file names are stored under the i-th query file name. File names come from
    the sorted listings of the test_B query/gallery feature directories.

    Args:
        res: Score matrix indexed as ``res[query_index]``; higher is better.
        print_json: When true, dump the mapping to ``./result_pearson_b.json``.
    """
    query_feature_B_dir = '/nfs3-p2/zsxm/naic/preliminary/test_B/query_feature_B'
    gallery_feature_B_dir = '/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_feature_B'
    query_names = sorted(os.listdir(query_feature_B_dir))
    gallery_names = sorted(os.listdir(gallery_feature_B_dir))
    res_dict = {}
    for row, name in enumerate(tqdm(query_names)):
        order = torch.argsort(res[row], dim=-1, descending=True)
        res_dict[name] = [gallery_names[order[rank]] for rank in range(100)]
    if not print_json:
        return
    with open('./result_pearson_b.json', 'w') as f:
        json.dump(res_dict, f)

In [None]:
res = batch_pearson(query_reshape_B, gallery_reshape_B)

In [None]:
label = batch_sort(res)

In [None]:
# Replace every query by a blend of its two best-ranked gallery vectors:
# 0.7 * top-1 + 0.3 * top-2 (indices taken from `label`).
avg_query_reshape_B = []
for q_idx in tqdm(range(query_reshape_B.shape[0])):
    top1 = gallery_reshape_B[label[q_idx, 0]]
    top2 = gallery_reshape_B[label[q_idx, 1]]
    avg_query_reshape_B.append(0.7 * top1 + 0.3 * top2)
avg_query_reshape_B = torch.stack(avg_query_reshape_B)
print(avg_query_reshape_B.shape)

In [None]:
res_rerank = batch_pearson(avg_query_reshape_B, gallery_reshape_B)

In [None]:
print_res(res, True)

## k-reciprocal

In [None]:
import os
import numpy as np
import torch
import json
from tqdm import tqdm
import tables

In [None]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
work_dir = '/nfs3-p2/zsxm/naic/preliminary/test_B/k_reciprocal/'
os.makedirs(work_dir, exist_ok=True)

In [None]:
query_reshape_B = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_B/query_reshape_B.pt').to(device)
gallery_reshape_B = torch.load('/nfs3-p2/zsxm/naic/preliminary/test_B/gallery_reshape_B.pt').to(device)

In [None]:
all_reshape_B = torch.cat([query_reshape_B, gallery_reshape_B])
print(all_reshape_B.shape)

In [None]:
def cos_similarity(q, k):
    """Pairwise cosine similarity between the rows of ``q`` and the rows of ``k``.

    Rows are L2-normalised and multiplied as matrices. On ``RuntimeError`` a
    message is printed, the current ``q`` and ``k`` are moved to the CPU and
    the computation is redone there.

    Returns:
        Tensor of shape ``(q.shape[0], k.shape[0])``.
    """
    l2_normalize = torch.nn.functional.normalize
    try:
        q = l2_normalize(q, dim=1)
        k = l2_normalize(k, dim=1)
        return torch.mm(q, k.T)
    except RuntimeError:
        print('out of GPU memory!')
        q, k = q.cpu(), k.cpu()
        return torch.mm(l2_normalize(q, dim=1), l2_normalize(k, dim=1).T)

def batch_pearson(k, h5earray, batch_size=16):
    """Append the all-pairs Pearson similarity of ``k``'s rows to ``h5earray``.

    Rows are centred by their own mean; the cosine similarity of each slice of
    ``batch_size`` rows against all rows is mapped from [-1, 1] to [0, 1] via
    ``(x + 1) / 2`` and appended to ``h5earray`` as a NumPy array.
    """
    centered = k - k.mean(dim=-1, keepdim=True)
    for start in tqdm(range(0, centered.shape[0], batch_size)):
        sim = cos_similarity(centered[start:start + batch_size], centered)
        h5earray.append(((sim + 1) / 2).cpu().numpy())
        
def batch_rank(k, dist_earray, rank_earry, batch_size=16, descending=True):
    """Append the first 101 sorted column indices of each row to ``rank_earry``.

    Rows of ``dist_earray`` are read in slices of ``batch_size``, sorted on
    ``device`` (descending by default), converted to int32 and the leading 101
    indices per row are appended. ``k`` is only used for its row count.
    """
    for start in tqdm(range(0, k.shape[0], batch_size)):
        dist = torch.from_numpy(dist_earray[start:start + batch_size]).to(device)
        order = torch.argsort(dist, dim=-1, descending=descending)
        rank_earry.append(order.int().cpu().numpy()[:, :101])

In [None]:
# Write the all-pairs similarity matrix to a compressed (level 5) extendable
# HDF5 array. The `with` block closes the file even if the computation raises,
# so a failed run does not leave the file handle open in the kernel.
with tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='w') as dist_hdf5_file:
    original_dist = dist_hdf5_file.create_earray(dist_hdf5_file.root,
                                                 'original_dist',
                                                 tables.Float32Atom(),
                                                 shape=(0, all_reshape_B.shape[0]),
                                                 filters=tables.Filters(5),
                                                 expectedrows=all_reshape_B.shape[0])
    batch_pearson(all_reshape_B, original_dist)

In [None]:
# Read the stored similarity matrix and write the leading 101 ranked indices
# of every row to a new HDF5 file. Both files are opened in a `with` block so
# they are closed even if ranking raises part-way through.
with tables.open_file(os.path.join(work_dir, 'initial_rank_pearson.hdf5'), mode='w') as rank_hdf5_file, \
     tables.open_file(os.path.join(work_dir, 'original_dist_pearson.hdf5'), mode='r') as dist_hdf5_file:
    initial_rank = rank_hdf5_file.create_earray(rank_hdf5_file.root,
                                                'initial_rank',
                                                tables.Int32Atom(),
                                                shape=(0, 101),
                                                filters=tables.Filters(),
                                                expectedrows=all_reshape_B.shape[0])
    original_dist = dist_hdf5_file.root.original_dist
    batch_rank(all_reshape_B, original_dist, initial_rank)

In [4]:
import os

# Print the chunk numbers (0..49) whose jaccard_dist_<n>.pt file is missing.
for chunk in range(50):
    chunk_path = os.path.join('/nfs3-p2/zsxm/naic/preliminary/test_B/k_reciprocal', f'jaccard_dist_{chunk}.pt')
    if os.path.exists(chunk_path):
        continue
    print(chunk)

6
7
9
10
12
13


In [5]:
import os

# Print the chunk numbers (0..49) whose res_<n>.pt file is missing.
for chunk in range(50):
    chunk_path = os.path.join('/nfs3-p2/zsxm/naic/preliminary/test_B/k_reciprocal', f'res_{chunk}.pt')
    if os.path.exists(chunk_path):
        continue
    print(chunk)

6
7
9
10
12
13
