## 相似视频检索

视频级相似匹配 -> 帧级匹配

In [1]:
import glob
import pandas as pd
import pickle

import cv2
import imagehash
import numpy as np
from tqdm import tqdm
from PIL import Image
from scipy.spatial.distance import cdist

# Dataset root; every other location below is derived from it.
PATH = '/home/wx/work/video_copy_detection/'

# Top-level split directories and the code directory.
TRAIN_PATH = f'{PATH}train/'
TEST_PATH = f'{PATH}test/'
CODE_DIR = f'{PATH}code/'

# Training split: query videos, reference videos and their extracted frames.
TRAIN_QUERY_PATH = f'{TRAIN_PATH}query/'
REFER_PATH = f'{TRAIN_PATH}refer/'
TRAIN_QUERY_FRAME_PATH = f'{TRAIN_PATH}query_frame/'
REFER_FRAME_PATH = f'{TRAIN_PATH}refer_frame/'

# Test split: query videos and their extracted frames.
TEST_QUERY_PATH = f'{TEST_PATH}query/'
TEST_QUERY_FRAME_PATH = f'{TEST_PATH}query_frame/'

In [2]:
# Load the precomputed feature files.
def _load_pickle(relative_path):
    """Deserialize one pickle file located under PATH and return its content."""
    # NOTE: pickle must only be used on files you produced yourself.
    with open(PATH + relative_path, 'rb') as pk_file:
        return pickle.load(pk_file)


train_query_features = _load_pickle('var/train_query_features.pk')
test_query_features = _load_pickle('var/test_query_features.pk')
refer_features = _load_pickle('var/refer_features.pk')

In [3]:
# Collect the key frames of every train_query video, then sort both the
# frame paths and the video ids case-insensitively. Frame paths therefore
# end up ordered by their directory name first and file name second.
train_query_imgs_path = []
train_query_vids = []
# The loop variable is named `query_id` so that the builtin `id` is not
# shadowed for the rest of the notebook session.
for query_id in pd.read_csv(TRAIN_PATH + 'train.csv')['query_id']:
    train_query_imgs_path.extend(glob.glob(TRAIN_QUERY_FRAME_PATH + query_id + '/*.jpg'))
    train_query_vids.append(query_id)

train_query_imgs_path.sort(key=str.lower)
train_query_vids.sort(key=str.lower)

In [4]:
# Collect the key frames of every test_query video, then sort both the
# frame paths and the video ids case-insensitively. Frame paths therefore
# end up ordered by their directory name first and file name second.
test_query_imgs_path = []
test_query_vids = []
# The loop variable is named `query_id` so that the builtin `id` is not
# shadowed for the rest of the notebook session.
for query_id in pd.read_csv(TEST_PATH + 'submit_example.csv')['query_id']:
    test_query_imgs_path.extend(glob.glob(TEST_QUERY_FRAME_PATH + query_id + '/*.jpg'))
    test_query_vids.append(query_id)

test_query_imgs_path.sort(key=str.lower)
test_query_vids.sort(key=str.lower)

In [5]:
# Collect the key frames of all reference videos, sorted case-insensitively
# by path (directory name first, file name second).
refer_imgs_path = sorted(glob.glob(REFER_FRAME_PATH + '*/*.jpg'), key=str.lower)

# The parent directory of each frame is its video id; deduplicate the ids
# and sort them the same case-insensitive way.
refer_vids = sorted(
    {frame_path.split('/')[-2] for frame_path in refer_imgs_path},
    key=str.lower,
)

In [6]:
# All video ids in one array: train queries, then test queries, then references.
vids = np.concatenate([train_query_vids, test_query_vids, refer_vids])

In [7]:
# Group the frame-level features by video id.
#
# Each frame path is paired positionally with one feature row, so the path
# lists and the feature containers must be in the same order. zip() stops at
# the shorter of the two, so a length mismatch is silently truncated.
vid2features = {}
for frame_paths, frame_features in (
    (train_query_imgs_path, train_query_features),
    (test_query_imgs_path, test_query_features),
    (refer_imgs_path, refer_features),
):
    for path, cur_feat in tqdm(zip(frame_paths, frame_features)):
        # The parent directory name of a frame is its video id.
        # Appending to a list is O(1); growing an array with np.concatenate
        # for every frame re-copies all previous rows each time.
        vid2features.setdefault(path.split('/')[-2], []).append(cur_feat)

# Stack once per video so every value is an ndarray of shape
# (n_frames, feature_dim), including videos that have a single frame.
vid2features = {vid: np.asarray(feats) for vid, feats in vid2features.items()}


125100it [00:01, 71065.81it/s]
62555it [00:00, 97641.52it/s]
181052it [00:22, 8196.60it/s]


In [8]:
#with open(PATH + 'var/vid2features.pk', 'wb') as pk_file:
#    pickle.dump(vid2features, pk_file)

In [9]:
# Sanity check: shape of the feature array stored for the first reference video.
vid2features[refer_vids[0]].shape

(179, 512)

In [10]:
# Compute one global (video-level) vector per video: the mean of its
# frame features along axis 0, accumulated as Python float (float64).
vid2gv = {
    video_id: np.mean(vid2features[video_id], axis=0, dtype=float)
    for video_id in tqdm(vids)
}

100%|██████████| 4705/4705 [00:00<00:00, 7048.02it/s]


In [11]:
# Sanity check: first ten components of the global vector of the first video in `vids`.
vid2gv[vids[0]][:10]

array([-0.02394   , -0.00686392, -0.010284  , -0.00620301,  0.00018242,
       -0.00684416,  0.0036758 , -0.0022393 ,  0.00056661,  0.000713  ])

In [12]:
#with open(PATH + 'var/vid2gv.pk', 'wb') as pk_file:
#    pickle.dump(vid2gv, pk_file)

In [13]:
def calculate_similarities(query_features, all_features):
    """Rank every candidate feature vector against each query by cosine similarity.

    Args:
        query_features: array-like of shape [N, D], one row per query.
        all_features: array-like of shape [M, D], one row per candidate.

    Returns:
        A list with one entry per query. Each entry is a list of M
        ``(candidate_index, similarity)`` tuples ordered from the most to
        the least similar candidate, where similarity is
        ``1 - cosine distance``. An empty ``query_features`` yields ``[]``.
    """
    if len(query_features) == 0:
        return []

    # Cosine distance between every query and every candidate: shape [N, M].
    dist = cdist(query_features, all_features, metric='cosine')
    # The cosine distance is undefined (NaN) when either vector has zero
    # norm. Mapping NaN to distance 0 would rank such a pair as a perfect
    # match (similarity 1.0); use distance 1 (similarity 0) instead.
    dist[np.isnan(dist)] = 1.0

    similarities = []
    for row in dist:
        # Convert distance to similarity.
        sim = 1 - row
        # Candidate indices ordered by decreasing similarity.
        similarities.append([(s, sim[s]) for s in sim.argsort()[::-1]])
    return similarities

In [14]:
# Build the global-feature matrices: one row per video, rows in the same
# order as the corresponding id list.
train_gv_features = np.array([vid2gv[video_id] for video_id in tqdm(train_query_vids)])

test_gv_features = np.array([vid2gv[video_id] for video_id in tqdm(test_query_vids)])

refer_gv_features = np.array([vid2gv[video_id] for video_id in tqdm(refer_vids)])

100%|██████████| 3000/3000 [00:00<00:00, 534896.79it/s]
100%|██████████| 1500/1500 [00:00<00:00, 631102.02it/s]
100%|██████████| 205/205 [00:00<00:00, 223856.37it/s]


In [15]:
# Video-level retrieval: rank all reference videos for every train query
# using the global (per-video) feature vectors.
similarities = calculate_similarities(train_gv_features, refer_gv_features)

In [79]:
# Look up the reference video id stored at index 5 of the sorted id list.
refer_vids[5]

'1212504600'

In [6]:
# Frame-level matching: for every train-query frame, look up its best
# reference frame and print the pair when the top score exceeds the threshold.
# NOTE(review): compute_cosin_distance is not defined in this section; it is
# presumably returning results ranked best-first - confirm against its definition.
path_flag = [0]  # video ids of the frames printed so far; 0 is a start sentinel
threshold = 0.9
for idx, (path, feat) in enumerate(zip(train_query_imgs_path, train_query_features)):
    idxs, rank_dists, rank_names = compute_cosin_distance(feat, refer_features, refer_imgs_path)
    if not rank_dists[0] > threshold:
        continue
    # if hamming_distance(refer_hash[idxs[0]], query_hash[idx]) < 5:

    path_parts = path.split('/')
    query_vid = path_parts[-2]
    # Emit a blank line whenever the output moves on to a different query video.
    if query_vid != path_flag[-1]:
        print('')

    print(path_parts[-1], rank_names[0].split('/')[-1])
    path_flag.append(query_vid)

In [8]:
# Open one key frame of test query 016cf7e8-b8d6-11e9-930e-fa163ee49799 for visual inspection.
Image.open(TEST_QUERY_FRAME_PATH + '016cf7e8-b8d6-11e9-930e-fa163ee49799/016cf7e8-b8d6-11e9-930e-fa163ee49799_00000031.500000.jpg')

In [9]:
# Open one key frame of reference video 1293733400 for visual inspection.
Image.open(REFER_FRAME_PATH + '1293733400/1293733400_00000586.520000.jpg')

In [14]:
# Produce a minimal submission: start from the example file and fill in
# the answer for a single query video.

submit_df = pd.read_csv(TEST_PATH + 'submit_example.csv')

# Rows belonging to the one query that gets an answer.
answered_rows = submit_df['query_id'] == '016cf7e8-b8d6-11e9-930e-fa163ee49799'
answer_columns = ['query_time_range(ms)', 'refer_id', 'refer_time_range(ms)']
submit_df.loc[answered_rows, answer_columns] = ['0|80500', '1293733400', '554000|635000']

# Write the result without the DataFrame index column.
submit_df.to_csv('result.csv', index=None, sep=',')