## 相似视频检索

视频级相似匹配 -> 帧级匹配

In [35]:
import glob
import pandas as pd
import pickle

import cv2
import imagehash
import numpy as np
import networkx as nx
from tqdm import tqdm
from PIL import Image
from scipy.spatial.distance import cdist
from scipy.spatial.distance import cosine
from networkx.algorithms.dag import dag_longest_path

# Root directory of the video copy detection data set (absolute, machine-specific path).
PATH = '/home/wx/work/video_copy_detection/'
# Roots of the train and test splits.
TRAIN_PATH = PATH + 'train/'
TEST_PATH = PATH + 'test/'
# Train split: query and refer directories.
TRAIN_QUERY_PATH = TRAIN_PATH + 'query/'
REFER_PATH = TRAIN_PATH + 'refer/'
# Train split: extracted frame images, one sub-directory per video
# (the cells below glob them as <frame dir>/<video id>/*.jpg).
TRAIN_QUERY_FRAME_PATH = TRAIN_PATH + 'query_frame/'
REFER_FRAME_PATH = TRAIN_PATH + 'refer_frame/'
# Test split: query directory and its extracted frame images.
TEST_QUERY_PATH = TEST_PATH + 'query/'
TEST_QUERY_FRAME_PATH = TEST_PATH + 'query_frame/'
# Code directory (not referenced in the visible part of this notebook).
CODE_DIR = PATH + 'code/'

In [2]:
# Load the precomputed feature files.
# Presumably each pickle holds one feature vector per frame image, in the same
# order as the corresponding sorted image path list (they are zipped together
# when vid2features is built) -- TODO confirm against the extraction script.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(PATH + 'var/train_query_features.pk', 'rb') as pk_file:
    train_query_features = pickle.load(pk_file)

with open(PATH + 'var/test_query_features.pk', 'rb') as pk_file:
    test_query_features = pickle.load(pk_file)

with open(PATH + 'var/refer_features.pk', 'rb') as pk_file:
    refer_features = pickle.load(pk_file)

In [70]:
# Index the key frames of the train_query videos.
#
# Frame paths and video ids are sorted case-insensitively. Every path contains
# its video directory, so the frames of one video are contiguous in the sorted
# list and ordered by file name (this equals time order only if the timestamps
# in the file names are zero-padded -- TODO confirm).
#
# Lookup tables built here:
#   train_query_vid2idx / train_query_idx2vid : video id <-> position in train_query_vids
#   train_query_vid2baseaddr                  : video id -> global frame id (fid) of its first frame
#   train_query_fid2path / _fid2vid / _fid2time : per-frame path, video id, timestamp
train_query_imgs_path = []
train_query_vids = []
train_query_vid2idx = {}
train_query_idx2vid = {}
train_query_vid2baseaddr = {}
train_query_fid2path = {}
train_query_fid2vid = {}
train_query_fid2time = {}

# `query_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the notebook.
for query_id in pd.read_csv(TRAIN_PATH + 'train.csv')['query_id']:
    train_query_imgs_path += glob.glob(TRAIN_QUERY_FRAME_PATH + query_id + '/*.jpg')
    train_query_vids.append(query_id)

train_query_imgs_path.sort(key=str.lower)
train_query_vids.sort(key=str.lower)

for idx, vid in enumerate(train_query_vids):
    train_query_vid2idx[vid] = idx
    train_query_idx2vid[idx] = vid

for fid, path in enumerate(train_query_imgs_path):
    # .../<video id>/<file name>: the directory name is the video id. This is
    # the same key used for train_query_vids and vid2features, and it does not
    # depend on the file name having a fixed-width suffix.
    cur_vid, file_name = path.split('/')[-2:]
    train_query_fid2vid[fid] = cur_vid
    train_query_fid2path[fid] = path
    # Timestamp = text after the last '_' with the '.jpg' suffix removed
    # (presumably seconds; it is multiplied by 1000 to get ms later on).
    train_query_fid2time[fid] = float(file_name.split('_')[-1][:-4])
    # Frames of one video are contiguous, so the first fid seen for a video
    # is its base address.
    train_query_vid2baseaddr.setdefault(cur_vid, fid)

In [67]:
# path.split('/')[-1][:-20]
# float(path.split('/')[-1].split('_')[-1][:-4])


In [71]:
# Index the key frames of the test_query videos.
#
# Frame paths and video ids are sorted case-insensitively. Every path contains
# its video directory, so the frames of one video are contiguous in the sorted
# list and ordered by file name (this equals time order only if the timestamps
# in the file names are zero-padded -- TODO confirm).
#
# Lookup tables built here:
#   test_query_vid2idx / test_query_idx2vid : video id <-> position in test_query_vids
#   test_query_vid2baseaddr                 : video id -> global frame id (fid) of its first frame
#   test_query_fid2path / _fid2vid / _fid2time : per-frame path, video id, timestamp
test_query_imgs_path = []
test_query_vids = []
test_query_vid2idx = {}
test_query_idx2vid = {}
test_query_vid2baseaddr = {}
test_query_fid2path = {}
test_query_fid2vid = {}
test_query_fid2time = {}

# `query_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the notebook.
for query_id in pd.read_csv(TEST_PATH + 'submit_example.csv')['query_id']:
    test_query_imgs_path += glob.glob(TEST_QUERY_FRAME_PATH + query_id + '/*.jpg')
    test_query_vids.append(query_id)

test_query_imgs_path.sort(key=str.lower)
test_query_vids.sort(key=str.lower)

for idx, vid in enumerate(test_query_vids):
    test_query_vid2idx[vid] = idx
    test_query_idx2vid[idx] = vid

for fid, path in enumerate(test_query_imgs_path):
    # .../<video id>/<file name>: the directory name is the video id. This is
    # the same key used for test_query_vids and vid2features, and it does not
    # depend on the file name having a fixed-width suffix.
    cur_vid, file_name = path.split('/')[-2:]
    test_query_fid2vid[fid] = cur_vid
    test_query_fid2path[fid] = path
    # Timestamp = text after the last '_' with the '.jpg' suffix removed
    # (presumably seconds -- TODO confirm).
    test_query_fid2time[fid] = float(file_name.split('_')[-1][:-4])
    # Frames of one video are contiguous, so the first fid seen for a video
    # is its base address.
    test_query_vid2baseaddr.setdefault(cur_vid, fid)

In [72]:
# Index the key frames of the refer videos.
#
# Frame paths and video ids are sorted case-insensitively. Every path contains
# its video directory, so the frames of one video are contiguous in the sorted
# list and ordered by file name (this equals time order only if the timestamps
# in the file names are zero-padded -- TODO confirm).
#
# Lookup tables built here:
#   refer_vid2idx / refer_idx2vid : video id <-> position in refer_vids
#   refer_vid2baseaddr            : video id -> global frame id (fid) of its first frame
#   refer_fid2path / _fid2vid / _fid2time : per-frame path, video id, timestamp

refer_imgs_path = glob.glob(REFER_FRAME_PATH + '*/*.jpg')
refer_imgs_path.sort(key=str.lower)

refer_vid2idx = {}
refer_idx2vid = {}
refer_vid2baseaddr = {}
refer_fid2path = {}
refer_fid2vid = {}
refer_fid2time = {}

# Unique video ids = names of the directories that contain at least one frame.
refer_vids = sorted({path.split('/')[-2] for path in refer_imgs_path}, key=str.lower)

for idx, vid in enumerate(refer_vids):
    refer_vid2idx[vid] = idx
    refer_idx2vid[idx] = vid

for fid, path in enumerate(refer_imgs_path):
    # Take the video id from the directory name, exactly as refer_vids and
    # vid2features do, instead of slicing a fixed number of characters off
    # the file name.
    cur_vid, file_name = path.split('/')[-2:]
    refer_fid2vid[fid] = cur_vid
    refer_fid2path[fid] = path
    # Timestamp = text after the last '_' with the '.jpg' suffix removed
    # (presumably seconds; it is multiplied by 1000 to get ms later on).
    refer_fid2time[fid] = float(file_name.split('_')[-1][:-4])
    # Frames of one video are contiguous, so the first fid seen for a video
    # is its base address.
    refer_vid2baseaddr.setdefault(cur_vid, fid)

In [21]:
# All video ids in one NumPy array: train queries first, then test queries,
# then refer videos.
vids = np.concatenate((train_query_vids, test_query_vids, refer_vids), axis=0)

In [22]:
# Group the per-frame features by video.
#
# vid2features maps a video id (the name of the directory holding its frames)
# to a 2-D array with one row per frame, in the order of the image path lists.
#
# Building the mapping is slow, so the result is cached on disk. Set
# REBUILD_VID2FEATURES to True to rebuild and overwrite the cache; leave it
# False to load the cached pickle.
REBUILD_VID2FEATURES = False

if REBUILD_VID2FEATURES:
    frames_by_vid = {}
    feature_sources = (
        (train_query_imgs_path, train_query_features),
        (test_query_imgs_path, test_query_features),
        (refer_imgs_path, refer_features),
    )
    for imgs_path, features in feature_sources:
        for path, cur_feat in tqdm(zip(imgs_path, features)):
            vid = path.split('/')[-2]
            # Collect rows in a list and convert once per video below;
            # concatenating arrays frame by frame copies the whole array on
            # every step.
            frames_by_vid.setdefault(vid, []).append(cur_feat)

    # Every video becomes an array, including videos with a single frame
    # (which would otherwise stay a plain list and have no .shape).
    vid2features = {vid: np.asarray(frames) for vid, frames in frames_by_vid.items()}

    with open(PATH + 'var/vid2features.pk', 'wb') as pk_file:
        pickle.dump(vid2features, pk_file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    with open(PATH + 'var/vid2features.pk', 'rb') as pk_file:
        vid2features = pickle.load(pk_file)


In [23]:
# Sanity check: shape of the feature array stored for the first refer video.
vid2features[refer_vids[0]].shape

(179, 1024)

In [49]:
def compute_similarities(query_features, refer_features):
    """
      Compute the cosine similarity between every query row and every refer row.

      The features are expected to be L2-normalised already; the cosine
      metric is applied regardless.

      Args:
        query_features: array-like, shape [N, D]
        refer_features: array-like, shape [M, D]
      Returns:
        sorted_sims: list of length N. sorted_sims[i] is a list of M
          (refer_index, similarity) tuples for query row i, ordered by
          descending similarity.
        unsorted_sims: list of length N. unsorted_sims[i] is an array of
          shape [M]; unsorted_sims[i][j] is the similarity between query
          row i and refer row j.
    """
    # Pairwise cosine distances, shape [N, M]. nan_to_num replaces any NaN
    # distance with 0, so such pairs end up with similarity 1 and no NaN can
    # reach the ranking below.
    dist = np.nan_to_num(cdist(query_features, refer_features, metric='cosine'))

    sorted_sims = []
    unsorted_sims = []
    for dist_row in dist:
        # Turn distance into similarity.
        sim = 1 - dist_row
        unsorted_sims.append(sim)
        # Refer indices ordered from most to least similar.
        ranking = sim.argsort()[::-1]
        sorted_sims.append([(s, sim[s]) for s in ranking])
    return sorted_sims, unsorted_sims

In [62]:
def get_frame_alignment(query_features, refer_features, top_K=5, min_sim=0.7, max_step=5):
    """
      Align the frames of a query video with the frames of a refer video.

      Every query frame contributes up to top_K candidate
      (query_frame, refer_frame) nodes whose similarity is at least min_sim.
      A directed edge goes from one node to another when both the query index
      and the refer index strictly increase by at most max_step; its weight is
      the similarity of the destination pair. The alignment is the longest
      (maximum total weight) path of this DAG.

      Args:
        query_features: array of shape [N, D] (features already l2-normalised)
        refer_features: array of shape [M, D] (features already l2-normalised)
        top_K: number of best-matching refer frames considered per query frame
        min_sim: minimum similarity required between a query frame and a refer frame
        max_step: maximum index step, in either video, between two linked nodes
      Returns:
        path_query: list of query frame indices (local to query_features)
        path_refer: list of refer frame indices (local to refer_features),
          same length as path_query
        Both lists are empty when the best path contains no candidate pair.
    """
    num_query = query_features.shape[0]
    num_refer = refer_features.shape[0]

    sorted_sims, unsorted_sims = compute_similarities(query_features, refer_features)

    # node id -> (query index, refer index). Node 0 is a virtual source placed
    # just before the first frames; the last node is a virtual sink placed
    # just after the last frames.
    node_id2pair = [(-1, -1)]
    for qf_idx in range(num_query):
        # Slicing, rather than indexing k in range(top_K), stays in bounds
        # when the refer video has fewer than top_K frames.
        for rf_idx, sim in sorted_sims[qf_idx][:top_K]:
            if sim < min_sim:
                # Candidates are sorted by descending similarity, so the
                # remaining ones are below the threshold as well.
                break
            node_id2pair.append((qf_idx, rf_idx))
    sink_id = len(node_id2pair)
    node_id2pair.append((num_query, num_refer))

    DG = nx.DiGraph()
    DG.add_nodes_from(range(sink_id + 1))

    def _within_step(pair_from, pair_to):
        # True when both indices move strictly forward by at most max_step.
        step_q = pair_to[0] - pair_from[0]
        step_r = pair_to[1] - pair_from[1]
        return 0 < step_q <= max_step and 0 < step_r <= max_step

    sink_pair = node_id2pair[sink_id]
    for i in range(sink_id):
        pair_i = node_id2pair[i]
        # Nodes are created in increasing query order, so only later nodes
        # can be successors.
        for j in range(i + 1, sink_id):
            pair_j = node_id2pair[j]
            if _within_step(pair_i, pair_j):
                DG.add_edge(i, j, weight=unsorted_sims[pair_j[0]][pair_j[1]])
        # Reaching the sink adds nothing to the path weight.
        if _within_step(pair_i, sink_pair):
            DG.add_edge(i, sink_id, weight=0)

    longest_path = dag_longest_path(DG)

    # The source and sink are bookkeeping nodes, not frames: (-1, -1) and
    # (N, M) must not be handed back to the caller as frame indices.
    aligned = [node_id2pair[node_id] for node_id in longest_path if 0 < node_id < sink_id]
    path_query = [pair[0] for pair in aligned]
    path_refer = [pair[1] for pair in aligned]

    return path_query, path_refer

In [81]:
# Demo: align the first train query video against one hard-coded refer video
# and compare the predicted time ranges with the matching row of train.csv.
q_vid = train_query_vids[0]
r_vid = '1226686400'
query = vid2features[q_vid]
refer = vid2features[r_vid]
# Global frame id of the first frame of each video; adding a local frame
# index to it gives the key into the *_fid2time tables.
q_baseaddr = train_query_vid2baseaddr[q_vid]
r_baseaddr = refer_vid2baseaddr[r_vid]
path_query, path_refer = get_frame_alignment(query, refer) # frame indices local to each video
# Frame timestamps scaled by 1000 and truncated to int, printed as ms below.
time_query = [int(train_query_fid2time[q_baseaddr + qf_id] * 1000) for qf_id in path_query]
time_refer = [int(refer_fid2time[r_baseaddr + rf_id] * 1000) for rf_id in path_refer]
# First and last aligned frame give the predicted range in each video.
print("query_time_range(ms): {}|{}".format(time_query[0], time_query[-1]))
print("refer_time_range(ms): {}|{}".format(time_refer[0], time_refer[-1]))

# Row(s) of train.csv for this query, for visual comparison.
train_df = pd.read_csv(TRAIN_PATH + 'train.csv')
print(train_df.loc[train_df['query_id'] == q_vid])


query_time_range(ms): 90400|174000
refer_time_range(ms): 3555120|3648280
                                  query_id query_time_range(ms)    refer_id  \
2021  001c2348-b8e3-11e9-bf24-fa163ee49799         89530|173990  1226686400   

     refer_time_range(ms)  
2021      3554350|3638810  


In [14]:
# 提交一个最简单的结果

#submit_df = pd.read_csv(TEST_PATH + 'submit_example.csv')
#submit_df.loc[submit_df['query_id'] == '016cf7e8-b8d6-11e9-930e-fa163ee49799', 
#              ['query_time_range(ms)', 'refer_id', 'refer_time_range(ms)']] = ['0|80500', '1293733400', '554000|635000']
#submit_df.to_csv('result.csv', index = None, sep=',')