## 特征提取

In [None]:
import os, sys, codecs
import glob
import pandas as pd
import numpy as np
import pickle
from PIL import Image
from tqdm import tqdm

import cv2

from sklearn.preprocessing import normalize as sknormalize
from sklearn.decomposition import PCA

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

# Project root; every other location below is derived from it.
PATH = '/home/wx/work/video_copy_detection/'
CODE_DIR = PATH + 'code/'

# Training split: query / refer directories and their per-video frame folders.
TRAIN_PATH = PATH + 'train/'
TRAIN_QUERY_PATH = TRAIN_PATH + 'query/'
TRAIN_QUERY_FRAME_PATH = TRAIN_PATH + 'query_uniformframe/'
REFER_PATH = TRAIN_PATH + 'refer/'
REFER_FRAME_PATH = TRAIN_PATH + 'refer_uniformframe/'

# Test split: query directory and its per-video frame folders.
TEST_PATH = PATH + 'test/'
TEST_QUERY_PATH = TEST_PATH + 'query2/'
TEST_QUERY_FRAME_PATH = TEST_PATH + 'query2_uniformframe/'

In [None]:
class QRDataset(Dataset):
    """Dataset that loads image files from a list of paths.

    Each item is the pair ``(image, path)``: the image (after the optional
    transform) together with the path it was read from.
    """

    def __init__(self, img_path, transform = None):
        """
        :param img_path: Sequence of image file paths
        :param transform: Optional callable applied to every loaded PIL image
        """
        self.img_path = img_path

        # Placeholder labels (all zeros); __getitem__ does not return them.
        self.img_label = np.zeros(len(img_path))

        self.transform = transform

    def __getitem__(self, index):
        """ Load one image
        :param index: Position in ``img_path``
        :returns: Tuple ``(image, path)``
        """
        path = self.img_path[index]

        # Always hand out a 3-channel RGB image: grayscale / CMYK / RGBA files
        # would otherwise yield tensors with a different channel count, which
        # breaks a 3-channel Normalize and batching. The context manager
        # closes the underlying file once convert() has loaded the pixels.
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, path

    def __len__(self):
        """Number of image paths in the dataset."""
        return len(self.img_path)

class Img2Vec():
    """Turn images into feature vectors with a pretrained torchvision CNN.

    A forward hook on one layer of the network captures that layer's output
    for every batch of images pushed through the model.
    """

    def __init__(self, model='resnet-18', layer='default', layer_output_size=512):
        """ Img2Vec
        :param model: String name of requested model ('resnet-18' or 'alexnet')
        :param layer: String or Int depending on model.
        :param layer_output_size: Int depicting the output size of the requested layer
            (overwritten with 512 / 4096 when ``layer`` is 'default')
        """
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.layer_output_size = layer_output_size
        self.model_name = model

        self.model, self.extraction_layer = self._get_model_and_layer(model, layer)

        self.model = self.model.to(self.device)

        # Inference only: put dropout / batch-norm layers in eval mode.
        self.model.eval()

        self.transformer = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

    def get_vec(self, path, batch_size=40, num_workers=16):
        """ Get vector embeddings for one or more image files
        :param path: Path of a single image, or list/tuple of image paths
        :param batch_size: Number of images per forward pass
        :param num_workers: Number of DataLoader worker processes
        :returns: Numpy ndarray with one row per image, in input order
        """
        if not isinstance(path, (list, tuple)):
            path = [path]

        # Nothing to embed: np.vstack() would raise on an empty list, so
        # return an empty (0, layer_output_size) matrix instead.
        if len(path) == 0:
            return np.empty((0, self.layer_output_size), dtype=np.float32)

        data_loader = torch.utils.data.DataLoader(
            QRDataset(path, self.transformer),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        my_embedding = []

        # hook function
        def append_data(module, input, output):
            # clone() before leaving the graph so a later in-place layer
            # (e.g. an inplace ReLU) cannot alter the captured values.
            my_embedding.append(output.clone().detach().cpu().numpy())

        # Register the hook once and guarantee its removal, even when a
        # forward pass or the data loader raises.
        handle = self.extraction_layer.register_forward_hook(append_data)
        try:
            with torch.no_grad():
                for batch_x, _ in tqdm(data_loader):
                    # Use the same device the model was moved to in __init__.
                    self.model(batch_x.to(self.device))
        finally:
            handle.remove()

        my_embedding = np.vstack(my_embedding)
        if self.model_name == 'alexnet':
            # Outputs of the alexnet classifier layers are used as-is.
            return my_embedding
        # Other models: keep spatial position (0, 0) of the 4-D layer output
        # (for resnet-18 'avgpool' the output is N x 512 x 1 x 1).
        return my_embedding[:, :, 0, 0]

    def _get_model_and_layer(self, model_name, layer):
        """ Internal method for getting layer from model
        :param model_name: model name such as 'resnet-18'
        :param layer: layer as a string for resnet-18 or int for alexnet
        :returns: pytorch model, selected layer
        :raises KeyError: if the model name or the resnet layer name is unknown
        """
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True` -- confirm against the installed version.
        if model_name == 'resnet-18':
            model = models.resnet18(pretrained=True)
            if layer == 'default':
                layer = 'avgpool'
                self.layer_output_size = 512

            module = model._modules.get(layer)
            if module is None:
                # Fail here rather than with an AttributeError on None later.
                raise KeyError('Layer %s was not found in model %s' % (layer, model_name))

            return model, module

        elif model_name == 'alexnet':
            model = models.alexnet(pretrained=True)
            if layer == 'default':
                module = model.classifier[-2]
                self.layer_output_size = 4096
            else:
                # Integer layer: counted from the end of the classifier.
                module = model.classifier[-layer]

            return model, module

        else:
            raise KeyError('Model %s was not found' % model_name)

In [None]:
# Collect the frame images of every test_query video, then sort the paths
# case-insensitively so frames are grouped by video folder. The order inside a
# video follows the file names (presumably these encode frame time -- confirm).
test_query_imgs_path = []
# Loop variable is `query_id`, not `id`: at module scope `id` would rebind the builtin.
for query_id in pd.read_csv(TEST_PATH + 'submit_example2.csv')['query_id']:
    test_query_imgs_path.extend(glob.glob(TEST_QUERY_FRAME_PATH + query_id + '/*.jpg'))

test_query_imgs_path.sort(key=str.lower)

In [4]:
# Collect the frame images of every train_query video, then sort the paths
# case-insensitively so frames are grouped by video folder. The order inside a
# video follows the file names (presumably these encode frame time -- confirm).
train_query_imgs_path = []
# Loop variable is `query_id`, not `id`: at module scope `id` would rebind the builtin.
for query_id in pd.read_csv(TRAIN_PATH + 'train.csv')['query_id']:
    train_query_imgs_path.extend(glob.glob(TRAIN_QUERY_FRAME_PATH + query_id + '/*.jpg'))

train_query_imgs_path.sort(key=str.lower)

In [5]:
# Collect the frame images of all refer videos, ordered by their
# lower-cased path (i.e. by video folder, then by file name).
refer_imgs_path = sorted(glob.glob(REFER_FRAME_PATH + '*/*.jpg'), key=str.lower)

In [6]:
# Initialize Img2Vec with its default arguments
# (model='resnet-18', layer='default', layer_output_size=512).
img2vec = Img2Vec()

In [None]:
# Extract key-frame features for the test_query videos (a copy of the path list is passed)
test_query_features = img2vec.get_vec(list(test_query_imgs_path))

  6%|▌         | 1396/24966 [02:50<14:28, 27.12it/s]   

In [None]:
# Extract key-frame features for the train_query videos (a copy of the path list is passed)
train_query_features = img2vec.get_vec(list(train_query_imgs_path))

In [None]:
# Extract key-frame features for the refer videos (a copy of the path list is passed)
refer_features = img2vec.get_vec(list(refer_imgs_path))

In [None]:
def normalize(x, copy = False):
    """
    A helper function that wraps the function of the same name in sklearn
    (called with its default norm and axis).

    This helper also handles a single 1-D vector, which sklearn itself
    rejects: the vector is normalized as one row and returned 1-D again.

    :param x: 1-D numpy array, or 2-D array-like accepted by sklearn
    :param copy: Passed through to sklearn. With the default False sklearn is
        allowed to normalize in place, so ``x`` itself may be modified.
    :returns: The normalized array
    """
    # isinstance() rather than an exact type comparison, so ndarray
    # subclasses take the 1-D path as well.
    if isinstance(x, np.ndarray) and x.ndim == 1:
        # reshape() gives sklearn the 2-D input it requires; squeeze() drops
        # the added axis again.
        return np.squeeze(sknormalize(x.reshape(1, -1), copy = copy))
    return sknormalize(x, copy = copy)

In [None]:
# PCA dimensionality reduction -- currently disabled: the code below sits
# inside a bare string literal, so it is never executed.
# NOTE(review): as written, fit_transform is called separately on each of the
# three feature sets, i.e. each set would get its own PCA fit. Confirm that is
# intended before re-enabling.
'''
pca = PCA(n_components=512)

train_query_features = pca.fit_transform(train_query_features)
test_query_features = pca.fit_transform(test_query_features)
refer_features = pca.fit_transform(refer_features)
'''


In [None]:
# L2 normalization of all three feature sets via the normalize() helper
train_query_features, test_query_features, refer_features = (
    normalize(features)
    for features in (train_query_features, test_query_features, refer_features)
)

In [None]:
# Save the test_query frame features as a pickle file
with open(PATH + 'var/test_query_features_uni.pk', 'wb') as fout:
    pickle.dump(test_query_features, fout)

In [None]:
# Save the train_query frame features as a pickle file
with open(PATH + 'var/train_query_features_uni.pk', 'wb') as fout:
    pickle.dump(train_query_features, fout)

In [None]:
# Save the refer frame features as a pickle file
with open(PATH + 'var/refer_features_uni.pk', 'wb') as fout:
    pickle.dump(refer_features, fout)