In [1]:
from tqdm import tqdm
import numpy as np
import os
from torchvision import transforms, utils
import cv2
import glob
import torch
from PIL import Image
import time

# Uniformly sample 10 frames from each original 60-frame video and save them,
# so that the model can load the data and train faster.

In [2]:
# Preprocessing constants used by TransformImage.
# NOTE(review): the mean/std values look like RGB-order channel statistics —
# confirm against the model these frames are fed to.
_FRAME_MEAN = [0.43216, 0.394666, 0.37645]
_FRAME_STD = [0.22803, 0.22145, 0.216989]
_FRAME_SIZE = [224, 224]

# The pipeline does not depend on the input image, so it is built once here
# instead of being re-created for every single frame.
_FRAME_TRANSFORM = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(_FRAME_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_FRAME_MEAN, _FRAME_STD),
])


def TransformImage(img):
    """Resize one frame to 224x224 and normalize it per channel.

    The frame is converted to a PIL image, resized to ``_FRAME_SIZE``,
    converted to a tensor and normalized with ``_FRAME_MEAN`` / ``_FRAME_STD``.

    Args:
        img: a single image in a form accepted by ``transforms.ToPILImage``
            (e.g. an H x W x C ndarray). Channels are used in the order given;
            no colour-space conversion is performed here, so callers that
            decode with OpenCV (BGR) should reorder channels beforehand if the
            statistics above are RGB-ordered.

    Returns:
        The preprocessed frame as a torch tensor (C x 224 x 224).
    """
    return _FRAME_TRANSFORM(img)

In [6]:
def extract_feats(params):
    """Sample a fixed number of frames per video, preprocess them and cache them.

    Every entry of ``params['video_path']`` is treated as one video whose
    frames are the ``*.jpg`` files inside it (sorted by file name).
    ``params['n_frame_steps']`` frames are picked at evenly spaced indices,
    passed through ``TransformImage`` and saved together as
    ``<output_dir>/<video>.npy``. Videos whose output file already exists are
    skipped, so an interrupted run can simply be started again.

    Args:
        params: dict with the keys
            'video_path'    -- directory holding one folder of frames per video
            'output_dir'    -- destination directory; joined onto the current
                               working directory (an absolute path is kept
                               as-is by ``os.path.join``)
            'n_frame_steps' -- number of frames to keep per video

    Raises:
        IOError: if a selected frame file cannot be decoded by OpenCV.
    """
    C, H, W = 3, 224, 224
    video_root = params['video_path']
    n_frames = params['n_frame_steps']

    video_list = sorted(os.listdir(video_root))
    output_dir = os.path.join(os.getcwd(), params['output_dir'])
    print(output_dir)
    # makedirs also creates missing parent directories and tolerates an
    # already existing target, unlike a bare os.mkdir.
    os.makedirs(output_dir, exist_ok=True)

    print('total_len: ', len(video_list))
    for video in tqdm(video_list):
        # macOS folder metadata, not a video.
        if video == '.DS_Store':
            continue

        outfile = os.path.join(output_dir, video + '.npy')
        if os.path.exists(outfile):
            print(video, " is already processed!")
            continue

        frame_paths = sorted(glob.glob(os.path.join(video_root, video, '*.jpg')))
        if not frame_paths:
            # Empty folder or a stray non-directory entry: there is nothing to
            # sample, and indexing an empty list below would raise IndexError.
            print(video, " has no .jpg frames, skipped!")
            continue

        # Evenly spaced frame indices over the whole clip (repeats are possible
        # when the clip has fewer frames than requested).
        picks = np.round(np.linspace(0, len(frame_paths) - 1, n_frames)).astype(int)

        images = torch.zeros((len(picks), C, H, W))
        for slot, frame_idx in enumerate(picks):
            frame_path = frame_paths[int(frame_idx)]
            img = cv2.imread(frame_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise IOError('could not read frame: ' + frame_path)
            # OpenCV decodes to BGR, while ToPILImage interprets a 3-channel
            # array as RGB; reorder so the per-channel normalization in
            # TransformImage is applied to the channels it was meant for.
            # NOTE: .npy files cached by earlier runs were written without this
            # conversion — delete them to regenerate a consistent dataset.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images[slot] = TransformImage(img)

        # Write to a temporary file and rename, so that an interrupted run never
        # leaves a truncated .npy that the "already processed" check would trust.
        tmpfile = outfile + '.tmp'
        with open(tmpfile, 'wb') as fh:
            np.save(fh, images.numpy())
        os.replace(tmpfile, outfile)
    

In [7]:
# Settings for the frame-sampling run.
# 'output_dir' is an absolute path, so joining it onto the working directory
# inside extract_feats leaves it unchanged. 'model' is not read by
# extract_feats as defined in this notebook.
params = {
    'model': 'resnet18',
    'output_dir': '/data/frames_1fps/10f_video/',
    'video_path': './data/feats/frames_1fps/',
    'n_frame_steps': 10,
}

extract_feats(params)
print('all videos have been processed!')

/data/frames_1fps/10f_video/
total_len:  9288


  0%|          | 0/9288 [00:00<?, ?it/s]

00000002  is already processed!
00000003  is already processed!
00000004  is already processed!
00000005  is already processed!
00000006  is already processed!
00000007  is already processed!
00000008  is already processed!
00000009  is already processed!
00000012  is already processed!
00000014  is already processed!
00000015  is already processed!


100%|██████████| 9288/9288 [1:46:45<00:00,  1.45it/s]  

all videos have been processed!





In [9]:
# Sanity check: read one cached file back and confirm that np.load returns a
# plain numpy array.
visual_posi = np.load('/data/frames_1fps/10f_video/00000002.npy')
print(type(visual_posi))

<class 'numpy.ndarray'>
