In [1]:
from tqdm import tqdm
from joblib import Parallel, delayed
import numpy as np
import os
from torchvision import transforms, utils
import cv2
import glob
import torch
import json
from PIL import Image
import time
from multiprocessing import Pool, Process, Queue
import multiprocessing
from einops import rearrange
import utils_3D

In [3]:
# Sanity check: load one saved .npy array and display its shape.
v = np.load('/data/avst/10f_video_2plus1/00000002.npy')
v.shape

(10, 3, 1, 112, 112)

# 将原始60帧的视频均匀采样10帧并保存，以便模型读取数据和训练时更快

In [5]:
def TransformImage(img):
    """Turn a single frame into a normalized 224x224 tensor.

    The frame is converted to a PIL image, resized to 224x224, converted to a
    tensor and normalized per channel with the mean/std constants below.

    Args:
        img: Frame accepted by ``transforms.ToPILImage`` (the callers in this
            notebook pass the array returned by ``cv2.imread``).

    Returns:
        The transformed frame tensor.
    """
    channel_mean = [0.43216, 0.394666, 0.37645]
    channel_std = [0.22803, 0.22145, 0.216989]

    pipeline = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([224,224]),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    return pipeline(img)

def applyParallel(video_list, func, num_process=None):
    """Run ``func`` on every entry of ``video_list`` in parallel with joblib.

    Args:
        video_list: Iterable of items; each one is passed to ``func`` as its
            only argument. Progress is reported through ``tqdm``.
        func: Callable applied to each item.
        num_process: Number of joblib workers. ``None`` (the default) keeps the
            previous behavior of using ``multiprocessing.cpu_count()`` workers.

    Returns:
        The list of ``func`` return values as produced by ``joblib.Parallel``.
    """
    if num_process is None:
        num_process = multiprocessing.cpu_count()
    runner = Parallel(n_jobs=num_process)
    return runner(delayed(func)(video) for video in tqdm(video_list))

In [6]:
def extract_feats_res18(video):
    """Sample the frames of one video folder and save them as one array.

    Reads the sorted ``*.jpg`` files under ``params['video_path']/<video>``,
    keeps every ``params['n_frame_steps']``-th file, truncates the selection
    to ``params['n_frames']`` files, runs each frame through
    ``TransformImage`` and writes the stacked result to
    ``params['output_dir']/<video>.npy``.

    Args:
        video: Name of the frame folder inside ``params['video_path']``.

    Returns:
        1 when a new ``.npy`` file was written, 0 when the entry was skipped
        (output file already present, or the entry is ``.DS_Store``).

    Raises:
        IOError: If OpenCV cannot decode one of the selected frames.
    """
    C, H, W = 3, 224, 224
    s = time.time()

    # Skip work that an earlier run already finished.
    outfile = os.path.join(params['output_dir'], video + '.npy')
    if os.path.exists(outfile):
        return 0

    if video == '.DS_Store':
        return 0

    image_list = sorted(glob.glob(os.path.join(params['video_path'], video, '*.jpg')))
    image_list = image_list[::params['n_frame_steps']]
    image_list = image_list[:params['n_frames']]

    images = torch.zeros((len(image_list), C, H, W))
    for iImg, image_path in enumerate(image_list):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # with the offending path instead of an opaque error downstream.
            raise IOError('cannot read image: ' + image_path)
        # cv2.imread returns channels in BGR order, while the PIL-based
        # pipeline in TransformImage interprets a 3-channel array as RGB.
        # NOTE(review): .npy files written before this fix hold swapped
        # channels and are skipped by the exists-check above; regenerate them.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images[iImg] = TransformImage(img)

    print('time consumption: ',time.time() - s)
    np.save(outfile, images)
    return 1
    

In [3]:
def extract_feats_r2plus1d(video):
    """Sample the frames of one video folder into a 5-D array and save it.

    Reads the sorted ``*.jpg`` files under ``params['video_path']/<video>``,
    keeps every ``params['n_frame_steps']``-th file, truncates the selection
    to ``params['n_frames']`` files, loads each one with the global
    ``load_img`` callable and stores it in a ``(num_frames, 3, 1, 112, 112)``
    tensor that is written to ``params['output_dir']/<video>.npy``.

    Args:
        video: Name of the frame folder inside ``params['video_path']``.

    Returns:
        1 when a new ``.npy`` file was written, 0 when the entry was skipped
        (output file already present, or the entry is ``.DS_Store``).
    """
    C, H, W = 3, 112, 112

    # Skip work that an earlier run already finished.
    outfile = os.path.join(params['output_dir'], video + '.npy')
    if os.path.exists(outfile):
        return 0

    if video == '.DS_Store':
        return 0

    image_list = sorted(glob.glob(os.path.join(params['video_path'], video, '*.jpg')))
    image_list = image_list[::params['n_frame_steps']]
    image_list = image_list[:params['n_frames']]

    # One frame per clip slot: the third axis has length 1, so every frame
    # lands at index 0 of that axis (the old `i//1` / `i%1` arithmetic).
    images = torch.zeros((len(image_list), C, 1, H, W))
    for iImg, image_path in enumerate(image_list):
        images[iImg, :, 0, :, :] = load_img(image_path)

    np.save(outfile, images)
    return 1

In [7]:
if __name__ == '__main__':
    # Settings read by the extract_feats_* workers through the global `params`.
    params = {}
    params['output_dir'] = '/data/avst/20f_video/'
    params['video_path'] = '/data/avst/frames_1fps/'
    params['n_frames'] = 20
    # Stride between kept frames, derived from 60 source frames per video.
    params['n_frame_steps'] = 60 // params['n_frames']
    print(params['n_frame_steps'])
    # Image loader used by extract_feats_r2plus1d.
    load_img = utils_3D.LoadTransformImage()

    # Create the output directory together with any missing parents; a plain
    # check-then-mkdir fails when the parent is absent and can race.
    os.makedirs(params['output_dir'], exist_ok=True)
    video_list = os.listdir(params['video_path'])

    # Process every entry of the frame directory in a pool of workers.
    result = applyParallel(video_list, extract_feats_res18)
    print('all videos have been processed!')


3


100%|██████████| 9288/9288 [00:40<00:00, 231.78it/s]

all videos have been processed!





In [4]:
# visual_posi = np.load('/data/frames_1fps/20f_video/00000002.npy')
import os
# video_list = sorted(os.listdir('/data/avst/videos/'))
# Count the entries of the r2plus1d_18 folder and preview the first five names.
frame_list = sorted(os.listdir('/data/avst/r2plus1d_18/'))
print(len(frame_list))
frame_list[:5]

9288


['00000002.npy',
 '00000003.npy',
 '00000004.npy',
 '00000005.npy',
 '00000006.npy']

# 从原始视频中固定抽取60帧并保存

In [1]:
from tqdm import tqdm
from joblib import Parallel, delayed
import numpy as np
import os
import sys
from torchvision import transforms, utils
import cv2
import glob
import torch
from PIL import Image
import time
from multiprocessing import Pool, Process, Queue
import multiprocessing
import os
from imageio import imsave
from moviepy.editor import VideoFileClip, concatenate_videoclips
import warnings
warnings.filterwarnings('ignore')

In [2]:
def is_generate(out_path, dst=60):
    """Tell whether ``out_path`` already holds at least ``dst`` JPEG files.

    Args:
        out_path: Directory to inspect.
        dst: Minimum number of ``.jpg`` entries (case-insensitive, surrounding
            whitespace ignored) required.

    Returns:
        False when ``out_path`` does not exist, otherwise whether the number
        of ``.jpg`` entries is greater than or equal to ``dst``.
    """
    if os.path.exists(out_path):
        jpg_total = sum(
            1
            for entry in os.listdir(out_path)
            if entry.strip().lower().endswith('.jpg')
        )
        return jpg_total >= dst
    return False


def fixed_video(clip, video_len_pre):
    """Pad a clip that is shorter than ``video_len_pre`` by repeating its tail.

    Args:
        clip: Clip object exposing ``duration`` and ``subclip``.
        video_len_pre: Minimum duration the returned clip must reach.

    Returns:
        ``clip`` itself when it is already long enough; otherwise the clip
        with its tail segment appended until the duration reaches
        ``video_len_pre``.
    """
    if clip.duration >= video_len_pre:
        return clip

    # The tail starts at the last whole second; when the duration is itself a
    # whole number, -1 is passed to subclip instead.
    tail_start = int(clip.duration)
    tail_start = -1 if tail_start == clip.duration else tail_start
    tail = clip.subclip(tail_start)

    padded = clip
    while padded.duration < video_len_pre:
        padded = concatenate_videoclips([padded, tail])
    return padded


def read_frame(reader, pos):
    """Return the frame at position ``pos`` from a sequential video reader.

    Nearby forward positions are reached by skipping frames; a backward seek,
    a jump of more than 100 frames, or a reader without a running process
    reopens the stream. The stream is reopened at the timestamp of the
    requested frame. Previously it was reopened at the start of the file while
    ``reader.pos`` was set to ``pos``, so the first frame of the video was
    returned in place of the requested one.

    Args:
        reader: Object exposing ``proc``, ``pos``, ``lastread``, ``fps``,
            ``initialize(starttime)``, ``skip_frames(n)`` and ``read_frame()``
            (``deal_video`` passes ``clip.reader``).
        pos: Frame position to read. Assumes the reader's convention that
            position p corresponds to time (p - 1) / fps -- TODO confirm
            against the installed moviepy version.

    Returns:
        The frame returned by ``reader.read_frame()`` (or the cached
        ``reader.lastread`` when ``pos`` equals the current position).
    """
    def _restart_at(target):
        # Reopen the stream at the timestamp of `target` so the next
        # read_frame() yields that frame; clamp to 0 for positions below 1.
        reader.initialize(max(target - 1, 0) / reader.fps)
        reader.pos = target

    if not reader.proc:
        _restart_at(pos)
        reader.lastread = reader.read_frame()

    if pos == reader.pos:
        return reader.lastread

    if (pos < reader.pos) or (pos > reader.pos + 100):
        _restart_at(pos)
    else:
        # Close enough ahead: discard the frames in between.
        reader.skip_frames(pos - reader.pos - 1)
    result = reader.read_frame()
    reader.pos = pos
    return result

def compute_numbers(outpath):
    """Check whether ``outpath`` holds exactly 10 ``.jpg`` and 10 ``.pkl`` files.

    Args:
        outpath: Directory whose entries are counted by extension
            (case-insensitive).

    Returns:
        0 when both counts equal 10, otherwise 1.
    """
    extensions = [
        os.path.splitext(entry)[-1].lower()
        for entry in os.listdir(outpath)
    ]
    jpg_total = extensions.count(".jpg")
    pkl_total = extensions.count(".pkl")
    return 0 if (jpg_total == 10 and pkl_total == 10) else 1

In [3]:
def deal_video(video_file, out_path, fix_second, fps_count):
    """Extract evenly spaced frames from one video and save them as JPEGs.

    For every sample ``i`` the frame position is
    ``round((1 / (2 * fps_count) + i / fps_count) * fps)``, i.e. the middle of
    each ``1 / fps_count``-second interval. Positions beyond the last frame
    wrap around inside the trailing block of ``last_frames`` frames. Frames
    are written to ``out_path`` as ``00000001.jpg``, ``00000002.jpg``, ...

    Args:
        video_file: Path of the video to read.
        out_path: Directory receiving the JPEG files; created when missing.
        fix_second: Number of seconds to cover; 0 means use the video's own
            length, ``round(total_frames / fps)``.
        fps_count: Number of frames to save per second.

    Returns:
        0 when ``video_file`` is not a file; 1 otherwise. Exceptions raised
        while decoding or saving are caught and printed, and 1 is still
        returned in that case.
    """
    start = time.time()
    try:
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        if not os.path.isfile(video_file):
            print("deal video error, %s is not a file" % video_file)
            return 0
        with VideoFileClip(video_file) as clip:
            reader = clip.reader
            fps = reader.fps
            total_frames = reader.nframes

            # Size of the trailing partial second; a whole second when the
            # frame count divides evenly by the frame rate.
            last_frames = int(total_frames % fps)
            if last_frames == 0:
                last_frames = int(fps)
            last_start = total_frames - last_frames

            video_len_pre = fix_second
            if fix_second == 0:
                video_len_pre = round(total_frames / fps)
            video_len_pre = video_len_pre * fps_count

            save_frame_index_arr = []
            for i in range(video_len_pre):
                absolute_frame_pos = round((1 / (2 * fps_count) + i / fps_count) * fps)
                if absolute_frame_pos > total_frames:
                    # Past the end of the video: wrap inside the last block.
                    relative_frame_pos = last_start + 1 + ((absolute_frame_pos - last_start - 1) % last_frames)
                else:
                    relative_frame_pos = absolute_frame_pos
                save_frame_index_arr.append(relative_frame_pos)

            # Decode each distinct position once, in ascending order so the
            # reader only has to move forward.
            save_frame_map = {
                frame_pos: read_frame(reader, frame_pos)
                for frame_pos in sorted(set(save_frame_index_arr))
            }

            for i, frame_pos in enumerate(save_frame_index_arr):
                try:
                    out_file_name = os.path.join(out_path, "{:08d}.jpg".format(i + 1))
                    imsave(out_file_name, save_frame_map[frame_pos])
                except Exception as e:
                    # Best effort: report the frame and continue with the rest.
                    print("(%s) save frame(%s) error" % (video_file, str(i + 1)), e)

    except Exception as e:
        print("deal video(%s) error" % video_file, e)
    print('time_consumption: ', time.time() - start)
    return 1

In [4]:
def process_dir_path_class(video_file):
    """Extract the frames of one video unless its frame folder is complete.

    The frame folder is ``param["dst_path_class"]/<file name without
    extension>``; the source video is ``param["dir_path_class"]/<video_file>``.

    Args:
        video_file: File name of the video inside ``param["dir_path_class"]``.

    Returns:
        0 when the frame folder exists and ``is_generate`` reports it as
        complete; otherwise None after running ``deal_video``.
    """
    stem, _ext = os.path.splitext(video_file)
    frames_dir = os.path.join(param["dst_path_class"], stem)

    if os.path.exists(frames_dir) and is_generate(frames_dir):
        return 0

    source_path = os.path.join(param["dir_path_class"], video_file)
    status = deal_video(source_path, frames_dir, param["fix_second"], param["fps_count"])
    if status == 1:
        # Report how many entries the frame folder holds after extraction.
        frame_total = len(os.listdir(frames_dir))
        print("Progress: ", " ------- id: ", video_file, 'length of video is :',frame_total)


def applyParallel(video_list, func, num_process):
    """Apply ``func`` to each item of ``video_list`` using joblib workers.

    Args:
        video_list: Iterable of items; each is passed to ``func`` as its only
            argument. Progress is shown with ``tqdm``.
        func: Callable applied to each item.
        num_process: Value passed to ``joblib.Parallel`` as ``n_jobs``.

    Returns:
        The list of results produced by ``joblib.Parallel``.
    """
    runner = Parallel(n_jobs=num_process)
    jobs = (delayed(func)(item) for item in tqdm(video_list))
    return runner(jobs)

In [None]:
if __name__ == "__main__":
    # Settings read by process_dir_path_class through the global `param`.
    param = {
        "dir_path_class": '/data/avst/videos/',
        "dst_path_class": '/data/avst/frames/',
        "fix_second": 0,
        "fps_count": 1,
    }

    video_files = os.listdir(param["dir_path_class"])
    # Fixed pool size of 10 workers.
    num_process = 10
    result = applyParallel(video_files, process_dir_path_class, num_process)
    print('all videos have been processed!')

# pdf 2 png

In [12]:
import os
import sys
import fitz
from reportlab.lib.pagesizes import portrait
from reportlab.pdfgen import canvas
from PIL import Image

In [None]:
def pdf2img(filename):
    """Render every page of a PDF to a PNG file.

    Page ``pg`` (0-based) is written to ``<filename>_tu<pg as two digits>.png``.
    The document is closed when rendering finishes or fails.

    Args:
        filename: Path of the PDF file to convert.
    """
    # NOTE(review): pageCount / preRotate / getPixmap / writePNG are the
    # camelCase PyMuPDF names; confirm the installed version still has them.
    # Open the PDF file and obtain a document object.
    doc = fitz.open(filename)
    try:
        page_total = doc.pageCount
        print("共",page_total,"页")
        # A zoom factor of 8 per axis yields 64 times as many pixels;
        # 2 is the usual setting. Rotation is 0, so one matrix fits all pages.
        trans = fitz.Matrix(8.0, 8.0).preRotate(0)
        for pg in range(page_total):
            print("\r转换为图片",pg+1,"/",page_total,end="")
            pm = doc[pg].getPixmap(matrix=trans, alpha=False)
            pm.writePNG(filename+'_tu'+'{:02}.png' .format(pg))
    finally:
        # Release the file handle even when a page fails to render.
        doc.close()
    print('end')

In [None]:
if __name__ == '__main__':
    # Convert AVQA.pdf (path relative to the working directory) page by page.
    filename='AVQA.pdf'
    pdf2img(filename)