In [1]:
import os
from sklearn.model_selection import train_test_split
import cv2
import numpy as np

def process_video(ori_data_path: str, video: str, action_name: str, save_dir: str) -> None:
    """Sample frames from one video and save them as resized JPEG images.

    The video is read from ``ori_data_path/action_name/video``. Sampled frames
    are resized to 171x128 (width x height) when needed and written to
    ``save_dir/<video name without extension>/`` as ``'0000{}.jpg'`` formatted
    with a running index that starts at 0.

    Sampling starts at every 4th frame; the step is lowered one at a time
    (down to 1) while ``frame_count // step`` is 16 or fewer, so short clips
    yield more frames.

    Args:
        ori_data_path: Root folder that contains one sub-folder per action.
        video: File name of the video inside the action folder.
        action_name: Name of the action sub-folder holding ``video``.
        save_dir: Folder in which the per-video frame folder is created.

    A video that cannot be opened is reported and skipped without creating
    an output folder.
    """
    resize_height = 128
    resize_width = 171
    min_frames = 16

    # splitext only strips the extension, so dots inside the name survive and
    # two videos such as "a.b.avi" / "a.c.avi" no longer share one folder.
    video_filename = os.path.splitext(video)[0]
    frame_dir = os.path.join(save_dir, video_filename)

    capture = cv2.VideoCapture(os.path.join(ori_data_path, action_name, video))
    try:
        if not capture.isOpened():
            # Avoid leaving an empty sample folder behind for unreadable files.
            print('无法打开视频，已跳过: {}'.format(video))
            return

        os.makedirs(frame_dir, exist_ok=True)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        # Densify the sampling for short clips: 4 -> 3 -> 2 -> 1.
        extract_frequency = 4
        while extract_frequency > 1 and frame_count // extract_frequency <= min_frames:
            extract_frequency -= 1

        count = 0  # frames read so far
        i = 0      # frames written so far
        while count < frame_count:
            retaining, frame = capture.read()
            if not retaining or frame is None:
                # The stream ended earlier than the reported frame count.
                break
            if count % extract_frequency == 0:
                # Check the decoded frame itself rather than container metadata.
                height, width = frame.shape[:2]
                if height != resize_height or width != resize_width:
                    frame = cv2.resize(frame, (resize_width, resize_height))
                # NOTE: the index is appended to a fixed '0000' prefix rather
                # than zero-padded, so names grow from index 10 on and do not
                # sort lexicographically in frame order. Kept unchanged because
                # readers of these files may depend on the existing names.
                cv2.imwrite(filename=os.path.join(frame_dir, '0000{}.jpg'.format(i)),
                            img=frame)
                i += 1
            count += 1
    finally:
        # Release the capture even if resizing or writing raised.
        capture.release()

def preprocess(ori_data_path: str, output_data_path: str) -> None:
    """Split every class folder into train/val/test and extract the frames.

    For each sub-directory of ``ori_data_path`` the contained entries are split
    80/20 into (train+val)/test and the first part again 80/20 into train/val,
    both with ``random_state=42``. Each video is then passed to
    ``process_video`` with ``output_data_path/<split>/<class>`` as target
    folder. Progress messages are printed per class and at the end.

    Args:
        ori_data_path: Root folder with one sub-directory per class.
        output_data_path: Root folder for the generated ``train``, ``val`` and
            ``test`` trees; created (including parents) when missing.
    """
    split_names = ('train', 'val', 'test')

    # Make sure the output root and the three split folders exist.
    for split_name in split_names:
        os.makedirs(os.path.join(output_data_path, split_name), exist_ok=True)

    # Every sub-directory of the source root is treated as one class.
    for file in os.listdir(ori_data_path):
        file_path = os.path.join(ori_data_path, file)
        if not os.path.isdir(file_path):
            continue  # skip entries that are not directories

        # os.listdir returns entries in arbitrary order; sorting makes the
        # seeded split reproducible across runs and file systems.
        video_files = sorted(os.listdir(file_path))
        train_and_valid, test = train_test_split(video_files, test_size=0.2, random_state=42)
        train, val = train_test_split(train_and_valid, test_size=0.2, random_state=42)

        for split_name, videos in zip(split_names, (train, val, test)):
            split_dir = os.path.join(output_data_path, split_name, file)
            os.makedirs(split_dir, exist_ok=True)
            for video in videos:
                process_video(ori_data_path, video, file, split_dir)
        print('{}划分完成'.format(file))
    print('所有数据划分完成')

def label_text_write(ori_data_path: str, out_label_path: str) -> None:
    """Write ``labels.txt`` listing the class names with 1-based indices.

    Each sub-directory of ``ori_data_path`` that contains at least one entry
    counts as a class. The classes are sorted by name and written to
    ``out_label_path/labels.txt`` as ``"<index> <name>"`` lines, with the index
    starting at 1. An existing ``labels.txt`` is left untouched.

    Args:
        ori_data_path: Root folder with one sub-directory per class.
        out_label_path: Existing folder in which ``labels.txt`` is created.
    """
    class_names = []
    for label in sorted(os.listdir(ori_data_path)):
        label_path = os.path.join(ori_data_path, label)
        if not os.path.isdir(label_path):
            continue  # skip entries that are not directories
        # Empty class folders contribute no label.
        if os.listdir(label_path):
            class_names.append(label)

    label_file = os.path.join(out_label_path, 'labels.txt')
    if os.path.exists(label_file):
        return  # never overwrite a label file that is already there

    with open(label_file, 'w', encoding='utf-8') as f:
        f.writelines(
            '{} {}\n'.format(index, name)
            for index, name in enumerate(class_names, start=1)
        )

if __name__ == "__main__":
    # Input: source videos, one sub-folder per class.
    ori_data_path = 'data/UCF-101'
    # Output: folder for labels.txt and root of the train/val/test frame trees.
    out_label_path = 'data'
    output_data_path = 'data/ucf101'

    # Step 1: write the label file.
    label_text_write(ori_data_path=ori_data_path, out_label_path=out_label_path)

    # Step 2: split the dataset and generate the per-video frame images.
    preprocess(ori_data_path=ori_data_path, output_data_path=output_data_path)

BalanceBeam划分完成
SumoWrestling划分完成
Surfing划分完成
WritingOnBoard划分完成
FloorGymnastics划分完成
Mixing划分完成
TennisSwing划分完成
HorseRiding划分完成
JumpRope划分完成
JumpingJack划分完成
BaseballPitch划分完成
Rowing划分完成
Rafting划分完成
FrontCrawl划分完成
IceDancing划分完成
ParallelBars划分完成
BlowingCandles划分完成
HeadMassage划分完成
SkateBoarding划分完成
WalkingWithDog划分完成
CliffDiving划分完成
Skijet划分完成
Nunchucks划分完成
Billiards划分完成
Bowling划分完成
PullUps划分完成
Swing划分完成
WallPushups划分完成
PoleVault划分完成
HandstandPushups划分完成
Haircut划分完成
SoccerPenalty划分完成
TableTennisShot划分完成
GolfSwing划分完成
Lunges划分完成
PommelHorse划分完成
BreastStroke划分完成
MoppingFloor划分完成
PlayingPiano划分完成
Knitting划分完成
PlayingTabla划分完成
BoxingSpeedBag划分完成
Basketball划分完成
VolleyballSpiking划分完成
SoccerJuggling划分完成
BandMarching划分完成
PlayingCello划分完成
PlayingDhol划分完成
CleanAndJerk划分完成
StillRings划分完成
Typing划分完成
HandstandWalking划分完成
BabyCrawling划分完成
YoYo划分完成
PlayingSitar划分完成
PushUps划分完成
Kayaking划分完成
CuttingInKitchen划分完成
ApplyEyeMakeup划分完成
HulaHoop划分完成
JugglingBalls划分完成
UnevenBars划分完成
PlayingGuitar划分完成
Fencing划分完