In [None]:
import argparse
import os
import time
from datetime import datetime
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.preprocessing import OneHotEncoder
from lstm_model import LSTMModel


In [None]:
class Args:
    """Minimal attribute container: each keyword argument becomes an attribute."""

    def __init__(self, **kwargs):
        # Walk the keyword names and attach the matching value to the instance.
        for name in kwargs:
            setattr(self, name, kwargs[name])

# Define the run parameters.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
dataset_dir = "/data/yangyihui/usr_dir/bank_project/Video-based-Action-Recognition-Pytorch/myvediodataset"
# Class names. predict_on_video indexes this list with the argmax of the model
# output, so the order here matters.
classes_list = ['openfile1', 'openmail1','打开金途官网']
# Frame size and clip length handed to LSTMModel below.
image_height = 64
image_width = 64
sequence_length = 20
# NOTE(review): the seeding cell sets its own `seed_constant = 27` and does not read this value.
seed = 27
# NOTE(review): the training cell passes a literal 100 to lstm_model.train and never
# reads args.epochs; presumably the same quantity — confirm.
epochs = 100
# Read later as args.batch_size when the DataLoaders are built.
batch_size = 4

# Bundle the parameters into a single object.
args = Args(
    dataset_dir=dataset_dir,
    classes_list=classes_list,
    image_height=image_height,
    image_width=image_width,
    sequence_length=sequence_length,
    seed=seed,
    epochs=epochs,
    batch_size=batch_size
)

In [None]:
# Seed every RNG used by the notebook from the single configured value
# (args.seed) rather than a second hard-coded literal, so that editing the
# configuration cell actually changes the seed used for the run.
seed_constant = args.seed
np.random.seed(seed_constant)
random.seed(seed_constant)
torch.manual_seed(seed_constant)
if torch.cuda.is_available():
    # Seed the generators of all CUDA devices as well.
    torch.cuda.manual_seed_all(seed_constant)
print('program begin')

In [None]:
# Build the dataset.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Helper object configured from the shared `args`; it is reused by later cells
# to create and train the model.
lstm_model = LSTMModel(
    dataset_dir=args.dataset_dir,
    classes_list=args.classes_list,
    image_height=args.image_height,
    image_width=args.image_width,
    sequence_length=args.sequence_length,
)

print('Begin creating dataset')
# create_dataset returns three values; only the first two are kept.
features, labels, _ = lstm_model.create_dataset(
    classes_list=args.classes_list,
    dataset_dir=args.dataset_dir,
)
print('Dataset created successfully')

# Quick sanity check of what was loaded.
print(features.shape)
print(labels.shape)
print(labels)


In [None]:

from torch.utils.data import TensorDataset, DataLoader
from scipy.sparse import issparse
print(features.shape)
print(labels.shape)
print(labels)

# Wrap the NumPy arrays as tensors (from_numpy shares memory with the arrays).
features_tensor = torch.from_numpy(features)
labels_tensor = torch.from_numpy(labels)
dataset = TensorDataset(features_tensor, labels_tensor)

# Split 75% / 25% into train and test subsets.
# A dedicated generator seeded from args.seed makes this cell idempotent:
# drawing from the global RNG (the previous behaviour) produced a different
# partition every time the cell was re-run, moving samples between the train
# and test sets. Note that the resulting partition differs from the one the
# global RNG used to produce.
train_size = int(0.75 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(args.seed)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Build the DataLoaders; only the training data is shuffled.
batch_size = args.batch_size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:

import torch
from sklearn.model_selection import train_test_split
from datetime import datetime
import torch.nn.functional as F
import torch
from lstm_model import LSTMModel


# Build the network through the LSTMModel helper created in the dataset cell.
model = lstm_model.create_LRCN_model()
print("Model created successfully!")

In [None]:
# Start training.
# Turns cuDNN off globally (the flag stays off until something sets it back).
# NOTE(review): the reason is not visible here — confirm it is still required.
torch.backends.cudnn.enabled = False
# NOTE(review): the trailing literal 100 presumably is the epoch count and duplicates
# args.epochs — confirm against LSTMModel.train before replacing it.
model=lstm_model.train(model,train_loader,test_loader,device,100)

# Save the model weights (state_dict only) under a timestamped file name,
# relative to the current working directory.
current_date_time_string = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
model_file_name = f'LSTM_model_{current_date_time_string}.pth'
torch.save(model.state_dict(), model_file_name)




In [None]:
import torch
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
import argparse
import os
from collections import deque
# Select the device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Frame size that predict_on_video resizes every frame to.
# NOTE(review): duplicates args.image_height / args.image_width (both 64) — keep in sync.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

def predict_on_video(video_file_path, output_file_path, SEQUENCE_LENGTH, model, CLASSES_LIST, confidence_threshold=0.9):
    """Write a copy of a video with the predicted class name drawn on its frames.

    Each frame read from ``video_file_path`` is resized to
    ``(IMAGE_WIDTH, IMAGE_HEIGHT)``, divided by 255 and appended to a sliding
    window holding the last ``SEQUENCE_LENGTH`` frames. Whenever the window is
    full, the model is run on it; if the highest softmax probability is at
    least ``confidence_threshold`` the corresponding entry of ``CLASSES_LIST``
    is drawn on the current frame. Every frame, annotated or not, is written to
    ``output_file_path`` at the source resolution and frame rate.

    Reads the module-level ``device``, ``IMAGE_WIDTH`` and ``IMAGE_HEIGHT``.
    Prints the probability array for every evaluated window.

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path of the video file to create.
        SEQUENCE_LENGTH: Number of consecutive frames fed to the model at once.
        model: Model to run; it is moved to ``device`` and evaluated in eval
            mode, and its previous training/eval mode is restored on exit.
        CLASSES_LIST: Class names indexed by the model's output position.
        confidence_threshold: Minimum top probability required to draw a label.

    Returns:
        None.
    """
    video_reader = cv2.VideoCapture(video_file_path)

    original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Lower-case 'mp4v' is the tag the MP4 container accepts directly; the
    # upper-case spelling only works through OpenCV's fallback and logs a warning.
    video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
                                   video_reader.get(cv2.CAP_PROP_FPS), (original_video_width, original_video_height))

    frames_queue = deque(maxlen=SEQUENCE_LENGTH)
    was_training = model.training

    try:
        # Loop-invariant setup: move the model once and switch to inference
        # behaviour (affects layers such as dropout / batch-norm, if any).
        model = model.to(device)
        model.eval()

        # No gradients are needed for prediction; this avoids building an
        # autograd graph for every frame.
        with torch.no_grad():
            while video_reader.isOpened():
                ok, frame = video_reader.read()
                if not ok:
                    break
                if not frame.size:
                    continue

                resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
                frames_queue.append(resized_frame / 255.0)

                if len(frames_queue) == SEQUENCE_LENGTH:
                    # Stack the window into one array and add a leading batch
                    # axis of size 1 before converting to a float32 tensor.
                    window = np.stack(list(frames_queue))[np.newaxis, ...]
                    input_tensor = torch.from_numpy(window).float().to(device)

                    predicted_labels_probabilities = model(input_tensor)
                    probability_tensor = torch.softmax(predicted_labels_probabilities, dim=1)

                    # Move to CPU and convert to NumPy for argmax / printing.
                    probability_array = probability_tensor.cpu().numpy()
                    print(probability_array)

                    probability = probability_array[0]
                    predicted_label = int(np.argmax(probability))
                    if probability[predicted_label] >= confidence_threshold:
                        # Draw the predicted class on the frame.
                        cv2.putText(frame, CLASSES_LIST[predicted_label], (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write the (possibly annotated) frame to the output video.
                video_writer.write(frame)
    finally:
        # Always release the capture/writer handles, even if inference fails,
        # and put the model back into the mode the caller had it in.
        video_reader.release()
        video_writer.release()
        model.train(was_training)


# Rebuild the network and load previously saved weights into it.
lstm_model=LSTMModel(dataset_dir=args.dataset_dir,classes_list=args.classes_list,image_height=args.image_height,image_width=args.image_width,sequence_length=args.sequence_length)

model = lstm_model.create_LRCN_model()
print("Model created successfully!")
model_path = '/data/yangyihui/usr_dir/bank_project/Video-based-Action-Recognition-Pytorch/LSTM_model_2024_11_07_18_53_29.pth'  # replace with the path of your model file
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
# Switch to inference mode before predicting; a freshly constructed module
# starts in training mode.
model.eval()

input_video_file_path ='/data/yangyihui/usr_dir/bank_project/Video-based-Action-Recognition-Pytorch/test_videos/背景.mp4'
output_dir = './'
output_video_name = 'test.mp4'
output_video_file_path = os.path.join(output_dir, output_video_name)
predict_on_video(input_video_file_path, output_video_file_path, args.sequence_length, model, args.classes_list)

# Re-encode the annotated video with libx264 at a height of 300 px and show it inline.
# NOTE(review): the source file is deleted while the clip that reads it is
# still open and is then overwritten in place — this presumably relies on the
# OS keeping an unlinked file readable; confirm on the target platform.
processed_video = VideoFileClip(output_video_file_path, audio=False, target_resolution=(300, None))
os.remove(output_video_file_path)
processed_video.write_videofile(output_video_file_path, codec='libx264', audio=False)
processed_video.ipython_display(maxduration=1800)


In [None]:
import torch
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
import argparse
import os
from collections import deque
# Select the device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Frame size constants.
# NOTE(review): not read by the predict_on_video defined in this cell.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

def predict_on_video(video_file_path, output_file_path):
    """Copy a video frame by frame into a new MP4 file; no model is involved.

    NOTE: this definition reuses the name of the model-based
    ``predict_on_video`` from the previous cell and replaces it once this cell
    has run, so the earlier call signature is no longer available afterwards.

    Args:
        video_file_path: Path of the input video.
        output_file_path: Path of the video file to create; it is written with
            the source resolution and frame rate.

    Returns:
        None.
    """
    video_reader = cv2.VideoCapture(video_file_path)

    original_video_width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_video_height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Lower-case 'mp4v' is the tag the MP4 container accepts directly; the
    # upper-case spelling only works through OpenCV's fallback and logs a warning.
    video_writer = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
                                   video_reader.get(cv2.CAP_PROP_FPS), (original_video_width, original_video_height))
    # The quality property is a percentage (0..100); request the maximum
    # instead of the previous out-of-range value. Backends that do not support
    # the property simply report failure from set().
    video_writer.set(cv2.VIDEOWRITER_PROP_QUALITY, 100)

    try:
        while video_reader.isOpened():
            ok, frame = video_reader.read()
            if not ok:
                break
            if not frame.size:
                continue

            video_writer.write(frame)
    finally:
        # Release both handles even if reading or writing fails part-way.
        video_reader.release()
        video_writer.release()




# Copy the test video with the two-argument predict_on_video defined in this cell.
# NOTE(review): absolute, machine-specific input path.
input_video_file_path ='/data/yangyihui/usr_dir/bank_project/Video-based-Action-Recognition-Pytorch/test_videos/背景.mp4'
output_dir = './'
# NOTE(review): the saved output under this cell mentions ./test1.mp4, so it
# appears to come from an earlier version of this cell.
output_video_name = 'test2.mp4'
output_video_file_path = os.path.join(output_dir, output_video_name)
predict_on_video(input_video_file_path, output_video_file_path)

# Re-encode the copy with libx264 at a height of 300 px and show it inline.
# NOTE(review): the file is removed while the clip reading it is still open and
# then rewritten at the same path — confirm this works on the target platform.
processed_video = VideoFileClip(output_video_file_path, audio=False, target_resolution=(300, None))
os.remove(output_video_file_path)
processed_video.write_videofile(output_video_file_path, codec='libx264', audio=False)
processed_video.ipython_display(maxduration=1800)


OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Moviepy - Building video ./test1.mp4.
Moviepy - Writing video ./test1.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready ./test1.mp4
Moviepy - Building video __temp__.mp4.
Moviepy - Writing video __temp__.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready __temp__.mp4


