In [1]:
import cv2
from PIL import Image
import numpy as np
import os
from moviepy.editor import *
from tqdm import tqdm
import matplotlib.pyplot as plt
from pytorch_lightning import seed_everything
import os
import torch
from einops import rearrange

In [12]:
!which python
# !/home/ubuntu/anaconda3/bin/pip install einops
# !pip install --upgrade paddlehub -i https://pypi.tuna.tsinghua.edu.cn/simple
# !hub install animegan_v2_hayao_99==1.1.0
# !pip install paddleocr --upgrade
# !pip install paddlepaddle
# !pip uninstall opt_einsum --Y
# !pip install opt_einsum
# !hub install animegan_v2_hayao_99==1.1.0
# !hub install animegan_v2_shinkai_53==1.1.0
# ! pip install matplotlib

/usr/bin/python


In [2]:
class HEDdetector:
    def __init__(self):
        remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/network-bsds500.pth"
        modelpath = os.path.join("./", "network-bsds500.pth")
        if not os.path.exists(modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(remote_model_path, model_dir="./")
        self.netNetwork = Network(modelpath).cuda().eval()

    def __call__(self, input_image):
        assert input_image.ndim == 3
        input_image = input_image[:, :, ::-1].copy()
        with torch.no_grad():
            image_hed = torch.from_numpy(input_image).float().cuda()
            image_hed = image_hed / 255.0
            image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
            edge = self.netNetwork(image_hed)[0]
            edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
            return edge[0]

In [3]:
class Network(torch.nn.Module):
    def __init__(self, model_path):
        super().__init__()

        self.netVggOne = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False)
        )

        self.netVggTwo = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False)
        )

        self.netVggThr = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False)
        )

        self.netVggFou = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False)
        )

        self.netVggFiv = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False),
            torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(inplace=False)
        )

        self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0)
        self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0)
        self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0)
        self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)
        self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)

        self.netCombine = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0),
            torch.nn.Sigmoid()
        )

        self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(model_path).items()})

    def forward(self, tenInput):
        tenInput = tenInput * 255.0
        tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1)

        tenVggOne = self.netVggOne(tenInput)
        tenVggTwo = self.netVggTwo(tenVggOne)
        tenVggThr = self.netVggThr(tenVggTwo)
        tenVggFou = self.netVggFou(tenVggThr)
        tenVggFiv = self.netVggFiv(tenVggFou)

        tenScoreOne = self.netScoreOne(tenVggOne)
        tenScoreTwo = self.netScoreTwo(tenVggTwo)
        tenScoreThr = self.netScoreThr(tenVggThr)
        tenScoreFou = self.netScoreFou(tenVggFou)
        tenScoreFiv = self.netScoreFiv(tenVggFiv)

        tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
        tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
        tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
        tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
        tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)

        return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1))

def resize_image(input_image, resolution):
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H = int(np.round(H / 64.0)) * 64
    W = int(np.round(W / 64.0)) * 64
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    return img

In [4]:
def HWC3(x):
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y

In [5]:
import os
current_path = os.getcwd()
print(current_path)

/home/ubuntu/AI二次元/0319_jiaran_yao


In [6]:
# 原始视频地址
original_video_path = '嘉然摇_Cut.mp4'
# 提取视频图像的存放地址
original_video_img_path = 'input_images/'
# 输出图像的存放地址 (Canny edge)
transfered_img_path = "transfered_images/"
# 合成视频存放地址
img2video_path = 'canny_output.mp4'
# 添加声音后的视频最终输出地址
output_video_path = 'canny_output_with_sound.mp4'

In [19]:
# 从视频提取图片
def video2img(video_path, out_path):
    cap = cv2.VideoCapture(video_path)
    fgbg = cv2.createBackgroundSubtractorMOG2()
    i=1
    while True:
        ret, frame = cap.read()
#         fgmask = fgbg.apply(frame)
#         print(frame.shape)
#         print(fgmask.shape)
        if frame is None:
            break
        else:
            cv2.imwrite(out_path + str(i) + ".png", frame)
            i+=1
    return

In [20]:
# 把图片转Canny
def ani2video(img_path, org_video_path, out_path):
    apply_hed = HEDdetector()
    # 获取图片总数
    file_list = os.listdir(img_path)
    img_num = len(file_list)

    # 查看原始视频的参数
#     cap = cv2.VideoCapture(org_video_path)
#     ret, frame = cap.read()
    # 任选一张图片查看高度和宽度
    height = 512
    width = 512

    fps = cap.get(cv2.CAP_PROP_FPS)  # 返回视频的fps--帧率
    
    isExist = os.path.exists(transfered_img_path)
    if not isExist:
       # Create a new directory because it does not exist
       os.makedirs(transfered_img_path)

    # 把参数用到我们要创建的视频上
    video = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, (width, height))  # 创建视频流对象
    """
    参数1 即将保存的文件路径
    参数2 VideoWriter_fourcc为视频编解码器 cv2.VideoWriter_fourcc('m', 'p', '4', 'v') 文件名后缀为.mp4
    参数3 为帧播放速率
    参数4 (width,height)为视频帧大小
    """
    for i in tqdm(range(img_num)):
        f_name = str(i + 1) + '.png'
        item = os.path.join(img_path, f_name)
#         print(item)
        image = cv2.imread(item)
        detected_map = apply_hed(resize_image(image, 512))
        detected_map = HWC3(detected_map)
        cv2.imwrite(transfered_img_path + f_name, detected_map) 
#         video.write(detected_map)  # 把图片写进视频
#     video.release()  # 释放

In [19]:
# 从原始视频上提取声音合成到新生成的视频上
def sound2video(org_video_path, new_video_path, out_video_path):
    # 读取原始视频
    video_o = VideoFileClip(org_video_path)
    # 获取原始视频的音频部分
    audio_o = video_o.audio

    # 读取新生成视频
    video_clip = VideoFileClip(new_video_path)
    # 指向新生成视频的音频部分
    video_clip2 = video_clip.set_audio(audio_o)
    # 修改音频部分并输出最终视频
    video_clip2.write_videofile(out_video_path)

#  从视频提取图片

In [22]:
# 第一步：视频->图像
# if not os.path.exists(original_video_img_path):
#     os.mkdir(original_video_img_path)
# video2img(video_path=original_video_path, out_path=original_video_img_path)

# 图像转换为canny并合成新的视频

In [23]:
# # 该模型需要gpu跑，所以需要设置下gpu环境
# %env CUDA_VISIBLE_DEVICES=0
# %matplotlib inline

In [24]:
fps = 30

# 获取图片总数
file_list = os.listdir(transfered_img_path)
file_num = len(file_list)

# 获取视频长宽
item = os.path.join(transfered_img_path, str(1) + '.png')
width, height, _ = cv2.imread(item).shape
print(width, height)
video = cv2.VideoWriter("output_hed.mp4",  cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, (height, width))  # 创建视频流对象


for i in range(file_num):
    item = os.path.join(transfered_img_path, str(i + 1) + '.png')
    if os.path.exists(item):
#         print("read",item)
        print(cv2.imread(item).shape)
        video.write(cv2.imread(item))  # 把图片写进视频
video.release()  # 释放

832 512
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3)
(832, 512, 3

# 添加原始音频

In [21]:
# 第三步：加上原始音频
if not os.path.exists(output_video_path):
    sound2video(org_video_path=original_video_path, new_video_path="output_hed.mp4", out_video_path=output_video_path)
else:
    print('最终视频已存在，请查看输出路径')

Moviepy - Building video canny_output_with_sound.mp4.
MoviePy - Writing audio in canny_output_with_soundTEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video canny_output_with_sound.mp4



                                                               

Moviepy - Done !
Moviepy - video ready canny_output_with_sound.mp4


In [None]:
original_video_path, img2video_path, output_video_path