ModelScope

Paper:
https://arxiv.org/abs/2308.06571

HuggingFace:
https://huggingface.co/docs/diffusers/en/api/pipelines/text_to_video

A good guide to the generation settings:
https://zeroscope.replicate.dev/

## 1. Generate the base video

In [1]:
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from PIL import Image

# Tunable generation settings, named so they are easy to find and change.
SEED = 0                    # fixed seed: re-running this cell reproduces the same clip
NUM_INFERENCE_STEPS = 200   # number of denoising steps passed to the pipeline
NUM_FRAMES = 24             # number of frames to generate

# Load the 576w zeroscope checkpoint with float16 weights.
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

# memory optimization
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
pipe.enable_vae_slicing()

prompt = "A Zen citrus tree surrounded by mechanical butterflies in a futuristic botanical garden, a citrus is in the middle giving a speech, (impressionist paintings), (((highly dynamic))), big movements, (watercolour style), blending of colours, soft gradient, Chinese painter Wu Guanzhong, Edward Hopper"
# prompt = "A 20-year-old young woman lying on the grassland" # 😩 bad result
negative_prompt = "text, watermark, copyright, blurry, low resolution, blur, low quality"

# Seeded generator so the sampling noise (and therefore the video) is repeatable.
generator = torch.Generator(device="cpu").manual_seed(SEED)
video_frames = pipe(
    prompt,
    negative_prompt=negative_prompt,
    num_inference_steps=NUM_INFERENCE_STEPS,
    num_frames=NUM_FRAMES,
    generator=generator,
).frames[0]

# Write the frames to disk; the returned path is displayed as the cell output.
video_path = export_to_video(video_frames, "output_video.mp4")
video_path

text_encoder/model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

2024-04-29 07:36:42.616310: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 07:36:42.616372: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 07:36:42.617434: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-29 07:36:42.625009: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  return self.fget.__get__(instance, owner)()


  0%|          | 0/200 [00:00<?, ?it/s]

'output_video2.2_no_hw_200step_onlypromptnframes_neg_small.mp4'

In [None]:
# To learn more about the pipeline and its arguments, uncomment the next line and run this cell.
# help(pipe)

## 2. Upscale (resize the frames to 1024×576)

In [3]:
import numpy as np

# Upscaling pass: load the XL checkpoint and run it video-to-video over the frames from step 1.
# NOTE: this rebinds `pipe`, so the 576w pipeline from step 1 is replaced from here on.
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_XL", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

# memory optimization
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
pipe.enable_vae_slicing()

# ❌ Image.fromarray(frame) on the raw frames is not the right conversion.
# ✅ Multiply by 255 and cast to uint8 first (presumably the frames are floats in [0, 1] —
#    confirm for your diffusers version), then resize to 1024x576 and force RGB mode.
video = [
    Image.fromarray((frame * 255).astype(np.uint8)).resize((1024, 576)).convert('RGB')
    for frame in video_frames
]

# Seeded generator so the upscaling result is repeatable.
upscale_generator = torch.Generator(device="cpu").manual_seed(0)

# Store the result under a NEW name: overwriting `video_frames` would make this cell
# non-idempotent (a second run would upscale the already-upscaled frames).
video_frames_upscaled = pipe(
    prompt,
    negative_prompt=negative_prompt,
    video=video,
    strength=0.6,
    generator=upscale_generator,
).frames[0]

# Write the upscaled frames to disk; the returned path is displayed as the cell output.
video_path = export_to_video(video_frames_upscaled, "output_video_resized.mp4")
video_path

text_encoder/model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

'output_video_resized.mp4'

## 3. Display the video

<b>Install the packages below and re-encode the video with ffmpeg (libx264) first — otherwise it won't be displayed properly in the notebook!</b>

In [7]:
!apt-get install ffmpeg
!pip install sk-video

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 65 not upgraded.
Collecting sk-video
  Downloading sk_video-1.1.10-py2.py3-none-any.whl.metadata (1.0 kB)
Downloading sk_video-1.1.10-py2.py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: sk-video
Successfully installed sk-video-1.1.10
[0m

In [8]:
# !ffmpeg -i output_video_resized.mp4 -c:v libx264 output_video_converted.mp4 #this shows logs...
!ffmpeg -hide_banner -loglevel error -i output_video_resized.mp4 -c:v libx264 output_video_converted.mp4

In [9]:
import IPython

# Embed the re-encoded clip directly in the notebook output.
converted_clip_path = 'output_video_converted.mp4'
IPython.display.Video(converted_clip_path, embed=True)