AnimateDiff

Paper:
https://arxiv.org/abs/2307.04725

GitHub:
https://github.com/guoyww/AnimateDiff

HuggingFace: 
https://huggingface.co/docs/diffusers/api/pipelines/animatediff

## 1 Generate the animation with AnimateDiff

In [1]:
import torch
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
from diffusers.utils import export_to_gif, export_to_video

# Text conditioning; the later video-to-video cell reuses both strings.
prompt = "Mechanical butterflies"
negative_prompt = "text, watermark, copyright, blurry, low resolution, blur, low quality"

# Motion adapter plus an SD 1.5 based fine-tuned checkpoint, both loaded in float16.
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
adapter = MotionAdapter.from_pretrained(
    "guoyww/animatediff-motion-adapter-v1-5-2",
    torch_dtype=torch.float16,
)
pipe = AnimateDiffPipeline.from_pretrained(
    model_id,
    motion_adapter=adapter,
    torch_dtype=torch.float16,
)

# Replace the pipeline's scheduler with a DDIM scheduler loaded from the
# checkpoint's "scheduler" subfolder, overriding the settings listed here.
scheduler_overrides = {
    "clip_sample": False,
    "timestep_spacing": "linspace",
    "beta_schedule": "linear",
    "steps_offset": 1,
}
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    **scheduler_overrides,
)
pipe.scheduler = scheduler

# Memory savings: VAE slicing and model CPU offload.
pipe.enable_vae_slicing()
pipe.enable_model_cpu_offload()

# A CPU generator with a fixed seed keeps the sampling reproducible across runs.
generator = torch.Generator("cpu").manual_seed(42)
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=24,
    guidance_scale=7.5,
    num_inference_steps=200,
    generator=generator,
)

# Keep the frames of the first generated video; the next cell consumes them.
video_frames = output.frames[0]
# GIF alternative: export_to_gif(video_frames, "animation.gif")
video_path = export_to_video(video_frames, "output_video2.mp4")
video_path

2024-04-29 10:59:25.355309: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 10:59:25.355372: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 10:59:25.356606: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-29 10:59:25.364931: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
The config attributes {'motion_activation_fn': 'geglu

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

The config attributes {'center_input_sample': False, 'flip_sin_to_cos': True, 'freq_shift': 0, 'mid_block_type': 'UNetMidBlock2DCrossAttn', 'only_cross_attention': False, 'attention_head_dim': 8, 'dual_cross_attention': False, 'class_embed_type': None, 'addition_embed_type': None, 'num_class_embeds': None, 'upcast_attention': False, 'resnet_time_scale_shift': 'default', 'resnet_skip_time_act': False, 'resnet_out_scale_factor': 1.0, 'time_embedding_type': 'positional', 'time_embedding_dim': None, 'time_embedding_act_fn': None, 'timestep_post_act': None, 'conv_in_kernel': 3, 'conv_out_kernel': 3, 'projection_class_embeddings_input_dim': None, 'class_embeddings_concat': False, 'mid_block_only_cross_attention': None, 'cross_attention_norm': None, 'addition_embed_type_num_heads': 64} were passed to UNetMotionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/200 [00:00<?, ?it/s]

'output_video2.mp4'

## 2 Resize the frames and refine them with Zeroscope v2 XL

In [2]:
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image
import numpy as np

# Video-to-video pass: resize the generated frames to 1024x576 and run them
# through the Zeroscope v2 XL pipeline.
# Uses `torch`, `export_to_video`, `prompt`, `negative_prompt` and
# `video_frames` from the generation cell, so that cell must run first.
# NOTE: `pipe` is rebound here and no longer refers to the AnimateDiff pipeline.
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_XL", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

# memory optimization
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
pipe.enable_vae_slicing()

# Resize each PIL frame and convert it to a float32 array scaled to [0, 1].
# NOTE(review): resize() to a fixed (1024, 576) does not preserve aspect ratio;
# frames that are not already 16:9 will be stretched.
video_frames_resized = [
    (np.array(frame.resize((1024, 576))) / 255.0).astype(np.float32)
    for frame in video_frames
]

video_frames_processed = pipe(
    prompt,
    negative_prompt=negative_prompt,
    video=video_frames_resized,
    strength=0.6,
).frames[0]

# Convert the frames back to PIL images (assumes float arrays in [0, 1], as the
# * 255 scaling implies). Clip and round before the uint8 cast: a bare
# astype(np.uint8) truncates (254.7 -> 254) and would wrap on any value that
# falls outside 0..255.
video_frames_processed_pil = [
    Image.fromarray(np.clip(frame * 255, 0, 255).round().astype(np.uint8))
    for frame in video_frames_processed
]

video_path = export_to_video(video_frames_processed_pil, "output_video_resized2.mp4")
video_path

text_encoder/model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


  0%|          | 0/30 [00:00<?, ?it/s]

'output_video_resized2.mp4'

## 3 display video

<b>Install the packages below if needed and re-encode the video with ffmpeg (H.264), or it won't be displayed properly!</b>

In [None]:
# One-time setup: uncomment and run these lines if the tools are missing.
# ffmpeg is the command the conversion cell below invokes. Nothing in the
# visible cells imports sk-video, so it is presumably optional — TODO confirm.
# !apt-get install ffmpeg
# !pip install sk-video

In [5]:
# !ffmpeg -i output_video_resized.mp4 -c:v libx264 output_video_converted.mp4 #this shows logs...
!ffmpeg -hide_banner -loglevel error -i output_video_resized2.mp4 -c:v libx264 output_video_converted2.mp4

In [6]:
from IPython.display import Video

# Embed the re-encoded clip in the notebook output (embed=True).
Video("output_video_converted2.mp4", embed=True)