In [36]:
from omegaconf import OmegaConf, DictConfig
def load_config(path ="configs/tea-pour.yaml", print_config = True):

    config = OmegaConf.load("configs/tea-pour.yaml")

    # Recursively merge base configs
    cur_config_path = "configs/tea-pour.yaml"
    cur_config = config
    while "base_config" in cur_config and cur_config.base_config != cur_config_path:
        base_config = OmegaConf.load(cur_config.base_config)
        config = OmegaConf.merge(base_config, config)
        cur_config_path = cur_config.base_config
        cur_config = base_config

    prompt = config.generation.prompt
    if isinstance(prompt, str):
        prompt = {"edit": prompt}
    config.generation.prompt = prompt
    OmegaConf.resolve(config)
    if print_config:
        print("[INFO] loaded config:")
        print(OmegaConf.to_yaml(config))
    
    return config

In [38]:
from invert import Inverter
from generate import Generator
from utils import init_model, seed_everything, get_frame_ids
config = load_config()

[INFO] loaded config:
sd_version: '1.5'
model_key: null
input_path: data/tea-pour.mp4
work_dir: outputs/tea-pour
height: 512
width: 512
inversion:
  save_path: outputs/tea-pour/latents
  prompt: a tea pot pouring tea into a cup.
  n_frames: null
  steps: 50
  save_intermediate: false
  save_steps: 50
  use_blip: false
  recon: false
  control: none
  control_scale: 1.0
  batch_size: 8
  force: false
generation:
  control: depth
  pnp_attn_t: 0.5
  pnp_f_t: 0.8
  control_scale: 1.0
  guidance_scale: 7.5
  n_timesteps: 50
  negative_prompt: ugly, blurry, low res
  prompt:
    vim: a tea pot pouring tea into a cup.
    vector: vector illustration of a tea pot pouring tea into a cup.
  latents_path: outputs/tea-pour/latents
  output_path: outputs/tea-pour
  chunk_size: 4
  chunk_ord: mix-4
  local_merge_ratio: 0.95
  merge_global: true
  global_merge_ratio: 0.9
  global_rand: 0.1
  align_batch: true
  frame_range:
  - 64
  frame_ids: null
  save_frame: true
  use_lora: false
seed: 123
devi

In [37]:
load_config("configs/default.yaml",True)

[INFO] loaded config:
sd_version: '1.5'
model_key: null
input_path: data/tea-pour.mp4
work_dir: outputs/tea-pour
height: 512
width: 512
inversion:
  save_path: outputs/tea-pour/latents
  prompt: a tea pot pouring tea into a cup.
  n_frames: null
  steps: 50
  save_intermediate: false
  save_steps: 50
  use_blip: false
  recon: false
  control: none
  control_scale: 1.0
  batch_size: 8
  force: false
generation:
  control: depth
  pnp_attn_t: 0.5
  pnp_f_t: 0.8
  control_scale: 1.0
  guidance_scale: 7.5
  n_timesteps: 50
  negative_prompt: ugly, blurry, low res
  prompt:
    vim: a tea pot pouring tea into a cup.
    vector: vector illustration of a tea pot pouring tea into a cup.
  latents_path: outputs/tea-pour/latents
  output_path: outputs/tea-pour
  chunk_size: 4
  chunk_ord: mix-4
  local_merge_ratio: 0.95
  merge_global: true
  global_merge_ratio: 0.9
  global_rand: 0.1
  align_batch: true
  frame_range:
  - 64
  frame_ids: null
  save_frame: true
  use_lora: false
seed: 123
devi

{'sd_version': '1.5', 'model_key': None, 'input_path': 'data/tea-pour.mp4', 'work_dir': 'outputs/tea-pour', 'height': 512, 'width': 512, 'inversion': {'save_path': 'outputs/tea-pour/latents', 'prompt': 'a tea pot pouring tea into a cup.', 'n_frames': None, 'steps': 50, 'save_intermediate': False, 'save_steps': 50, 'use_blip': False, 'recon': False, 'control': 'none', 'control_scale': 1.0, 'batch_size': 8, 'force': False}, 'generation': {'control': 'depth', 'pnp_attn_t': 0.5, 'pnp_f_t': 0.8, 'control_scale': 1.0, 'guidance_scale': 7.5, 'n_timesteps': 50, 'negative_prompt': 'ugly, blurry, low res', 'prompt': {'vim': 'a tea pot pouring tea into a cup.', 'vector': 'vector illustration of a tea pot pouring tea into a cup.'}, 'latents_path': 'outputs/tea-pour/latents', 'output_path': 'outputs/tea-pour', 'chunk_size': 4, 'chunk_ord': 'mix-4', 'local_merge_ratio': 0.95, 'merge_global': True, 'global_merge_ratio': 0.9, 'global_rand': 0.1, 'align_batch': True, 'frame_range': [64], 'frame_ids': N

In [34]:
config.inversion

{'save_path': 'outputs/tea-pour/latents', 'prompt': 'a tea pot pouring tea into a cup.', 'n_frames': None, 'steps': 50, 'save_intermediate': False, 'save_steps': 50, 'use_blip': False, 'recon': False, 'control': 'none', 'control_scale': 1.0, 'batch_size': 8, 'force': False}

In [32]:
# pipe, scheduler, model_key = init_model(
#         "cuda", config.sd_version, config.model_key, config.generation.control, config.float_precision)
# config.model_key = model_key
# seed_everything(config.seed)
import torch
pipe, scheduler, model_key = init_model(
        device = "cuda",model_key = "runwayml/stable-diffusion-v1-5", control_type = config.generation.control, weight_dtype = "fp16")
inversion = Inverter(pipe, scheduler, config)
# print("Start inversion!")
# inversion = Inverter(pipe, scheduler, config)
# inversion(config.input_path, config.inversion.save_path)


[INFO] loading custome model from: runwayml/stable-diffusion-v1-5
[INFO] loading controlnet from: lllyasviel/control_v11f1p_sd15_depth
[INFO] loaded controlnet!


Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  8.67it/s]


In [24]:
config

{'sd_version': '1.5', 'model_key': 'runwayml/stable-diffusion-v1-5', 'input_path': 'data/tea-pour.mp4', 'work_dir': 'outputs/tea-pour', 'height': 512, 'width': 512, 'inversion': {'save_path': 'outputs/tea-pour/latents', 'prompt': 'a tea pot pouring tea into a cup.', 'n_frames': None, 'steps': 50, 'save_intermediate': False, 'save_steps': 50, 'use_blip': False, 'recon': False, 'control': 'none', 'control_scale': 1.0, 'batch_size': 8, 'force': False}, 'generation': {'control': 'depth', 'pnp_attn_t': 0.5, 'pnp_f_t': 0.8, 'control_scale': 1.0, 'guidance_scale': 7.5, 'n_timesteps': 50, 'negative_prompt': 'ugly, blurry, low res', 'prompt': {'vim': 'a tea pot pouring tea into a cup.', 'vector': 'vector illustration of a tea pot pouring tea into a cup.'}, 'latents_path': 'outputs/tea-pour/latents', 'output_path': 'outputs/tea-pour', 'chunk_size': 4, 'chunk_ord': 'mix-4', 'local_merge_ratio': 0.95, 'merge_global': True, 'global_merge_ratio': 0.9, 'global_rand': 0.1, 'align_batch': True, 'frame_

In [None]:
class VidtoMe():
    def __init__(self, ckpt: str = "runwayml/stable-diffusion-v1-5", precision: torch.dtype = torch.float16, device: torch.device = torch.device("cuda")):
        from utils import init_model, seed_everything, get_frame_ids
        pipe, scheduler, model_key = init_model(
        "cuda",  ckpt, config.generation.control, precision)