conda create -n dc-control python=3.10
conda activate dc-control
pip install -r requirements.txt

| Model | Path |
|---|---|
| Content Encoder | ControlNetPlus-SDXL |
| Intra-Element Controller | yang1232009/DC-ControlNet |
| Inter-Element Controller | Coming soon |
You can use the pretrained ControlNet model based on SDXL.
# Example: SDXL text-to-image generation conditioned on a normal map
# through the pretrained ControlNet-Union content encoder.
from pipelines.pipeline import StableDiffusionXLControlNetUnionPipeline
from models.controlnet_union import ControlNetModel_Union
from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler
import torch
from diffusers.utils import load_image

# Pretrained union ControlNet plus the fp16-safe SDXL VAE, both in half precision.
controlnet = ControlNetModel_Union.from_pretrained("yang1232009/ControlNetPlus-SDXL").to(torch.float16)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix").to(torch.float16)

# Assemble the SDXL pipeline around the base model, then move it to GPU in fp16.
pipeline = StableDiffusionXLControlNetUnionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
)
pipeline.to("cuda")
pipeline.to(torch.float16)

# The pipeline takes an 8-slot condition list; slot order is
# canny, hed, zoe, normal, sam, dot, box, mask. Only the normal-map
# slot (index 3) is populated here, and the control-type vector flags it.
cond_img = load_image("./assets/condtion_images/guitar_normal.png")
cond_img = cond_img.resize((1024, 1024))
cond_list = [0] * 8
cond_list[3] = cond_img
union_control_type = torch.Tensor([0, 0, 0, 1, 0, 0, 0, 0])

# Prompt plus quality boosters; fixed seed for reproducibility.
prompt = 'A guitar'
positive_prompt = ", ultra highres, sharpness texture, High detail RAW Photo, shallow depth of field, dslr, film grain"
negative_prompt = " blurry, disfigured, ugly, bad, immature, cartoon, anime, 3d, painting, b&w, cartoon, painting, illustration, worst quality, low quality"
generator = torch.Generator(device="cuda").manual_seed(42)

# Run 50 denoising steps and keep the first (only) generated image.
result = pipeline(
    prompt=prompt + positive_prompt,
    negative_prompt=negative_prompt,
    image_list=cond_list,
    union_control_type=union_control_type,
    num_inference_steps=50,
    generator=generator,
    num_images_per_prompt=1,
).images[0]
result.save("example.png")
bash train_intra_element_controller.sh
bash train_inter_element_controller.sh