In [1]:

import argparse
import contextlib
import gc
import logging
import math
import os
import random
import shutil
from pathlib import Path

import accelerate
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import ProjectConfiguration, set_seed
from datasets import load_dataset
from huggingface_hub import create_repo, upload_folder
from packaging import version
from PIL import Image
from torchvision import transforms
from tqdm.auto import tqdm
from transformers import AutoTokenizer, PretrainedConfig

import diffusers
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    DDPMScheduler,
    StableDiffusionControlNetPipeline,
    UNet2DConditionModel,
    UniPCMultistepScheduler,
)
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version, is_wandb_available
from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.torch_utils import is_compiled_module

from neuraltexture_controlnet import NeuralTextureControlNetModel


  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


In [2]:
def import_model_class_from_model_name_or_path(
    pretrained_model_name_or_path: str, revision: str
):
    text_encoder_config = PretrainedConfig.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="text_encoder",
        revision=revision,
    )
    model_class = text_encoder_config.architectures[0]

    if model_class == "CLIPTextModel":
        from transformers import CLIPTextModel

        return CLIPTextModel
    
    else:
        raise ValueError(f"{model_class} is not supported.")



text_encoder_cls = import_model_class_from_model_name_or_path(
	"stabilityai/stable-diffusion-2-1-base", None
)
unet = UNet2DConditionModel.from_pretrained(
        "stabilityai/stable-diffusion-2-1-base",
        subfolder="unet",   
    )
weight_dtype = torch.float32
# Load scheduler and models
noise_scheduler = DDPMScheduler.from_pretrained(
	"stabilityai/stable-diffusion-2-1-base", subfolder="scheduler"
)
text_encoder = text_encoder_cls.from_pretrained(
	"stabilityai/stable-diffusion-2-1-base",
	subfolder="text_encoder",
)
vae = AutoencoderKL.from_pretrained(
	"stabilityai/stable-diffusion-2-1-base",
	subfolder="vae"
)
tokenizer = AutoTokenizer.from_pretrained(
	"stabilityai/stable-diffusion-2-1-base",
	subfolder="tokenizer",
	use_fast=False,
)
controlnet_dir = '/data2/diffusion-project/diffusion-project/custom_train/dilightnet-openillum-2-1-2-base-v2'
controlnet = NeuralTextureControlNetModel.from_pretrained(controlnet_dir, torch_dtype=weight_dtype)

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


In [9]:
pipeline = StableDiffusionControlNetPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1-base",
        tokenizer=tokenizer,
        unet=unet,
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=weight_dtype)
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)
pipeline = pipeline.to('cuda')
pipeline.set_progress_bar_config(disable=True)




Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

In [5]:
from relighting_dataset import RelightingDataset
validation_dataset = RelightingDataset(
	data_jsonl="/data2/diffusion-project/diffusion-project/custom_train/dataset_v2/eval_v2.jsonl",
	pretrained_model= "stabilityai/stable-diffusion-2-1-base",
	channel_aug_ratio= 0,  # add to args
	empty_prompt_ratio= 0,  # add to args
	log_encode_hint=False,  # add to args
	load_mask=True,  # add to args
)


Loading data: 128it [00:00, 177654.17it/s]


In [12]:
batch = validation_dataset[:]

slice(None, None, None) TypeError('list indices must be integers or slices, not str') list indices must be integers or slices, not str


In [None]:
prompt = batch["text"]
validation_image = batch["conditioning_pixel_values"].to(
	'cuda', dtype=weight_dtype
)[None]

images = []
for _ in range(4):
	with torch.autocast("cuda"):
		image = pipeline(
			prompt,
			validation_image,
			num_inference_steps=100,
			
		).images[0]
	images.append(image)

cond_pixels = batch[
	"conditioning_pixel_values"
]  # hints. [mask, ref image , diffuse, 3*ggx]

if args.add_mask:
	cond_pixels = cond_pixels[1:]  # skip mask
# to numpy
cond_pixels = cond_pixels.cpu().numpy()
target_image = batch["pixel_values"].cpu().numpy()
target_image = (target_image / 2.0 + 0.5).transpose(1, 2, 0)

slice(None, None, None) TypeError('list indices must be integers or slices, not str') list indices must be integers or slices, not str


AttributeError: 'dict' object has no attribute 'to'

In [None]:
image = pipeline(prompt, validation_image, num_inference_steps=30).images[0]