In [8]:
import torch
import numpy as np
import random
import os
import json
from PIL import Image

from diffusers.utils import load_image
from diffusers import EulerDiscreteScheduler, DDIMScheduler
from huggingface_hub import hf_hub_download

from photomaker import PhotoMakerStableDiffusionXLPipeline

In [9]:
# global variables and helper functions
def image_grid(imgs, rows, cols, size_after_resize):
    """Tile ``imgs`` into a single ``rows`` x ``cols`` contact sheet.

    Each image is resized to a square tile and pasted in row-major order
    (left to right, then top to bottom).

    Args:
        imgs: Sequence of images exposing ``resize``; its length must equal
            ``rows * cols``.
        rows: Number of rows in the grid.
        cols: Number of columns in the grid.
        size_after_resize: Edge length of every square tile.

    Returns:
        A new RGB image of size
        ``(cols * size_after_resize, rows * size_after_resize)``.

    Raises:
        AssertionError: If ``len(imgs)`` does not equal ``rows * cols``.
    """
    assert len(imgs) == rows*cols

    tile = size_after_resize
    canvas = Image.new('RGB', size=(cols * tile, rows * tile))

    for index, picture in enumerate(imgs):
        # Row-major placement: the quotient selects the row, the remainder the column.
        row, col = divmod(index, cols)
        canvas.paste(picture.resize((tile, tile)), box=(col * tile, row * tile))
    return canvas

# Model identifier handed to `PhotoMakerStableDiffusionXLPipeline.from_pretrained` when the pipeline is built.
base_model_path = 'SG161222/RealVisXL_V3.0'
# Device string used for the pipeline, the ID encoder and the sampling generator.
device = "cuda"
# NOTE(review): the generation cell builds its output directories from its own `output_path`;
# confirm whether this default is still needed.
save_path = "./outputs"

In [10]:
from huggingface_hub import hf_hub_download

# Download the PhotoMaker checkpoint and split the returned local path once,
# because the adapter loader takes the folder and the file name separately.
photomaker_ckpt = hf_hub_download(
    repo_id="TencentARC/PhotoMaker",
    filename="photomaker-v1.bin",
    repo_type="model",
)
ckpt_dir, ckpt_file = os.path.split(photomaker_ckpt)

# Build the base pipeline and move it to the target device.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="fp16",
)
pipe = pipe.to(device)

# Attach the PhotoMaker adapter; "img" is the trigger word the prompts must contain.
pipe.load_photomaker_adapter(
    ckpt_dir,
    subfolder="",
    weight_name=ckpt_file,
    trigger_word="img"
)
pipe.id_encoder.to(device)

# Sampler: DDIM, configured from the pipeline's current scheduler config.
# (Alternative tried earlier: EulerDiscreteScheduler.from_config(pipe.scheduler.config).)
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

# Fuse the loaded LoRA weights into the pipeline.
# (Alternative tried earlier: pipe.set_adapters(["photomaker"], adapter_weights=[1.0]).)
pipe.fuse_lora()

Loading pipeline components...: 100%|██████████| 7/7 [00:01<00:00,  3.52it/s]


Loading PhotoMaker components [1] id_encoder from [/home/huwentao/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/d7ec3fc17290263135825194aeb3bc456da67cc5]...
Loading PhotoMaker components [2] lora_weights from [/home/huwentao/.cache/huggingface/hub/models--TencentARC--PhotoMaker/snapshots/d7ec3fc17290263135825194aeb3bc456da67cc5]


In [None]:
# ---------------------------------------------------------------------------
# Generate `num_generation` images for every (subject, prompt) pair and save
# them under <output_path>/<subject>/<prompt>/<index>.jpg.
# ---------------------------------------------------------------------------

# Single Subject Generation: list subject folder names to restrict the run;
# leave empty to process every subject folder found in the dataset.
single_subject = []                            # e.g. ["backpack"]
# Single Prompt Generation: list prompt templates to restrict the run;
# leave empty to use every prompt list from the dataset info file.
single_prompt = []                            # e.g. ["a {0} {1} near the pool"]

num_generation = 4
negative_prompt = "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth"
seed = 42  # the generator is re-seeded with this value for every prompt

## Parameter setting (identical for every prompt, so computed once up front)
num_steps = 50
style_strength_ratio = 20
# Merge step derived from the style strength percentage, capped at 30.
start_merge_step = min(int(float(style_strength_ratio) / 100 * num_steps), 30)

output_path = "../../outputs/face/photomaker"
dataset_path = "../../pcs_dataset/face"
dataset_info_path = "../../pcs_dataset/info.json"

# The info file is read once; both the gender lookup and the prompt lists come from it.
with open(dataset_info_path, 'r') as file:
    data_info = json.load(file)["face"]

id_with_gender = data_info["id_with_gender"]

if len(single_subject):
    subjects = single_subject
else:
    # Keep real subject folders only: hidden entries such as `.ipynb_checkpoints`
    # and stray files would otherwise crash the loop. Sorting makes the run order stable.
    subjects = sorted(
        name for name in os.listdir(dataset_path)
        if not name.startswith(".") and os.path.isdir(os.path.join(dataset_path, name))
    )

if len(single_prompt):
    prompts = single_prompt
else:
    prompts = data_info["prompt_accessory"] + data_info["prompt_context"] + data_info["prompt_action"] + data_info["prompt_style"]

for subject in subjects:

    # Collect the input ID images of this subject (hidden files and sub-folders are skipped).
    input_folder_name = os.path.join(dataset_path, subject)
    image_path_list = sorted(
        os.path.join(input_folder_name, basename)
        for basename in os.listdir(input_folder_name)
        if not basename.startswith(".") and os.path.isfile(os.path.join(input_folder_name, basename))
    )
    input_id_images = [load_image(image_path) for image_path in image_path_list]

    print(f"***** Subject: {subject} *****")

    os.makedirs(os.path.join(output_path, subject), exist_ok=True)

    for prompt_template in prompts:
        # "{0} {1}" becomes "<gender word> img"; "img" is the adapter's trigger word.
        prompt = prompt_template.replace("{0} {1}", f"{id_with_gender[subject]} img") + ", high quality"
        print(f"**Prompt**: {prompt}")

        # Fresh generator per prompt so every prompt starts from the same seed.
        generator = torch.Generator(device=device).manual_seed(seed)

        images = pipe(
            prompt=prompt,
            input_id_images=input_id_images,
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_generation,
            num_inference_steps=num_steps,
            start_merge_step=start_merge_step,
            generator=generator,
        ).images

        # The final prompt text is used as the folder name; the evaluation cell reads it back from there.
        prompt_output_dir = os.path.join(output_path, subject, prompt)
        os.makedirs(prompt_output_dir, exist_ok=True)

        for img_idx, img in enumerate(images):
            img.save(os.path.join(prompt_output_dir, f"{img_idx:04d}.jpg"))
    

## Evaluation

### Calculate the similarity for each sample
Calculate the identity (image-to-image) similarity with the face evaluator and the text-to-image similarity with the CLIP evaluator.

In [None]:
import os, sys, json

# Expose only GPU 0 to this process.
# NOTE(review): this presumably has to be set before CUDA is initialised in the
# running kernel to have any effect - confirm when this cell runs after the generation cells.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Make the directory two levels up importable (presumably where the `utils`
# package imported below lives). Guarded so re-running the cell does not keep
# appending the same entry to sys.path.
utils_path = os.path.abspath(os.path.join('../..'))
if utils_path not in sys.path:
    sys.path.append(utils_path)

import torch
import numpy as np
from PIL import Image
from utils.clip_eval import evaluate_t2i
from utils.face_eval import calculate_identity_similarity

In [None]:
def convert_to_native(data):
    """Recursively replace NumPy values with plain Python equivalents.

    Args:
        data: Any object. NumPy arrays become (nested) lists, NumPy scalars
            become Python scalars, and dicts, lists and tuples are rebuilt
            with their contents converted. Dictionary keys are left as-is.

    Returns:
        The converted structure; anything not listed above is returned unchanged.
    """
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        return data.item()
    if isinstance(data, dict):
        return {key: convert_to_native(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_to_native(item) for item in data]
    if isinstance(data, tuple):
        # Tuples are serialised by json like lists, but their elements still need
        # converting: a NumPy scalar nested in a tuple would otherwise break json.dump.
        return tuple(convert_to_native(item) for item in data)
    return data
    
def add_evaluation(file_path, new_data):
    """Merge ``new_data`` into the JSON results file at ``file_path``.

    If the file is missing or empty, it is created with ``new_data`` alone.
    Otherwise the stored top-level dict is updated with ``new_data``, so
    existing top-level keys are overwritten by the new values.

    Args:
        file_path: Path of the JSON file; its parent directory is created if needed.
        new_data: Results to store; passed through ``convert_to_native`` first.

    Raises:
        TypeError: If the merged data cannot be serialised to JSON. The file on
            disk is left untouched in that case.
    """
    converted = convert_to_native(new_data)

    # An existing but empty file (e.g. left behind by an interrupted run) is
    # treated like a missing one instead of failing inside json.load.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, 'r') as file:
            merged = json.load(file)
        merged.update(converted)
    else:
        merged = converted

    # Serialise before opening for writing: opening with 'w' truncates the file,
    # so a serialisation error after that point would wipe the stored results.
    payload = json.dumps(merged, indent=4)

    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w') as file:
        file.write(payload)

In [None]:
# ---------------------------------------------------------------------------
# Score every generated image: identity similarity against the subject's
# reference face and text similarity against the prompt. Results are merged
# into <eval_res_path>/evaluation_results.json after each subject.
# ---------------------------------------------------------------------------
outputs_path = "../../outputs/face/photomaker"
eval_res_path = "../../eval_results/face/photomaker"
dataset_path = "../../pcs_dataset/face"
dataset_info_path = "../../pcs_dataset/info.json"

with open(dataset_info_path, 'r') as file:
    data_info = json.load(file)["face"]

id_with_gender = data_info["id_with_gender"]

os.makedirs(eval_res_path, exist_ok=True)

# Loop invariants, built once instead of once per image.
eval_res_file = os.path.join(eval_res_path, "evaluation_results.json")
eval_device = torch.device("cuda")

# Keep real subject folders only (skips hidden entries such as `.ipynb_checkpoints`);
# sorted so the run order and the JSON key order are stable.
id_list = sorted(
    name for name in os.listdir(outputs_path)
    if not name.startswith(".") and os.path.isdir(os.path.join(outputs_path, name))
)

# `face_id` rather than `id`, so the builtin `id` is not shadowed for the rest of the notebook.
for face_id in id_list:
    evaluation_res = dict()
    print(f"***** Face ID: {face_id} *****")

    res_for_each_subject = dict()

    # convert() returns an independent RGB copy, so the source file can be closed right away.
    with Image.open(os.path.join(dataset_path, face_id, "face.jpg")) as ref_file:
        ref_image = ref_file.convert("RGB")

    subject_dir = os.path.join(outputs_path, face_id)
    for prompt in sorted(os.listdir(subject_dir)):
        prompt_dir = os.path.join(subject_dir, prompt)
        if prompt.startswith(".") or not os.path.isdir(prompt_dir):
            continue

        # The folder name is the full generation prompt; drop the quality suffix before scoring.
        prompt_eval = prompt.replace(", high quality", "")
        print(f"**Prompt**: {prompt_eval}")

        res_for_each_prompt = dict()

        for generate_img_name in sorted(os.listdir(prompt_dir)):
            generate_img_path = os.path.join(prompt_dir, generate_img_name)
            if generate_img_name.startswith(".") or not os.path.isfile(generate_img_path):
                continue

            text_similarity = evaluate_t2i(generate_img_path, prompt_eval)

            with Image.open(generate_img_path) as generated_file:
                generated_image = generated_file.convert("RGB")
            identity_similarity = float(calculate_identity_similarity(device=eval_device, generated_image=generated_image, ref_image=ref_image))

            # Stored as [identity similarity, text similarity]; the averaging cell relies on this order.
            res_for_each_prompt[generate_img_name] = [identity_similarity, text_similarity]

        res_for_each_subject[prompt] = res_for_each_prompt
        print(res_for_each_prompt)

    evaluation_res[face_id] = res_for_each_subject

    # Persist after every subject so a later failure does not lose finished work.
    add_evaluation(eval_res_file, evaluation_res)

### Calculate the average similarity

In [24]:
import json

# Average the per-sample scores stored by the evaluation cell.
# File layout: {subject: {prompt: {image name: [identity similarity, text similarity]}}}.
eval_res_path = "../../eval_results/face/photomaker/evaluation_results.json"

with open(eval_res_path, "r") as f:
    data = json.load(f)

img_sim = 0.0
text_sim = 0.0
cnt = 0

# Plain left-to-right accumulation is kept so the printed averages match earlier runs exactly.
for prompt_results in data.values():
    for sample_results in prompt_results.values():
        for identity_score, text_score in sample_results.values():
            img_sim += identity_score
            text_sim += text_score
            cnt += 1

# Without this guard an empty results file would end in a bare ZeroDivisionError.
if cnt == 0:
    raise ValueError(f"No evaluated samples found in {eval_res_path}; run the evaluation cell first.")

print("Identity Similarity: ", img_sim/cnt, "\nText Similarity:", text_sim/cnt)

Identity Similarity:  0.5468374671934483 
Text Similarity: 0.3111436971028646
