In [None]:
import torch
import json
from insightface.app import FaceAnalysis
import torch
import cv2

# InsightFace analyser ("buffalo_l" model pack) used below to extract the
# identity embedding of each reference photo. Execution providers are listed
# with CUDA first and CPU second.
app = FaceAnalysis(
    name="buffalo_l",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
# ctx_id=0 selects the first device; detection runs on 640x640 inputs.
app.prepare(det_size=(640, 640), ctx_id=0)

import os
import torch
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDXL

# --- model configuration ---
device = "cuda"
base_model_path = "SG161222/RealVisXL_V3.0"
ip_ckpt = "models/ip-adapter-faceid_sdxl.bin"

# DDIM scheduler settings handed to the SDXL pipeline below.
_ddim_kwargs = {
    "num_train_timesteps": 1000,
    "beta_start": 0.00085,
    "beta_end": 0.012,
    "beta_schedule": "scaled_linear",
    "clip_sample": False,
    "set_alpha_to_one": False,
    "steps_offset": 1,
}
noise_scheduler = DDIMScheduler(**_ddim_kwargs)

# SDXL base pipeline loaded in float16, with the scheduler above and the
# watermarker disabled.
pipe = StableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    scheduler=noise_scheduler,
    torch_dtype=torch.float16,
    add_watermarker=False,
)

# Wrap the pipeline with the FaceID IP-Adapter checkpoint.
ip_model = IPAdapterFaceIDXL(pipe, ip_ckpt, device)

# inference
# NOTE(review): `num_generation` is not consumed below; generate() is called
# with num_samples=2. Kept so other cells that may reference it still work.
num_generation = 4
negative_prompt = "(asymmetry, worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth"

output_path = "../../outputs/face/ipadapter"
dataset_path = "../../pcs_dataset/face"
dataset_info_path = "../../pcs_dataset/info.json"

# The dataset description does not change between subjects, so it is read
# once here instead of once per subject.
with open(dataset_info_path, 'r') as file:
    data_info = json.load(file)["face"]
id_with_gender = data_info["id_with_gender"]
prompts = data_info["prompt_accessory"] + data_info["prompt_context"] + data_info["prompt_action"] + data_info["prompt_style"]

# Only directories are subjects; stray files (e.g. editor/OS metadata) would
# otherwise make the per-subject os.listdir call fail.
subjects = sorted(
    name for name in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, name))
)
for subject in subjects:
    print(f"***** Subject: {subject} *****")

    if subject not in id_with_gender:
        print(f"[skip] '{subject}' has no entry in id_with_gender")
        continue

    # Collect one identity embedding per readable reference image.
    input_folder_name = os.path.join(dataset_path, subject)
    image_path_list = sorted(
        os.path.join(input_folder_name, basename)
        for basename in os.listdir(input_folder_name)
    )

    input_id_images = []
    for image_path in image_path_list:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning None.
            print(f"[skip] cannot read image: {image_path}")
            continue
        faces = app.get(img)
        if not faces:
            print(f"[skip] no face detected in: {image_path}")
            continue
        input_id_images.append(torch.from_numpy(faces[0].normed_embedding).unsqueeze(0))

    if not input_id_images:
        # Without this guard the embedding of the previous subject would be
        # reused silently (or a NameError raised for the first subject).
        print(f"[skip] no usable reference image for subject '{subject}'")
        continue

    # As before, a single embedding conditions the generation: the one from the
    # last (path-sorted) image in which a face was found.
    # NOTE(review): the remaining entries of `input_id_images` are unused —
    # confirm whether multiple references should be combined.
    faceid_embeds = input_id_images[-1]

    os.makedirs(os.path.join(output_path, subject), exist_ok=True)

    for prompt in prompts:
        prompt = prompt.replace("{0} {1}", f"{id_with_gender[subject]} img") + ", high quality"
        print(f"**Prompt**: {prompt}")

        # Fresh generator per prompt so every prompt starts from the same seed.
        generator = torch.Generator(device=device).manual_seed(42)

        # NOTE(review): confirm the local ip_adapter copy accepts `generator`
        # here; some versions of IPAdapterFaceIDXL.generate take `seed` instead.
        images = ip_model.generate(
            prompt=prompt, negative_prompt=negative_prompt, faceid_embeds=faceid_embeds, num_samples=2,
            width=1024, height=1024,
            num_inference_steps=30, guidance_scale=7.5, generator=generator
        )

        # One directory per (subject, resolved prompt); images are numbered.
        save_path = os.path.join(output_path, subject, prompt)
        os.makedirs(save_path, exist_ok=True)

        for img_idx, img in enumerate(images):
            img.save(os.path.join(save_path, f"{img_idx:04d}.jpg"))

## Testing

In [None]:
import os, sys, json

# Expose only GPU index 0 through CUDA_VISIBLE_DEVICES.
# NOTE(review): this likely has no effect if CUDA was already initialised in
# this kernel by an earlier cell — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Append the directory two levels above the working directory to the import
# path so that the `utils` package imported below can be resolved.
utils_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
sys.path.append(utils_path)

import torch
import numpy as np
from PIL import Image
from utils.clip_eval import evaluate_t2i
from utils.face_eval import calculate_identity_similarity

def convert_to_native(data):
    """Recursively replace NumPy values with plain Python equivalents.

    The result can be passed to ``json.dump``, which rejects NumPy arrays and
    most NumPy scalar types.

    Args:
        data: Arbitrary object. ``np.ndarray`` becomes a (nested) list, NumPy
            scalars become Python scalars, and dicts, lists and tuples are
            walked recursively. NumPy scalars used as dict keys are converted
            too, since ``json.dump`` rejects them as keys.

    Returns:
        An object of the same structure with NumPy values converted. Tuples
        stay tuples (with converted items); anything else is returned as-is.
    """
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        return data.item()
    if isinstance(data, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_to_native(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [convert_to_native(item) for item in data]
    if isinstance(data, tuple):
        # Previously tuples were returned untouched, leaving NumPy scalars
        # inside them unserialisable.
        return tuple(convert_to_native(item) for item in data)
    return data
    
def add_evaluation(file_path, new_data):
    """Merge ``new_data`` into the JSON file at ``file_path``.

    If the file does not exist yet it is created with ``new_data`` as its
    content. Otherwise the stored object is loaded, updated with ``new_data``
    via ``dict.update`` (top-level keys are overwritten) and written back.
    ``new_data`` is passed through ``convert_to_native`` before being stored.

    Args:
        file_path: Path of the JSON results file.
        new_data: Mapping with the results to add.
    """
    payload = convert_to_native(new_data)

    if os.path.exists(file_path):
        # Existing results: load them and overlay the new entries.
        with open(file_path, 'r') as fin:
            merged = json.load(fin)
        merged.update(payload)
    else:
        merged = payload

    with open(file_path, 'w') as fout:
        json.dump(merged, fout, indent=4)

# paths
outputs_path = "../../outputs/face/ipadapter"
eval_res_path = "../../eval_results/face/ipadapter"
dataset_path = "../../pcs_dataset/face"
dataset_info_path = "../../pcs_dataset/info.json"
# Only sub-directories of the output folder are face IDs; stray files would
# otherwise break the per-ID directory lookups below.
id_list = sorted(
    name for name in os.listdir(outputs_path)
    if os.path.isdir(os.path.join(outputs_path, name))
)
os.makedirs(eval_res_path, exist_ok=True)

# prompt prepare
with open(dataset_info_path, 'r') as file:
    data_info = json.load(file)["face"]
prompts = data_info["prompt_accessory"] + data_info["prompt_context"] + data_info["prompt_action"] + data_info["prompt_style"]
id_with_gender = data_info["id_with_gender"]

# The device object is loop-invariant, so build it once.
eval_device = torch.device("cuda")

# test for each id (named `face_id` to avoid shadowing the builtin `id`)
for face_id in id_list:
    if face_id not in id_with_gender:
        print(f"[skip] '{face_id}' has no entry in id_with_gender")
        continue

    evaluation_res = dict()
    print(f"***** Face ID: {face_id} *****")
    res_for_each_subject = dict()

    # reference image
    ref_image = Image.open(os.path.join(dataset_path, face_id, "face.jpg")).convert("RGB")

    # test for each prompt
    for prompt in prompts:
        prompt_eval = prompt.replace("{0} {1}", f"{id_with_gender[face_id]}")
        print(f"**Prompt**: {prompt_eval}")

        # Locate the folder holding the images generated for this prompt.
        # The "f l" name is tried first (previous behaviour); the second
        # candidate is the name the generation cell of this notebook writes
        # ("<gender> img"), which the old lookup never matched.
        dir_candidates = [
            prompt.replace("{0} {1}", "f l") + ", high quality",
            prompt.replace("{0} {1}", f"{id_with_gender[face_id]} img") + ", high quality",
        ]
        prompt_dir = None
        for dir_prompt in dir_candidates:
            candidate_path = os.path.join(outputs_path, face_id, dir_prompt)
            if os.path.isdir(candidate_path):
                prompt_dir = candidate_path
                break
        if prompt_dir is None:
            raise FileNotFoundError(
                f"No output directory for id '{face_id}' and prompt '{prompt}'; tried: {dir_candidates}"
            )

        res_for_each_prompt = dict()

        # test for each image (sorted for a stable order in the results file)
        for generate_img_name in sorted(os.listdir(prompt_dir)):
            generate_img_path = os.path.join(prompt_dir, generate_img_name)

            text_similarity = evaluate_t2i(generate_img_path, prompt_eval)

            generated_image = Image.open(generate_img_path).convert("RGB")
            identity_similarity = float(calculate_identity_similarity(device=eval_device, generated_image=generated_image, ref_image=ref_image))

            # Stored as [identity similarity, text similarity].
            res_for_each_prompt[generate_img_name] = [identity_similarity, text_similarity]

        # Results are keyed by the raw prompt template, not the resolved prompt.
        res_for_each_subject[prompt] = res_for_each_prompt
        print(res_for_each_prompt)

    evaluation_res[face_id] = res_for_each_subject

    # Persist after every ID so partial progress survives an interruption.
    add_evaluation(os.path.join(eval_res_path, "evaluation_results.json"), evaluation_res)

import json

# Aggregate the per-image scores written by the evaluation loop into two
# global means. Each stored sample is a pair [identity similarity, text similarity].
eval_res_path = "../../eval_results/face/ipadapter/evaluation_results.json"

with open(eval_res_path, "r") as f:
    data = json.load(f)

img_sim = 0.0
text_sim = 0.0
cnt = 0

for subject_results in data.values():
    for prompt_results in subject_results.values():
        for scores in prompt_results.values():
            img_sim += scores[0]
            text_sim += scores[1]
            cnt += 1

if cnt == 0:
    # An empty results file would otherwise end in a ZeroDivisionError.
    print(f"No evaluated samples found in {eval_res_path}")
else:
    print("Identity Similarity: ", img_sim/cnt, "\nText Similarity:", text_sim/cnt)