## GPT V3

单个模型(fixed), 直接对接到webui (生成很多个), 所以产生一行一个的结果

In [1]:
from tqdm.auto import tqdm
import gradio as gr
import transformers
import torch
import os


import os
import hashlib
import json
from datetime import datetime
import concurrent.futures


# Assuming DEVICE is already defined
DEVICE = 'cuda'  # Use 'cuda' for GPU or 'cpu' for CPU

# Path to the models' directories
MODEL_DIRS = [
    'pixiv-prompts-gpt-finetunes/8xh100_run2_e2_s50k',
]

# Load models and tokenizers from directories
models = {}
tokenizers = {}
for model_dir in MODEL_DIRS:
    model_name = os.path.basename(os.path.normpath(model_dir))
    models[model_name] = transformers.GPT2LMHeadModel.from_pretrained(model_dir).to(DEVICE)
    tokenizers[model_name] = transformers.AutoTokenizer.from_pretrained(model_dir)


# Hardcoded model name
HARDCODED_MODEL_NAME = os.path.basename(os.path.normpath(MODEL_DIRS[0]))

def generate_text(rating: str, date: str, quality: str, character: str, prompt: str, max_length: int, num_lines: int, selected_models: list):
    """Generate text based on input prompt for selected models, managing multiple lines output."""
    query_prompt = f'<input rating="{rating}" chara="{character}" date="{date}" quality="{quality}" tags="{prompt}">'
    query_prompt += "<output>"

    all_outputs = []
    for model_name in selected_models:
        model = models[model_name].eval()  # Set the model to evaluation mode
        tokenizer = tokenizers[model_name]
        for _ in tqdm(range(num_lines)):
            input_ids = tokenizer.encode(query_prompt, return_tensors='pt').to(DEVICE)
            output_sequences = model.generate(
                input_ids=input_ids,
                max_length=max_length + len(input_ids[0]),
                temperature=1.0,
                top_k=50,
                top_p=0.95,
                repetition_penalty=1.0,
                do_sample=True,
                num_return_sequences=1,
            )

            generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

            # Find and remove the initial part up to <output>
            start_output = generated_text.find("<output>") + len("<output>")
            generated_text = generated_text[start_output:].strip()

            # Remove the ending </output> tag or truncate at last complete tag
            end_tag = generated_text.find("</output>")
            if end_tag != -1:
                generated_text = generated_text[:end_tag]
            else:
                last_comma = generated_text.rfind(",")
                if last_comma != -1:
                    generated_text = generated_text[:last_comma]

            all_outputs.append(generated_text.strip())

    # Join all outputs into a single string separated by new lines
    return "\n".join(all_outputs)

In [2]:
IMG_OUTPUT_DIR = "generated_images"


def save_images_and_metadata(gend_images, gend_text, prompt):
    # Ensure the output directory exists
    os.makedirs(IMG_OUTPUT_DIR, exist_ok=True)

    for image in gend_images:
        # Get filename
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        img_hash = hashlib.md5(image.tobytes()).hexdigest()        
        file_name = f"{timestamp}_{img_hash[:4]}.webp"        
        file_path = os.path.join(IMG_OUTPUT_DIR, file_name)
        
        # Save image
        image.save(file_path, format='WEBP')

        # Create metadata JSON
        metadata = {
            "prompt": prompt,
            "generated_text": gend_text,
        }
        json_file_name = f"{timestamp}_{img_hash[:4]}.json"
        json_file_path = os.path.join(IMG_OUTPUT_DIR, json_file_name)
        
        with open(json_file_path, 'w') as json_file:
            json.dump(metadata, json_file)

In [4]:
from run_comfy_api import run_workflow, DEFAULT_NEG, DEFAULT_MODEL


HARDCODED_MODEL_NAME = os.path.basename(os.path.normpath(MODEL_DIRS[0]))


def gen_text_and_gen_image(rating: str, date: str, quality: str, character: str, prompt: str, max_length: int, 
                           seed:int, tags_front:str, tags_back:str):
    
    selected_models = [HARDCODED_MODEL_NAME]
    
    # one output, with single model
    gend_text = generate_text(rating, date, quality, character, prompt, max_length, 4, selected_models)

    gend_text = gend_text.split("\n")
    gend_images = []

    for i, txt in enumerate(gend_text):
        print(f"Generated text {i}: {txt}")

        # add quality tags

        txt = ", ".join([tags_front, txt, tags_back])

        _gend_images = run_workflow(pos=txt, 
                                neg="lowres, worst quality, displeasing, bad quality, bad anatomy, text, error, extra digit, cropped, average quality, 2000s",
                                seed = seed,
                                batch_size=1,
                                )
        gend_images.extend(_gend_images)
    
    # Save images and metadata in the background
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.submit(save_images_and_metadata, gend_images, gend_text, prompt)
    
    return "\n\n".join(gend_text), gend_images
    


# Define Gradio interface components
checkbox_choices = [os.path.basename(os.path.normpath(model_dir)) for model_dir in MODEL_DIRS]
iface = gr.Interface(
    fn=gen_text_and_gen_image,
    inputs=[
        gr.Radio(choices=["general", "nsfw"], label="Rating", value="general"),
        gr.Radio(choices=["2000s", "2010s", "2020s"], label="Date", value="2020s"),
        gr.Radio(choices=["normal", "good", "excellent"], label="Quality", value="excellent"),
        gr.Textbox(lines=1, placeholder="hatsune miku", label="Character tags"),
        gr.Textbox(lines=2, placeholder="1girl, long hair, looking at viewer", label="General Tags"),
        gr.Slider(minimum=40, maximum=300, value=180, step=10, label="Max Prompt Length"),
        gr.Number(value=-1, label="Image Generation Seed"),
        gr.Textbox(lines=1, value="best quality, 2020s", label="tags_front"),
        gr.Textbox(lines=1, value="[[absurdres]]", label="tags_back"),
    ],
    outputs=[
        gr.Textbox(label="Generated Texts"),
        gr.Gallery(label="Generated Images", height=768),
    ],
    title="Prompt Augment and SDXL New Model Demo",
    description="""
Pipeline:
  - input some danbooru tags (or danbooru-like tags)
  - augment the input to match the model training distribution
  - generate images from the augmented input (by calling ComfyUI API)
  - receives output

Input tags is in danbooru format (or similar): 
  - see: https://danbooru.donmai.us/posts/7793852
"""
)

# Launch the interface
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://13d7d6124540429eda.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




  0%|          | 0/4 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Generated text 0: 1girl, green eyes, smile, maid headdress, looking at viewer, puffy short sleeves, eyebrows visible through hair, bangs, hair ornament, black dress, very long hair, white apron, alternate costume, enmaided, closed mouth, frilled apron, waist apron, maid apron, white background, hair between eyes, cleavage cutout, medium breasts, clothing cutout, genshin impact, maid outfit
Generated text 1: 1girl, brown hair, hair ornament, large breasts, short hair, open mouth, yellow eyes, hairclip, sitting, looking at viewer, collarbone, cleavage, outdoors, black swimsuit, hair between eyes, black bikini, bangs, holding towel, covered navel, bare shoulders, thighs, bench, wet clothes, one-piece swimsuit
Generated text 2: multiple girls, 2girls, red eyes, looking at another, ponytail, black neckwear, purple eyes, black skirt, very long hair, eye contact, silver hair, hand on another's face, bare shoulders, hair ribbon, red ribbon, pleated skirt, black choker, white shirt, sleeveless 