## GPT V3

单个模型(fixed), 直接对接到webui (生成很多个), 所以产生一行一个的结果

In [1]:
from tqdm.auto import tqdm
import gradio as gr
import transformers
import torch
import os


import os
import hashlib
import json
from datetime import datetime
import concurrent.futures


# Assuming DEVICE is already defined
DEVICE = 'cuda'  # Use 'cuda' for GPU or 'cpu' for CPU

# Path to the models' directories
MODEL_DIRS = [
    'pixiv-prompts-gpt-finetunes/8xh100_run2_e2_s50k',
]

# Load the model and tokenizer from the directory
MODEL_NAME = os.path.basename(os.path.normpath(MODEL_DIRS[0]))
model = transformers.GPT2LMHeadModel.from_pretrained(MODEL_DIRS[0]).to(DEVICE).eval()
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_DIRS[0])

def generate_text(rating: str, date: str, quality: str, character: str, prompt: str, max_length: int, num_lines: int):
    """Generate text based on input prompt."""
    query_prompt = f'<input rating="{rating}" chara="{character}" date="{date}" quality="{quality}" tags="{prompt}">'
    query_prompt += "<output>"

    all_outputs = []
    for _ in range(num_lines):
        input_ids = tokenizer.encode(query_prompt, return_tensors='pt').to(DEVICE)
        output_sequences = model.generate(
            input_ids=input_ids,
            max_length=max_length + len(input_ids[0]),
            temperature=1.0,
            top_k=50,
            top_p=0.95,
            repetition_penalty=1.0,
            do_sample=True,
            num_return_sequences=1,
        )

        generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)

        # Find and remove the initial part up to <output>
        start_output = generated_text.find("<output>") + len("<output>")
        generated_text = generated_text[start_output:].strip()

        # Remove the ending </output> tag or truncate at last complete tag
        end_tag = generated_text.find("</output>")
        if end_tag != -1:
            generated_text = generated_text[:end_tag]
        else:
            last_comma = generated_text.rfind(",")
            if last_comma != -1:
                generated_text = generated_text[:last_comma]

        all_outputs.append(generated_text.strip())

    # Join all outputs into a single string separated by new lines
    return "\n".join(all_outputs)

In [2]:
IMG_OUTPUT_DIR = "generated_images"


def save_images_and_metadata(gend_images, gend_text, prompt):
    # Ensure the output directory exists
    os.makedirs(IMG_OUTPUT_DIR, exist_ok=True)

    for image in gend_images:
        # Get filename
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        img_hash = hashlib.md5(image.tobytes()).hexdigest()        
        file_name = f"{timestamp}_{img_hash[:4]}.webp"        
        file_path = os.path.join(IMG_OUTPUT_DIR, file_name)
        
        # Save image
        image.save(file_path, format='WEBP')

        # Create metadata JSON
        metadata = {
            "prompt": prompt,
            "generated_text": gend_text,
        }
        json_file_name = f"{timestamp}_{img_hash[:4]}.json"
        json_file_path = os.path.join(IMG_OUTPUT_DIR, json_file_name)
        
        with open(json_file_path, 'w') as json_file:
            json.dump(metadata, json_file)

In [5]:
from typing import List, Tuple
from concurrent.futures import ThreadPoolExecutor
from run_comfy_api import run_workflow, DEFAULT_NEG, DEFAULT_MODEL


HARDCODED_MODEL_NAME = os.path.basename(os.path.normpath(MODEL_DIRS[0]))

def gen_text_and_gen_image(rating: str, date: str, quality: str, character: str, prompt: str, max_length: int, 
                           img_count:int, seed: int, tags_front: str, tags_back: str) -> Tuple[str, List[str]]:
    
    gend_text = []
    gend_images = []

    with ThreadPoolExecutor() as executor:
        for _ in range(img_count):
            text_future = executor.submit(generate_text, rating, date, quality, character, prompt, max_length, 1)
            txt = text_future.result().split("\n")[0]
            print(f"Generated text: {txt}")

            # Add quality tags
            txt = ", ".join([tags_front, txt, tags_back])
            gend_text.append(txt)

            image_future = executor.submit(run_workflow, pos=txt, 
                                           neg="lowres, worst quality, displeasing, bad quality, bad anatomy, text, error, extra digit, cropped, average quality, 2000s",
                                           seed=seed, batch_size=1)
            _gend_images = image_future.result()
            gend_images.extend(_gend_images)

    # Save images and metadata in the background
    with ThreadPoolExecutor() as executor:
        executor.submit(save_images_and_metadata, gend_images, gend_text, prompt)
    
    return "\n\n".join(gend_text), gend_images
    

# Define Gradio interface components
checkbox_choices = [os.path.basename(os.path.normpath(model_dir)) for model_dir in MODEL_DIRS]
iface = gr.Interface(
    fn=gen_text_and_gen_image,
    inputs=[
        gr.Radio(choices=["general", "nsfw"], label="Rating", value="general"),
        gr.Radio(choices=["2000s", "2010s", "2020s"], label="Date", value="2020s"),
        gr.Radio(choices=["normal", "good", "excellent"], label="Quality", value="excellent"),
        gr.Textbox(lines=1, placeholder="hatsune miku", label="Character tags"),
        gr.Textbox(lines=2, placeholder="1girl, long hair, looking at viewer", label="General Tags"),
        gr.Slider(minimum=40, maximum=300, value=120, step=10, label="Max Prompt Length"),
        gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Image Generation Count"),
        gr.Number(value=-1, label="Image Generation Seed"),
        gr.Textbox(lines=1, value="best quality, 2020s", label="tags_front"),
        gr.Textbox(lines=1, value="[[absurdres]]", label="tags_back"),
    ],
    outputs=[
        gr.Textbox(label="Generated Texts"),
        gr.Gallery(label="Generated Images", height=768),
    ],
    title="Prompt Augment and SDXL New Model Demo",
    description="""
Pipeline:
  - input some danbooru tags (or danbooru-like tags)
  - augment the input to match the model training distribution
  - generate images from the augmented input (by calling ComfyUI API)
  - receives output

Input tags is in danbooru format (or similar): 
  - see: https://danbooru.donmai.us/posts/7793852
"""
)

# Launch the interface
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://248f7375fffcaccf75.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text: yoisaki kanade, 1girl, japanese clothes, hair ornament, oil-paper umbrella, hair flower, open mouth, looking at viewer, holding umbrella, nail polish, floral print, hair between eyes, eyebrows visible through hair, wide sleeves, bangs, long sleeves, red flower, upper body, print kimono, outdoors, depth of field, brown eyes, blurry background, blurry foreground, from side, :o, red nails, looking to the side, hand up, white flower, red eyes, lens flare, white background, grey kimono


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text: yoisaki kanade, 1girl, hair between eyes, long sleeves, parted lips, very long hair, looking at viewer, collarbone, sleeves past wrists, holding envelope, star (symbol), hands up, grey background, simple background, blush, brown sweater, grey bow, eyebrows visible through hair, holding letter, grey ribbon, upper body, white background, black bow, project sekai, 25-ji, night code de., prsk_fa, hair ornament
