# Packages

In [2]:
import torch
import gc #GPU Memory Optimierung
from diffusers import FluxPipeline, AutoPipelineForText2Image, StableDiffusion3Pipeline, FluxKontextPipeline
from diffusers.utils import load_image
# from accelerate import load_checkpoint_and_dispatch #manuelles verschieben Elemente auf GPU Geräte
import os
import sys
from PIL import Image
import datetime #benötigt zur Generierung von Suffixen für Speichern von Dateien
import random
import time
from diffusers import BitsAndBytesConfig, PipelineQuantizationConfig, SD3Transformer2DModel # Quantisierungsoption

# diffusers=0.33.1

2025-07-16 11:39:20.596503: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-16 11:39:20.617803: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-16 11:39:20.624409: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-16 11:39:20.640172: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Bildgenerierung

## Initialisierung

In [None]:
# Model selector: 0 loads FLUX.1-schnell, any other value loads Stable Diffusion 3.5 medium.
model_question = 0

model_path = (
    "/mount/point/models/FLUX.1-schnell"
    if model_question == 0
    else "/mount/point/models/stable-diffusion-3.5-medium"
)

# Both pipelines are loaded with identical settings: bfloat16 weights, spread
# over the available devices with the "balanced" device map.
# Optional extras that are currently not used:
#   max_memory={0: "16GB", 1: "16GB", 2: "16GB", 3: "16GB"}  # cap per-GPU memory if other GPUs are busy
#   transformer=<quantized transformer>                      # swap in a quantized transformer
model = (FluxPipeline if model_question == 0 else StableDiffusion3Pipeline).from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,  # full precision alternative: torch.float32
    device_map="balanced",
)

print("Initialized image generating model: " + model_path.rsplit("/")[-1])

# Image generation function
def pic_gen(prompt, save_path = None, height = 1024, width = 1024,
            guidance_scale = 0.0, num_inference_steps=4, generator_device = "cpu", seed = None,
            image_num = 1, display_picture = True, return_dict=False):
    """Generate one or more images from a text prompt with the module-level ``model`` pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        save_path: Optional target file ending in ``png``, ``jpg`` or ``jpeg``
            (case-insensitive). Its directory must already exist. When
            ``image_num > 1`` every image is written as ``<stem>_<index>.<ext>``;
            otherwise the first image is written to ``save_path`` itself.
        height: Image height forwarded to the pipeline.
        width: Image width forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of inference steps forwarded to the pipeline.
        generator_device: Device for the random generator, e.g. ``"cpu"``,
            ``"cuda"`` or an indexed form such as ``"cuda:0"``.
        seed: Integer seed for reproducible output. ``None`` draws a fresh
            non-deterministic seed on every call.
        image_num: Number of images per prompt.
        display_picture: Show the resulting image(s) in the notebook,
            whether or not they are saved.
        return_dict: Forwarded to the pipeline unchanged.

    Returns:
        The object returned by the pipeline call; index ``[0]`` holds the images.

    Raises:
        AssertionError: If the generator device type, the save path, its
            directory or its file extension is invalid.
    """
    # Validate only the device *type*, so indexed devices like "cuda:0" pass too.
    device_type = str(generator_device).split(":", maxsplit=1)[0]
    assert device_type in ["cpu", "cuda", "ipu", "xpu", "mkldnn", "opengl", "opencl", "ideep", "hip", "ve", "fpga", "ort", "xla", "lazy", "vulkan", "mps", "meta", "hpu", "mtia", "privateuseone"], "Please enter a valid generator like the following: cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone"

    path_stem = file_format = None
    if save_path is not None:
        assert isinstance(save_path, str), "Please enter a valid string for the saving path of your image"

        # A bare filename has an empty dirname; it refers to the current directory.
        target_dir = os.path.dirname(save_path) or "."
        assert os.path.isdir(target_dir), "Please enter a valid path"

        path_stem, extension = os.path.splitext(save_path)
        file_format = extension[1:]  # extension without the leading dot
        assert file_format.lower() in ["png", "jpg", "jpeg"], "Please enter a valid picture format to save the image"

    # A newly constructed torch.Generator always starts from the same default
    # seed, so it has to be reseeded explicitly to get varying results.
    generator = torch.Generator(generator_device)
    if seed is None:
        generator.seed()
    else:
        generator.manual_seed(seed)

    try:
        image = model(
            prompt,
            height=height,
            width=width,
            num_images_per_prompt=image_num,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            max_sequence_length=256,
            generator=generator,
            return_dict=return_dict,
        )

        # Several images get an index suffix; a single image keeps the exact save_path.
        pictures = image[0] if image_num > 1 else [image[0][0]]
        for index, pic in enumerate(pictures):
            if save_path is not None:
                img_path = f"{path_stem}_{index}.{file_format}" if image_num > 1 else save_path
                pic.save(img_path)

            if display_picture:
                # Show the in-memory image; re-opening the saved file would
                # leave an unclosed file handle behind.
                display(pic)

        return image  # return all the generated data
    finally:
        # Release memory even when generation or saving failed.
        gc.collect()
        torch.cuda.empty_cache()

## Ausführung

In [None]:
# Generation parameters
prompt = """pareidolic anamorphosis of a hole in a brick wall morphed into a hublot of a sail boat, a window to the sea."""

height = 1072
width = 1920
guidance_scale = 3.5
num_inference_steps = 10
image_num = 1
seed = None  # e.g. 423 for a reproducible run

# The current timestamp makes every output file name unique.
file_suffix = format(datetime.datetime.now(), "%y%m%d_%H%M%S")
save_path = f"/mount/point/veith/generated_pictures/image_{file_suffix}.png"  # target file for the generated image

image = pic_gen(
    prompt,
    save_path=save_path,
    height=height,
    width=width,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    seed=seed,
    image_num=image_num,
    display_picture=True,
)

# Bildbearbeitung

## Initialisierung

In [3]:
model_path = "/mount/point/models/FLUX.1-Kontext-dev"

# bitsandbytes 8-bit quantization, applied to the transformer component only;
# every other pipeline component is loaded unquantized.
quantization_config = PipelineQuantizationConfig(
    components_to_quantize=["transformer"],
    quant_backend="bitsandbytes_8bit",
    quant_kwargs=dict(
        load_in_8bit=True,
        llm_int8_threshold=6.0,
        llm_int8_has_fp16_weight=False,
    ),
)

# Image editing pipeline: bfloat16 weights, "balanced" device map.
model_edit = FluxKontextPipeline.from_pretrained(
    model_path,
    device_map="balanced",
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
)

# Image editing function

def pic_edit(input_image, prompt: str, save_path: str = None,
            guidance_scale = 0.0, num_inference_steps=8, generator_device = "cpu", seed = None,
            image_num = 1, display_picture = True):
    """Edit an existing image according to a text prompt with the module-level ``model_edit`` pipeline.

    The output keeps the input resolution: ``input_image.size`` supplies
    ``width``, ``height`` and ``max_area``, and ``_auto_resize`` is disabled.

    Args:
        input_image: Image to edit; must expose ``size`` as ``(width, height)``.
        prompt: Text describing the desired change.
        save_path: Optional target file ending in ``png``, ``jpg`` or ``jpeg``
            (case-insensitive). Its directory must already exist. When
            ``image_num > 1`` every image is written as ``<stem>_<index>.<ext>``;
            otherwise the first image is written to ``save_path`` itself.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of inference steps forwarded to the pipeline.
        generator_device: Device for the random generator, e.g. ``"cpu"``,
            ``"cuda"`` or an indexed form such as ``"cuda:0"``.
        seed: Integer seed for reproducible output. ``None`` draws a fresh
            non-deterministic seed on every call.
        image_num: Number of images per prompt.
        display_picture: Show the resulting image(s) in the notebook,
            whether or not they are saved.

    Returns:
        The object returned by the pipeline call; index ``[0]`` holds the images.

    Raises:
        AssertionError: If the generator device type, the save path, its
            directory or its file extension is invalid.
    """
    # Validate only the device *type*, so indexed devices like "cuda:0" pass too.
    device_type = str(generator_device).split(":", maxsplit=1)[0]
    assert device_type in ["cpu", "cuda", "ipu", "xpu", "mkldnn", "opengl", "opencl", "ideep", "hip", "ve", "fpga", "ort", "xla", "lazy", "vulkan", "mps", "meta", "hpu", "mtia", "privateuseone"], "Please enter a valid generator like the following: cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone"

    path_stem = file_format = None
    if save_path is not None:
        assert isinstance(save_path, str), "Please enter a valid string for the saving path of your image"

        # A bare filename has an empty dirname; it refers to the current directory.
        target_dir = os.path.dirname(save_path) or "."
        assert os.path.isdir(target_dir), "Please enter a valid path"

        path_stem, extension = os.path.splitext(save_path)
        file_format = extension[1:]  # extension without the leading dot
        assert file_format.lower() in ["png", "jpg", "jpeg"], "Please enter a valid picture format to save the image"

    # A newly constructed torch.Generator always starts from the same default
    # seed, so it has to be reseeded explicitly to get varying results.
    generator = torch.Generator(generator_device)
    if seed is None:
        generator.seed()
    else:
        generator.manual_seed(seed)

    input_width, input_height = input_image.size

    try:
        image = model_edit(
            image=input_image,
            prompt=prompt,
            height=input_height,
            width=input_width,
            max_area=input_width * input_height,
            num_images_per_prompt=image_num,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            max_sequence_length=256,
            generator=generator,
            _auto_resize=False,
        )

        # Several images get an index suffix; a single image keeps the exact save_path.
        pictures = image[0] if image_num > 1 else [image[0][0]]
        for index, pic in enumerate(pictures):
            if save_path is not None:
                img_path = f"{path_stem}_{index}.{file_format}" if image_num > 1 else save_path
                pic.save(img_path)

            if display_picture:
                # Show the in-memory image; re-opening the saved file would
                # leave an unclosed file handle behind.
                display(pic)

        return image  # return all the generated data
    finally:
        # Release memory even when editing or saving failed.
        gc.collect()
        torch.cuda.empty_cache()

# Edit-iteration counter used by the iterative editing cell: 0 means no edited
# image has been produced yet; that cell increments it after every edit.
i = 0

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Iterative Bearbeitung

In [None]:
if i == 0:
    # First pass: start from an existing picture and fix the output file name once.
    input_image_path = "/mount/point/veith/generated_pictures/image_250517_115934_1.png"  # image to edit
    file_suffix = datetime.datetime.now().strftime("%y%m%d_%H%M%S")  # current timestamp as suffix
    save_path = f"/mount/point/veith/generated_pictures/edited_image_{file_suffix}.png"
else:
    # Later passes: the previous result becomes the new input. save_path is not
    # regenerated, so the same file is overwritten on every iteration.
    # NOTE(review): save_path is also assigned by the generation cell; re-running
    # that cell between edits would redirect this branch - confirm intended usage.
    input_image_path = save_path

input_image = load_image(input_image_path)
prompt = "Add googly eyes"  # desired change; adjust between iterations

# Editing parameters
num_inference_steps = 16
generator_device = "cpu"
seed = None  # replace with an int for a specific seed


# Run the edit
image = pic_edit(
    input_image=input_image,
    prompt=prompt,
    guidance_scale=7.5,
    save_path=save_path,
    image_num=1,
    num_inference_steps=num_inference_steps,
    generator_device=generator_device,  # previously defined above but never forwarded
    seed=seed,
    display_picture=True,
)
i += 1  # signals that an edited image exists, which subsequent runs keep overwriting