In [1]:
'''
Runs SD via diffusers, creates a series of frames, each frame is transformed in some way and feedback occurs, and saves an avi at the end
from original script by Dr47. Apr/May 2024
'''
#import libraries
import os
import time 
import argparse
from PIL import Image
from diffusers import AutoPipelineForText2Image
from diffusers import StableDiffusionImg2ImgPipeline
import torch
import imageio
import sys      # provides access to some vars used or maintained by Python interpreter, and to functions that interact with it.
from xformers.ops import MemoryEfficientAttentionFlashAttentionOp # Enable memory efficient attention from xFormers.(not working atm)
import numpy as np
from scipy.ndimage import zoom

#check cuda availability
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

A matching Triton is not available, some optimizations will not be enabled
Traceback (most recent call last):
  File "c:\MyPythonCoding\MyDeepLearningCoding\myDLvenv1\lib\site-packages\xformers\__init__.py", line 55, in _is_triton_available
    from xformers.triton.softmax import softmax as triton_softmax  # noqa
  File "c:\MyPythonCoding\MyDeepLearningCoding\myDLvenv1\lib\site-packages\xformers\triton\softmax.py", line 11, in <module>
    import triton
ModuleNotFoundError: No module named 'triton'


Using device: cuda


In [2]:
# MAIN PARAMETERS CAN BE CHANGED HERE
zoom_fac    = 1.000                     # zoom factor for each frame
frames      = 90                        # number of frames
prompt      = 'a painting of the old Boathouse deli at John"s, Appledore,mid-century, coastal palette, sunny' # prompt for the text to image model

In [3]:
#define model and pipeline

#optimisation trick #1 - Use TensorFloat-32
'''PyTorch enables TF32 mode for convolutions, but NOT for matmul. Enabling it can 
significantly (eg: 24% on RTX3070) speed up computations with minimal loss in numerical accuracy.
'''
torch.backends.cuda.matmul.allow_tf32 = True

args = None

# path to SD1.5 model
SD15_MODEL = "D:/_DUPLICATED/_duplicatedOnSSDgate_1TB/SD_CKPTS_used_by_Auto1111/realisticVisionV51_v51VAE.safetensors"

LOOP_COUNT = 4 # Number of times to loop the final animation (results in loop count +1)

# Define pipeline (half-precision weights for faster execution)
def get_pipeline():
    return (
        StableDiffusionImg2ImgPipeline.from_single_file( SD15_MODEL, torch_dtype=torch.float16 ).to("cuda"),
        
        AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16,
            variant="fp16", use_safetensors=True, safety_checker=None,   ).to("cuda")    )

In [4]:
# function to Generate the image
def fn_generate(input_path, prompt, pipeline):
    init_image = None
    if input_path:
        # Load the input image
        init_image = Image.open(input_path).convert("RGB")
    # Generate the output image
    return pipeline( prompt=prompt, image=init_image, strength=args.strength,
        num_inference_steps=args.num_inference_steps, guidance_scale=args.guidance_scale,
        generator=( torch.Generator(device="cuda").manual_seed(args.seed) if args.seed else None ) ).images[0]

In [5]:
def fn_transform_frame(np_image,zoom_fac): #expects a numpy array
    # Shrink the image
    zoomed_img = zoom(np_image, (zoom_fac, zoom_fac, 1))
    zoomed_w = zoomed_img.shape[1]
    zoomed_h = zoomed_img.shape[0]

    # Create a placeholder image - make it larger to accommodate the zoomed image
    new_img = np.zeros((768, 768, 3), dtype=np.uint8)
    new_w = new_img.shape[1]
    new_h = new_img.shape[0]

    # Calculate the start indices for the zoomed image
    start_i = (new_h - zoomed_h) // 2
    start_j = (new_w - zoomed_w) // 2

    # Copy the zoomed image into center of new image
    new_img[start_i:start_i+zoomed_h, start_j:start_j+zoomed_w] = zoomed_img

    #shift the image 4 pixels to the right
    #new_img = np.roll(new_img, 12, axis=1)
    
    # Add white noise to the black pixels at borders (if present)
    black_pixels = (new_img == [0, 0, 0]).all(axis=-1)
    low = 0 # range of noise values to make white noise less contrasty
    high = 255
    new_img[black_pixels] = np.random.randint(low,high, size=(black_pixels.sum(), 3), dtype=np.uint8)

    # add white noise to 5% of the pixels
    noise_pixels = np.random.rand(new_h, new_w) < 0.05
    new_img[noise_pixels] = np.random.randint(low,high, size=(noise_pixels.sum(), 3), dtype=np.uint8)

    # # swap 5% of pixels with each other
    # swap_pixels = np.random.rand(new_h, new_w) < 0.05
    # swap_indices = np.random.randint(0, new_h, size=(swap_pixels.sum(), 2))
    # new_img[swap_pixels] = new_img[swap_indices[:,0], swap_indices[:,1]]
    
    #crop image to 512x512
    new_img = new_img[128:640, 128:640]

    # Convert np array back to image and save
    f = Image.fromarray(new_img)

    return f


In [6]:
def fn_check_pipeline():
    img2img_pipeline = get_pipeline() # Check if the pipeline is working
    #print(f"img2img_pipeline: {img2img_pipeline}")
    print(f"img2img_pipeline: {str(img2img_pipeline)[:100]}") # print the first 100 characters of the pipeline
    
    if img2img_pipeline is None:
        raise ValueError("pipeline for img2img has issues")
    return
#fn_check_pipeline()     #test the pipeline

In [7]:
#create argument parser
class Args():
    init_image = None
    output_path = "output-frames"
    count = frames  # number of frames to generate
    prompt = prompt # prompt for the text to image model
    strength = 0.65 # was 0.65
    seed = None
    num_inference_steps = 12
    guidance_scale = 8
    skip_frame_generation = False
    model = "SD15"

args = Args()

In [8]:
# main function to generate the images

if not args.skip_frame_generation:
    if not os.path.exists(SD15_MODEL):
        raise FileNotFoundError(f"Model file not found at {SD15_MODEL}. Please check the path and try again.")
    
    img2img_pipeline, text_pipeline = get_pipeline()
    print('pipelines have been gotten!')
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    if args.init_image:
        # get initial image
        src_image = args.init_image

        # copy the initial image to the output path as frame 0
        init_image = Image.open(src_image).convert("RGB")
        init_image = init_image.resize((512,512))
        init_image.save(os.path.join(args.output_path, f'frame_{"0".zfill(4)}.png'))

    else:
        # generate first image from prompt
        #fn_check_pipeline()
        text_pipeline.enable_model_cpu_offload() # this MASSIVELY speeds up inference
        # next line doesn't work :(
        #text_pipeline.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
        outimage = fn_generate(None, args.prompt, text_pipeline)
        outpath = os.path.join(args.output_path, f'frame_{"0".zfill(4)}.png')
        outimage.save(outpath)
        src_image = outpath

    
    # Generate the images
    print ('starting the clock!...')
    tick = time.time_ns()

    for frame_num in range(args.count + 1):
        frame_id = str(frame_num + 1).zfill(4)
        file = os.path.join(args.output_path, f"frame_{frame_id}.png")

        print(f"Generating image from {src_image} and saving as {file}...")
        outimage = fn_generate(src_image, args.prompt, img2img_pipeline)
        
        outimage.save(file)       # save the ORIGINAL image FIRST

        #APPLY TRANSFORMS HERE
        #=====================
        trans_image = fn_transform_frame(np.array(outimage),zoom_fac) # convert to numpy array    
        trans_image.save(file)    # save the transformed image
        src_image = file          # set the source image for the next iteration
    
    tock = time.time_ns()
    baseline = f"{(tock - tick) / 1e9:.1f}" # convert to seconds
    print(f"Execution time -- {baseline} seconds\n")

Some weights of the model checkpoint were not used when initializing CLIPTextModel: 
 ['text_model.embeddings.position_ids']
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


pipelines have been gotten!


  0%|          | 0/12 [00:00<?, ?it/s]

  hidden_states = F.scaled_dot_product_attention(


starting the clock!...
Generating image from output-frames\frame_0000.png and saving as output-frames\frame_0001.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0001.png and saving as output-frames\frame_0002.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0002.png and saving as output-frames\frame_0003.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0003.png and saving as output-frames\frame_0004.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0004.png and saving as output-frames\frame_0005.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0005.png and saving as output-frames\frame_0006.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0006.png and saving as output-frames\frame_0007.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0007.png and saving as output-frames\frame_0008.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0008.png and saving as output-frames\frame_0009.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0009.png and saving as output-frames\frame_0010.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0010.png and saving as output-frames\frame_0011.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0011.png and saving as output-frames\frame_0012.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0012.png and saving as output-frames\frame_0013.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0013.png and saving as output-frames\frame_0014.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0014.png and saving as output-frames\frame_0015.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0015.png and saving as output-frames\frame_0016.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0016.png and saving as output-frames\frame_0017.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0017.png and saving as output-frames\frame_0018.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0018.png and saving as output-frames\frame_0019.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0019.png and saving as output-frames\frame_0020.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0020.png and saving as output-frames\frame_0021.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0021.png and saving as output-frames\frame_0022.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0022.png and saving as output-frames\frame_0023.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0023.png and saving as output-frames\frame_0024.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0024.png and saving as output-frames\frame_0025.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0025.png and saving as output-frames\frame_0026.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0026.png and saving as output-frames\frame_0027.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0027.png and saving as output-frames\frame_0028.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0028.png and saving as output-frames\frame_0029.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0029.png and saving as output-frames\frame_0030.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0030.png and saving as output-frames\frame_0031.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0031.png and saving as output-frames\frame_0032.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0032.png and saving as output-frames\frame_0033.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0033.png and saving as output-frames\frame_0034.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0034.png and saving as output-frames\frame_0035.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0035.png and saving as output-frames\frame_0036.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0036.png and saving as output-frames\frame_0037.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0037.png and saving as output-frames\frame_0038.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0038.png and saving as output-frames\frame_0039.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0039.png and saving as output-frames\frame_0040.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0040.png and saving as output-frames\frame_0041.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0041.png and saving as output-frames\frame_0042.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0042.png and saving as output-frames\frame_0043.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0043.png and saving as output-frames\frame_0044.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0044.png and saving as output-frames\frame_0045.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0045.png and saving as output-frames\frame_0046.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0046.png and saving as output-frames\frame_0047.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0047.png and saving as output-frames\frame_0048.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0048.png and saving as output-frames\frame_0049.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0049.png and saving as output-frames\frame_0050.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0050.png and saving as output-frames\frame_0051.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0051.png and saving as output-frames\frame_0052.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0052.png and saving as output-frames\frame_0053.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0053.png and saving as output-frames\frame_0054.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0054.png and saving as output-frames\frame_0055.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0055.png and saving as output-frames\frame_0056.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0056.png and saving as output-frames\frame_0057.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0057.png and saving as output-frames\frame_0058.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0058.png and saving as output-frames\frame_0059.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0059.png and saving as output-frames\frame_0060.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0060.png and saving as output-frames\frame_0061.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0061.png and saving as output-frames\frame_0062.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0062.png and saving as output-frames\frame_0063.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0063.png and saving as output-frames\frame_0064.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0064.png and saving as output-frames\frame_0065.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0065.png and saving as output-frames\frame_0066.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0066.png and saving as output-frames\frame_0067.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0067.png and saving as output-frames\frame_0068.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0068.png and saving as output-frames\frame_0069.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0069.png and saving as output-frames\frame_0070.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0070.png and saving as output-frames\frame_0071.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0071.png and saving as output-frames\frame_0072.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0072.png and saving as output-frames\frame_0073.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0073.png and saving as output-frames\frame_0074.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0074.png and saving as output-frames\frame_0075.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0075.png and saving as output-frames\frame_0076.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0076.png and saving as output-frames\frame_0077.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0077.png and saving as output-frames\frame_0078.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0078.png and saving as output-frames\frame_0079.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0079.png and saving as output-frames\frame_0080.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0080.png and saving as output-frames\frame_0081.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0081.png and saving as output-frames\frame_0082.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0082.png and saving as output-frames\frame_0083.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0083.png and saving as output-frames\frame_0084.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0084.png and saving as output-frames\frame_0085.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0085.png and saving as output-frames\frame_0086.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0086.png and saving as output-frames\frame_0087.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0087.png and saving as output-frames\frame_0088.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0088.png and saving as output-frames\frame_0089.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0089.png and saving as output-frames\frame_0090.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Generating image from output-frames\frame_0090.png and saving as output-frames\frame_0091.png...


  0%|          | 0/7 [00:00<?, ?it/s]

Execution time -- 188.7 seconds



In [9]:
# post-processing
images = []
# maybe skip first frame, since it can be overpowering? 
# gather all the frames
for i in range(1, args.count + 1):
    images.append( imageio.imread( os.path.join(args.output_path, f"frame_{str(i).zfill(4)}.png") ) )
    if i % 10 == 0:
        print(f"loaded {i} of {args.count+1} frames...")

imageio.mimsave("output.avi", images, fps=12)
print('all done. output.avi saved')

loaded 10 of 91 frames...
loaded 20 of 91 frames...


  images.append( imageio.imread( os.path.join(args.output_path, f"frame_{str(i).zfill(4)}.png") ) )


loaded 30 of 91 frames...
loaded 40 of 91 frames...
loaded 50 of 91 frames...
loaded 60 of 91 frames...
loaded 70 of 91 frames...
loaded 80 of 91 frames...
loaded 90 of 91 frames...
all done. output.avi saved
