In [1]:
#import libraries
import argparse
import os
import shutil      # locate the ffmpeg executable for post-processing
import subprocess  # run ffmpeg with an argument list instead of a shell string
import sys      # provides access to some vars used or maintained by Python interpreter, and to functions that interact with it.
import time

import imageio
import numpy as np   # for 3D rotate
import scipy.ndimage # for 3D rotate
import torch
from diffusers import AutoPipelineForText2Image
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image, ImageChops
from xformers.ops import MemoryEfficientAttentionFlashAttentionOp # Enable memory efficient attention from xFormers.(not working atm)

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)


A matching Triton is not available, some optimizations will not be enabled
Traceback (most recent call last):
    from xformers.triton.softmax import softmax as triton_softmax  # noqa
    import triton
ModuleNotFoundError: No module named 'triton'


Using device: cuda


In [2]:
#define model and pipeline

#optimisation trick #1 - Use TensorFloat-32
# On Ampere and later CUDA devices, matrix multiplications and convolutions
# can use the TensorFloat-32 (TF32) mode for faster, but slightly less
# accurate computations. By default, PyTorch enables TF32 mode for
# convolutions, but NOT for matmul. Enabling it can significantly speed up
# computations with minimal loss in numerical accuracy.
torch.backends.cuda.matmul.allow_tf32 = True
# note just this change improved inference speed by 24% on RTX 3070

# Placeholder so functions that read the global `args` can be defined;
# a later cell rebinds it to the actual settings object.
args = None

# path to SD1.5 model
# Raw string on purpose: in a normal string literal the "\r" in
# "\realisticVision..." is a carriage-return escape (and "\_" / "\S" are
# invalid escapes), which silently corrupts the Windows path.
SD15_MODEL = r"D:\_DUPLICATED\_duplicatedOnSSDgate_1TB\SD_CKPTS_used_by_Auto1111\realisticVisionV51_v51VAE.safetensors"
LOOP_COUNT = 4 # Number of times to loop the final animation

# Define pipeline (half-precision weights for faster execution)
# def get_pipeline():
#     return (
#         StableDiffusionImg2ImgPipeline.from_single_file(
#             SD15_MODEL, torch_dtype=torch.float16
#         ).to("cuda"),
        
#         AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5",
#             torch_dtype=torch.float16,
#             variant="fp16", use_safetensors=True, safety_checker=None,
#         ).to("cuda")
#     )

def get_pipeline():
    """Build the two half-precision pipelines used by the notebook.

    Returns:
        A 2-tuple ``(img2img_pipeline, text2img_pipeline)``:
        the first is loaded from the single-file checkpoint at ``SD15_MODEL``,
        the second from the "runwayml/stable-diffusion-v1-5" repository id.
        Both are moved to the "cuda" device before being returned.
    """
    # img2img pipeline from the local checkpoint file
    img2img = StableDiffusionImg2ImgPipeline.from_single_file(
        SD15_MODEL,
        torch_dtype=torch.float16,
    )
    img2img = img2img.to("cuda")

    # text-to-image pipeline (fp16 variant, safety checker disabled)
    txt2img = AutoPipelineForText2Image.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
        safety_checker=None,
    )
    txt2img = txt2img.to("cuda")

    return (img2img, txt2img)



In [3]:
# function to Generate the image

def fn_generate(input_path, prompt, pipeline):
    """Run ``pipeline`` once and return the first generated image.

    Sampling settings (``strength``, ``num_inference_steps``,
    ``guidance_scale``, ``seed``) are read from the module-level ``args``.

    Args:
        input_path: path of the image to use as the starting point, or a
            falsy value (e.g. ``None``) to pass ``image=None`` to the pipeline.
        prompt: text prompt forwarded to the pipeline.
        pipeline: callable pipeline; its result must expose ``.images``.

    Returns:
        ``pipeline(...).images[0]``.
    """
    init_image = None
    if input_path:
        # Load the input image; the context manager closes the file handle
        # as soon as the RGB copy has been made.
        with Image.open(input_path) as source:
            init_image = source.convert("RGB")

    # Compare against None explicitly so that seed 0 is honoured as a real
    # seed instead of being treated as "no seed".
    generator = None
    if args.seed is not None:
        generator = torch.Generator(device="cuda").manual_seed(args.seed)

    # Generate the output image
    result = pipeline(
        prompt=prompt,
        image=init_image,
        strength=args.strength,
        num_inference_steps=args.num_inference_steps,
        guidance_scale=args.guidance_scale,
        generator=generator,
    )
    return result.images[0]

In [4]:
def fn_check_pipeline():
    """Smoke-test ``get_pipeline()`` by loading both pipelines once.

    ``get_pipeline()`` returns a 2-tuple, so it is unpacked here and each
    element is checked individually (a tuple itself is never ``None``).
    Note that this loads both models just to discard them again.

    Raises:
        ValueError: if either returned pipeline is ``None``.
    """
    img2img_pipeline, text_pipeline = get_pipeline()

    # print only the first 100 characters of each pipeline's repr
    print(f"img2img_pipeline: {str(img2img_pipeline)[:100]}")
    print(f"text_pipeline: {str(text_pipeline)[:100]}")

    if img2img_pipeline is None:
        raise ValueError("pipeline for img2img has issues")
    if text_pipeline is None:
        raise ValueError("pipeline for text2img has issues")

    return

fn_check_pipeline()     #test the pipeline

OSError: We couldn't connect to 'https://huggingface.co' to load this model, couldn't find it in the cached files and it looks like (None,) is not the path to a directory containing a file named None or 
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/diffusers/installation#offline-mode'.

In [1]:
# settings object (a plain class standing in for parsed command-line arguments)
class Args:
    """Run settings, read by the other cells through the global ``args``."""

    # --- input / output ---
    init_image = None               # path of a starting image; None -> frame 0 is generated from the prompt
    output_path = "output-frames"   # directory the frames are written to
    count = 50  # number of frames to generate

    # --- model / sampling ---
    model = "SD15"
    prompt = "a strange being, solo, male, wearing light brown sackcloth, 21st century, style of Hieronymus Bosch, spurious, rotors, floral background, flowers, flowery background"  # replace with your value
    seed = None
    strength = 0.65
    guidance_scale = 7
    num_inference_steps = 10

    # --- workflow switches ---
    skip_frame_generation = False
    gif = True
    no_interpolate = True # disable motion interpolation for final ffmpeg post-processing


args = Args()

In [None]:
# main cell: generate the frames (top-level script code, not a function)

# Frame generation: write frame 0 (the given image or a text-to-image result),
# then repeatedly feed the previous frame through img2img, apply a small
# translate / zoom / rotate, and save the result as the next frame.
if not args.skip_frame_generation:
    img2img_pipeline, text_pipeline = get_pipeline()

    # exist_ok makes a separate os.path.exists() check unnecessary
    os.makedirs(args.output_path, exist_ok=True)

    # frame 0 is stored under the same naming scheme as the generated frames
    first_frame_path = os.path.join(args.output_path, "frame_0000.png")

    if args.init_image:
        # get initial image
        src_image = args.init_image

        # copy the initial image to the output path as frame 0
        init_image = Image.open(src_image).convert("RGB")
        #init_image = init_image.resize((1024, 1024))
        init_image.save(first_frame_path)

    else:
        # generate first image from prompt
        text_pipeline.enable_model_cpu_offload() # this MASSIVELY speeds up inference
        # next line doesn't work :(
        #text_pipeline.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
        outimage = fn_generate(None, args.prompt, text_pipeline)
        outimage.save(first_frame_path)
        src_image = first_frame_path

    # Generate the images
    tick = time.time_ns()

    # Frames are numbered 1..count. The previous range(count + 1) combined
    # with "frame_num + 1" produced one extra frame (count + 1) that the GIF
    # step, which reads frames 1..count, never used.
    for frame_num in range(1, args.count + 1):
        frame_id = str(frame_num).zfill(4)
        output_file_path = os.path.join(args.output_path, f"frame_{frame_id}.png")

        print(f"Generating image for {src_image} to {output_file_path}...")

        outimage = fn_generate(src_image, args.prompt, img2img_pipeline)

        #APPLY TRANSFORMS HERE
        #=====================
        outimage = ImageChops.offset(outimage, -2, 2)     # translate image

        # zoom by 3%, rotate by 1 degree, then crop the top-left 512x512 region
        outimage = outimage.resize((int(outimage.width * 1.03), int(outimage.height * 1.03)))
        outimage = outimage.rotate(1)
        outimage = outimage.crop((0, 0, 512, 512))

        outimage.save(output_file_path)     # save the transformed image

        src_image = output_file_path        # the saved frame is the input of the next iteration

    tock = time.time_ns()
    baseline = f"{(tock - tick) / 1e9:.1f}" # convert nanoseconds to seconds
    print(f"Execution time -- {baseline} seconds\n")


In [None]:
# post-processing

# Post-processing: assemble frames 1..count into output.gif and, unless
# interpolation is disabled, run two ffmpeg passes (motion interpolation,
# then looping) to produce "<prompt>.webm".
if args.gif:  # honour the setting instead of a hard-coded True
    # skip first frame, since it can be overpowering
    # may want to skip first few frames
    frame_paths = [
        os.path.join(args.output_path, f"frame_{str(i).zfill(4)}.png")
        for i in range(1, args.count + 1)
    ]
    images = [imageio.imread(frame_path) for frame_path in frame_paths]

    imageio.mimsave("output.gif", images, duration=1.0)
    print("GIF saved to output.gif")

    if not args.no_interpolate:
        # check if ffmpeg is available
        if shutil.which("ffmpeg") is None:
            # Only skip this step; calling exit() here would contradict the
            # message and terminate the whole session.
            print("ffmpeg not found, skipping interpolation")
        else:
            interpolated_path = "output-interpolated-once.webm"
            try:
                # interpolate the gif
                # Argument lists (no shell) keep the prompt text from being
                # interpreted by a shell when it is used as the file name.
                subprocess.run(
                    [
                        "ffmpeg", "-i", "output.gif",
                        "-filter", "minterpolate='fps=60'",
                        interpolated_path,
                    ],
                    check=True,
                )
                subprocess.run(
                    [
                        "ffmpeg", "-stream_loop", str(LOOP_COUNT),
                        "-i", interpolated_path,
                        "-c", "copy",
                        f"{args.prompt}.webm",
                    ],
                    check=True,
                )
            finally:
                # remove the intermediate file even when an ffmpeg pass fails
                if os.path.exists(interpolated_path):
                    os.remove(interpolated_path)
