In [None]:
# Step 1: Install required libraries
!pip install diffusers transformers accelerate torch safetensors
!pip install pillow

# Step 2: Import necessary modules
from diffusers import StableDiffusionImg2ImgPipeline
import torch
from PIL import Image
from google.colab import files

# Step 3: Load the Stable Diffusion Image-to-Image model
def load_img2img_model():
    """Load the Stable Diffusion image-to-image pipeline onto the best device.

    Picks "cuda" when ``torch.cuda.is_available()`` reports a GPU and "cpu"
    otherwise, loads the ``runwayml/stable-diffusion-v1-5`` weights, and
    moves the pipeline to that device. Progress is reported via ``print``.

    Returns:
        tuple: ``(pipeline, device)`` where ``device`` is the string
        "cuda" or "cpu" that the pipeline was moved to.
    """
    print("Checking for GPU availability...")
    has_gpu = torch.cuda.is_available()
    device = "cuda" if has_gpu else "cpu"
    if has_gpu:
        print("GPU is available. Using CUDA for acceleration.")
    else:
        print("No GPU found. Falling back to CPU. This may be slower.")

    # float16 weights are requested only on CUDA; the CPU path uses float32.
    weights_dtype = torch.float16 if has_gpu else torch.float32

    print("Loading Stable Diffusion Image-to-Image model...")
    img2img = StableDiffusionImg2ImgPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=weights_dtype,
    ).to(device)
    print(f"Model loaded successfully on device: {device}")
    return img2img, device

# Step 4: Upload and load the input image
def upload_image():
    """Prompt for a file upload in Colab and load it as an RGB PIL image.

    Uses ``files.upload()`` to show the upload widget, takes the first
    uploaded entry's name, and opens that file from disk with PIL.

    Returns:
        tuple: ``(image, image_path)`` where ``image`` is the uploaded
        picture converted to RGB mode and ``image_path`` is the name the
        upload was stored under.

    Raises:
        ValueError: If the upload finished without any file (for example
            when the dialog is dismissed), instead of failing later with an
            opaque ``IndexError``.
    """
    print("Please upload your input image...")
    uploaded = files.upload()  # Mapping keyed by the stored file name
    if not uploaded:
        raise ValueError("No file was uploaded; please select an image and try again.")

    # Only the first uploaded file is used; any additional files are ignored.
    image_path = next(iter(uploaded))
    print(f"Uploaded image: {image_path}")

    # convert() returns a new image, so the source handle can be closed here.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    return image, image_path

# Step 5: Generate an image based on the input image and text prompt
def generate_image(pipeline, input_image, prompt, output_file, device, strength=0.75, guidance_scale=7.5):
    """Run the img2img pipeline once and save the first resulting image.

    Args:
        pipeline: Callable invoked with ``prompt``, ``image``, ``strength``
            and ``guidance_scale`` keyword arguments; its result must expose
            an ``images`` sequence.
        input_image: Source image forwarded to the pipeline as ``image``.
        prompt: Text prompt forwarded to the pipeline.
        output_file: Path the first generated image is saved to.
        device: Accepted for interface compatibility; not used in this
            function.
        strength: Forwarded unchanged to the pipeline.
        guidance_scale: Forwarded unchanged to the pipeline.

    Returns:
        The ``output_file`` path that was written.
    """
    print(f"Generating image for prompt: {prompt}")
    call_kwargs = {
        "prompt": prompt,
        "image": input_image,
        "strength": strength,
        "guidance_scale": guidance_scale,
    }
    result = pipeline(**call_kwargs)

    # Only the first image of the batch is kept.
    first_image = result.images[0]
    first_image.save(output_file)
    print(f"Generated image saved to: {output_file}")
    return output_file

# Step 6: Main function
if __name__ == "__main__":
    # Fixed inputs for this run: the text prompt and where the result goes.
    text_prompt = "A futuristic cityscape with neon lights and flying cars"
    output_image_path = "/content/generated_image.jpg"

    # Bring up the pipeline first, then ask the user for the source image.
    pipeline, device = load_img2img_model()
    uploaded_image, input_image_path = upload_image()

    # Run img2img with generate_image's default strength and guidance_scale.
    output_file_path = generate_image(
        pipeline=pipeline,
        input_image=uploaded_image,
        prompt=text_prompt,
        output_file=output_image_path,
        device=device,
    )

    # Hand the saved file back to the user through the Colab download helper.
    print("Downloading the generated image...")
    files.download(output_file_path)


Checking for GPU availability...
No GPU found. Falling back to CPU. This may be slower.
Loading Stable Diffusion Image-to-Image model...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model loaded successfully on device: cpu
Please upload your input image...


Saving logo.jpg to logo (1).jpg
Uploaded image: logo (1).jpg
Generating image for prompt: A futuristic cityscape with neon lights and flying cars


  0%|          | 0/37 [00:00<?, ?it/s]