In [1]:
# !pip install transformers

In [3]:
import model_loader
import pipeline
from PIL import Image
from pathlib import Path
from transformers import CLIPTokenizer
import torch
import os

# Pick the compute device. CPU is the fallback; the ALLOW_* flags make it
# possible to opt out of an accelerator without editing the detection logic.
DEVICE = "cpu"

ALLOW_CUDA = True
ALLOW_MPS = False

# Flags are checked first so a disabled backend is never probed.
if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
# Only `is_available()` is consulted: the deprecated `torch.has_mps` reports
# that the build includes MPS support, not that an MPS device can be used.
elif ALLOW_MPS and torch.backends.mps.is_available():
    DEVICE = "mps"
print(f"Using device: {DEVICE}")

# Folder holding the tokenizer files and the checkpoint. It defaults to the
# original local folder; set the SD_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get("SD_DATA_DIR", "C:/Users/Yash/Desktop/Projects/stable_diffusion/data"))

# as_posix() keeps forward slashes, so the default paths are the same strings
# that were previously hard-coded.
tokenizer = CLIPTokenizer(
    (DATA_DIR / "tokenizer_vocab.json").as_posix(),
    merges_file=(DATA_DIR / "tokenizer_merges.txt").as_posix(),
)
model_file = (DATA_DIR / "v1-5-pruned-emaonly.ckpt").as_posix()
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

## TEXT TO IMAGE

prompt = "A dog with sunglasses, wearing comfy hat, looking at camera, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution."
# Alternative prompts:
# prompt = "colorful texture like art, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution."
# prompt = "A modern glass skyscraper of 50 floors, highly detailed, ultra sharp, cinematic, 100mm lens, 8k resolution."
uncond_prompt = ""  # Also known as the negative prompt
do_cfg = True
cfg_scale = 8  # min: 1, max: 14

## IMAGE TO IMAGE

image_path = "../images/dog.jpg"
input_image = None
# Leave the next line commented out to disable image-to-image.
# input_image = Image.open(image_path)
# A higher strength adds more noise to the input image, so the result ends up
# further from it; a lower strength adds less noise, so the output stays closer.
strength = 0.7

## SAMPLER

sampler = "ddpm"
num_inference_steps = 75
seed = 42

# Gather every argument in one mapping, then run the pipeline once.
generation_kwargs = dict(
    prompt=prompt,
    uncond_prompt=uncond_prompt,
    input_image=input_image,
    strength=strength,
    do_cfg=do_cfg,
    cfg_scale=cfg_scale,
    sampler_name=sampler,
    n_inference_steps=num_inference_steps,
    seed=seed,
    models=models,
    device=DEVICE,
    idle_device="cpu",
    tokenizer=tokenizer,
)
images = pipeline.generate(**generation_kwargs)

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


100%|██████████| 75/75 [07:28<00:00,  5.98s/it]


In [5]:
def generate_images(images):
    """Lazily yield the element at index 0 of every item in `images`."""
    yield from (item[0] for item in images)

# Build the generator once. It is lazy and single-pass: items are produced only
# when it is iterated (e.g. with next() or a for loop) and it cannot be rewound.
generate_oi = generate_images(images)

In [2]:
# next(generate_oi)

In [None]:
import streamlit as st
import torch
from torchvision.utils import make_grid
import torchvision.transforms as T
import time

# Minimal Streamlit page that replays the items produced by `generate_oi`.
# NOTE(review): the captured cell output suggests this is meant to be launched
# with `streamlit run` rather than executed inside the kernel — confirm.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
IMG_SIZE = 64  # or 128

st.set_page_config(layout="wide")
st.title("🌀 DDPM Generative Playground")

# Sidebar controls
# NOTE(review): `steps` and `seed` are read here but not consumed below
# (the `torch.manual_seed(seed)` call had been commented out).
sidebar = st.sidebar
sidebar.header("Generation Settings")
steps = sidebar.slider("Inference Steps", 20, 200, 75, step=5)
seed = sidebar.slider("Random Seed", 0, 9999, 42)
generate = sidebar.button("Generate")

# Single placeholder; every displayed image replaces the previous one.
output = st.empty()

if generate:
    st.write("Generating image...")

    # Build the tensor-to-PIL converter once and reuse it for every item.
    to_pil = T.ToPILImage()
    for img_tensor in generate_oi:
        pil_img = to_pil(make_grid(img_tensor, nrow=1))
        output.image(pil_img, caption="Step image", use_column_width=True)
        time.sleep(0.1)  # short pause between successive images

2025-04-24 10:35:12.887 
  command:

    streamlit run c:\Users\Yash\Desktop\Projects\NN-from-scratch\nn-env\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


: 

In [1]:
import os
from PIL import Image
from pathlib import Path
from transformers import CLIPTokenizer
import torch

import model_loader
import pipeline

# ----------- DEVICE SELECTION ------------
# CPU is the fallback; the ALLOW_* flags make it possible to opt out of an
# accelerator without editing the detection logic.
DEVICE = "cpu"
ALLOW_CUDA = True
ALLOW_MPS = False

# Flags are checked first so a disabled backend is never probed.
if ALLOW_CUDA and torch.cuda.is_available():
    DEVICE = "cuda"
# Only `is_available()` is consulted: the deprecated `torch.has_mps` reports
# that the build includes MPS support, not that an MPS device can be used.
elif ALLOW_MPS and torch.backends.mps.is_available():
    DEVICE = "mps"
print(f"Using device: {DEVICE}")

# ----------- TOKENIZER AND MODEL LOADING ------------
# Folder holding the tokenizer files and the checkpoint. It defaults to the
# original local folder; set the SD_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get("SD_DATA_DIR", "C:/Users/Yash/Desktop/Projects/stable_diffusion/data"))

# as_posix() keeps forward slashes, so the default paths are the same strings
# that were previously hard-coded.
tokenizer = CLIPTokenizer(
    (DATA_DIR / "tokenizer_vocab.json").as_posix(),
    merges_file=(DATA_DIR / "tokenizer_merges.txt").as_posix(),
)
model_file = (DATA_DIR / "v1-5-pruned-emaonly.ckpt").as_posix()
models = model_loader.preload_models_from_standard_weights(model_file, DEVICE)

# ----------- PROMPT & GENERATION SETTINGS ------------
prompt = "ornamental sci-fi surface texture, glowing filigree, engraved runes, shimmering metal, iridescent hues, high detail, volumetric lighting, cinematic"
uncond_prompt = ""
do_cfg = True
cfg_scale = 8
input_image = None
strength = 0.5
sampler = "ddpm"
num_inference_steps = 50
seed = 42

# ----------- UNITY EXPORT SETTINGS ------------
# These do not change between runs, so they are set up once. Creating the
# folder before generating also surfaces a bad output path immediately instead
# of after the first (multi-minute) generation has finished.
output_dir = "C:/Users/Yash/Desktop/textures_unity_ready"
target_size = (128, 128)
os.makedirs(output_dir, exist_ok=True)

# ----------- IMAGE GENERATION ------------
# Three runs (i = 2, 3, 4). Each run adds 5 inference steps and 0.05 strength
# on top of the previous run's values; the seed stays the same for all runs.
for i in range(2, 5):
    num_inference_steps += 5
    strength += 0.05
    output_image = pipeline.generate(
        prompt=prompt,
        uncond_prompt=uncond_prompt,
        input_image=input_image,
        strength=strength,
        do_cfg=do_cfg,
        cfg_scale=cfg_scale,
        sampler_name=sampler,
        n_inference_steps=num_inference_steps,
        seed=seed,
        models=models,
        device=DEVICE,
        idle_device="cpu",
        tokenizer=tokenizer
    )

    # Convert the generated array to an RGB image and resize it to target_size.
    img = Image.fromarray(output_image).convert("RGB")
    img = img.resize(target_size, resample=Image.BICUBIC)

    # The run index keeps the three files distinct within one execution;
    # re-running the cell overwrites them because the names are fixed.
    output_path = os.path.join(output_dir, f"texture_generated_{i}.png")
    img.save(output_path)
    print(f"Unity texture saved at: {output_path}")


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


100%|██████████| 55/55 [05:22<00:00,  5.86s/it]


Unity texture saved at: C:/Users/Yash/Desktop/textures_unity_ready\texture_generated_2.png


100%|██████████| 60/60 [06:12<00:00,  6.21s/it]


Unity texture saved at: C:/Users/Yash/Desktop/textures_unity_ready\texture_generated_3.png


100%|██████████| 65/65 [09:06<00:00,  8.41s/it]


Unity texture saved at: C:/Users/Yash/Desktop/textures_unity_ready\texture_generated_4.png


In [None]:
import torch
# Report the CUDA version PyTorch was built against (None for CPU-only builds).
print(torch.version.cuda)
# get_device_name(0) raises when no CUDA device is usable, so guard the query
# to keep the cell runnable on CPU-only machines.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("CUDA device not available")

12.6
NVIDIA GeForce RTX 4060 Laptop GPU
