In [1]:
# Print a fixed greeting (presumably a quick check that the kernel runs code -- confirm).
print("hello")

hello


In [2]:
import torch

# CUDA diagnostics: report the PyTorch build, then describe each visible GPU
# and run a tiny tensor addition on it.
cuda_available = torch.cuda.is_available()
cuda_version = torch.version.cuda if cuda_available else 'Not available'

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")
print(f"CUDA version: {cuda_version}")

if not cuda_available:
    print("CUDA is not available. Using CPU only.")
else:
    # Per-device summary: name, compute capability and total memory.
    device_count = torch.cuda.device_count()
    print(f"CUDA device count: {device_count}")
    for i in range(device_count):
        device_name = torch.cuda.get_device_name(i)
        device_capability = torch.cuda.get_device_capability(i)
        # total_memory is divided by 1024**3 before being printed with a "GB" label.
        device_memory = torch.cuda.get_device_properties(i).total_memory / 1024**3
        print(f"CUDA device {i}: {device_name}")
        print(f"CUDA device {i} capability: {device_capability}")
        print(f"CUDA device {i} memory: {device_memory:.2f} GB")

    # Basic CUDA operation: create two random tensors on the CPU, move them to
    # the GPU and add them. Any failure is reported instead of raised.
    try:
        x = torch.rand(5, 3).to("cuda")
        y = torch.rand(5, 3).to("cuda")
        z = x + y
        print("CUDA tensor operation successful!")
        print(z)
    except Exception as e:
        print(f"CUDA tensor operation failed: {e}")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
CUDA device count: 1
CUDA device 0: NVIDIA GeForce RTX 3090
CUDA device 0 capability: (8, 6)
CUDA device 0 memory: 23.68 GB
CUDA tensor operation successful!
tensor([[0.6416, 0.2420, 0.5296],
        [0.9849, 1.2160, 1.0862],
        [0.8950, 1.0398, 0.5796],
        [0.8477, 1.0390, 1.6287],
        [1.1263, 0.4763, 1.0878]], device='cuda:0')


In [3]:
import torch
# List the CUDA devices PyTorch can see and report the currently selected one.
device_count = torch.cuda.device_count()
print(f"CUDA devices available: {device_count}")
for i in range(device_count):
    print(f"Device {i}: {torch.cuda.get_device_name(i)}")

# torch.cuda.current_device() initializes CUDA and raises when no usable
# device exists, so only query it when CUDA is actually available.
current_device = torch.cuda.current_device() if torch.cuda.is_available() else None
if current_device is None:
    print("Current device: none (CUDA is not available)")
else:
    print(f"Current device: {current_device}")

CUDA devices available: 1
Device 0: NVIDIA GeForce RTX 3090
Current device: 0


In [5]:
from diffusers import StableDiffusionPipeline
import torch

# Identifier of the pretrained Stable Diffusion checkpoint handed to from_pretrained.
model_id = "sd-legacy/stable-diffusion-v1-5"

# Build the pipeline with float16 weights and move it onto the CUDA device in one step.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to("cuda")

Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 13.15it/s]


In [8]:
import pynvml

# Read the temperature of GPU 0 through the NVIDIA Management Library (NVML).
pynvml.nvmlInit()
try:
    # Handle for GPU 0 (first GPU)
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)

    # Temperature of the GPU sensor, printed below as degrees Celsius
    temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)

    print(f"GPU Temperature: {temp}°C")
finally:
    # Always release NVML, even if the handle lookup or the temperature query
    # raised; otherwise the library stays initialized in this kernel.
    pynvml.nvmlShutdown()

GPU Temperature: 36°C


In [None]:
# Open notes for this experiment, kept as a no-op string expression.
"""
check how many images i was able to generate in 3 minutes

temperature tracking - ie take temperature every second
"""
# Run the pipeline once on a fixed text prompt, take the first returned image
# and write it to disk as a PNG.
prompt = "a photo of an astronaut riding a horse on mars"
images = pipe(prompt).images
image = images[0]
image.save("astronaut_rides_horse.png")

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:02<00:00, 22.32it/s]


In [9]:
# Time one image generation using CUDA events.
prompt = "a photo of an astronaut riding a horse on mars"

# Untimed warm-up pass so one-off setup work is not included in the measurement.
_ = pipe(prompt).images[0]

# Two timing-enabled events that bracket the measured run.
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)

# Wait for outstanding GPU work before the start marker is recorded.
torch.cuda.synchronize()
start_event.record()

# The measured run.
image = pipe(prompt).images[0]

# Record the end marker, then wait for the GPU so that both events have
# completed before the elapsed time is read.
end_event.record()
torch.cuda.synchronize()

# elapsed_time() reports milliseconds between the two events.
gpu_time_ms = start_event.elapsed_time(end_event)
print(f"GPU execution time: {gpu_time_ms:.2f} ms ({gpu_time_ms/1000:.3f} seconds)")

# Keep the image produced by the measured run.
image.save("astronaut_rides_horse.png")

100%|██████████| 50/50 [00:02<00:00, 22.23it/s]
100%|██████████| 50/50 [00:02<00:00, 22.37it/s]


GPU execution time: 2319.58 ms (2.320 seconds)
