In [1]:
# Smoke-test cell: emit a fixed greeting.
greeting = "hello"
print(greeting)

hello


In [2]:
import torch

# CUDA diagnostics: report the PyTorch build and whether CUDA is usable, then
# (only when it is) describe each visible device and run a small tensor
# addition on the GPU.
print(f"PyTorch version: {torch.__version__}")

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")

cuda_version = torch.version.cuda if has_cuda else 'Not available'
print(f"CUDA version: {cuda_version}")

if not has_cuda:
    print("CUDA is not available. Using CPU only.")
else:
    # Per-device report: name, compute capability, total memory.
    device_count = torch.cuda.device_count()
    print(f"CUDA device count: {device_count}")
    for i in range(device_count):
        label = f"CUDA device {i}"
        print(f"{label}: {torch.cuda.get_device_name(i)}")
        print(f"{label} capability: {torch.cuda.get_device_capability(i)}")
        total_bytes = torch.cuda.get_device_properties(i).total_memory
        print(f"{label} memory: {total_bytes / 1024**3:.2f} GB")

    # Basic CUDA operation: add two random 5x3 tensors on the GPU.
    # Failures are reported rather than raised so the cell always completes.
    try:
        x = torch.rand(5, 3).cuda()
        y = torch.rand(5, 3).cuda()
        z = x + y
    except Exception as e:
        print(f"CUDA tensor operation failed: {e}")
    else:
        try:
            print("CUDA tensor operation successful!")
            print(z)
        except Exception as e:
            print(f"CUDA tensor operation failed: {e}")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
CUDA device count: 1
CUDA device 0: NVIDIA GeForce RTX 3090
CUDA device 0 capability: (8, 6)
CUDA device 0 memory: 23.68 GB
CUDA tensor operation successful!
tensor([[0.6416, 0.2420, 0.5296],
        [0.9849, 1.2160, 1.0862],
        [0.8950, 1.0398, 0.5796],
        [0.8477, 1.0390, 1.6287],
        [1.1263, 0.4763, 1.0878]], device='cuda:0')


In [3]:
import torch

# List every CUDA device visible to PyTorch and report the active one.
device_count = torch.cuda.device_count()
print(f"CUDA devices available: {device_count}")
for i in range(device_count):
    print(f"Device {i}: {torch.cuda.get_device_name(i)}")

# torch.cuda.current_device() initializes CUDA and raises when no usable GPU
# is present, so only query it when CUDA is actually available. Without this
# guard the cell prints a device count of 0 and then crashes.
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.current_device()}")
else:
    print("Current device: none (CUDA is not available)")

CUDA devices available: 1
Device 0: NVIDIA GeForce RTX 3090
Current device: 0


In [2]:
from diffusers import StableDiffusionPipeline
import torch

# Load the checkpoint named by `model_id` with float16 weights and place the
# resulting pipeline on the "cuda" device in a single expression.
model_id = "sd-legacy/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to("cuda")

  from .autonotebook import tqdm as notebook_tqdm
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 13.71it/s]


In [5]:
import pynvml

# Read the current temperature of GPU 0 through the NVIDIA Management
# Library (NVML) and print it in degrees Celsius.
pynvml.nvmlInit()
try:
    # Handle for GPU 0 (first GPU)
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)

    # GPU core temperature sensor reading
    temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)

    print(f"GPU Temperature: {temp}°C")
finally:
    # Always release NVML, even when one of the queries above raises;
    # otherwise the library would stay initialized for the rest of the
    # kernel session.
    pynvml.nvmlShutdown()

GPU Temperature: 35°C


In [None]:
"""
Check how many images I was able to generate in 3 minutes.

Temperature tracking, i.e. take the temperature every second.
"""
# Run the pipeline once for a fixed prompt and save the first returned image
# as a PNG in the working directory.
prompt = "a photo of an astronaut riding a horse on mars"
generated = pipe(prompt)
image = generated.images[0]
image.save("astronaut_rides_horse.png")

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:02<00:00, 22.32it/s]


In [9]:
import datetime
import os
import time

import pynvml
import torch
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Timed image-generation benchmark.
#
# Calls `pipe` (the pipeline created in an earlier cell) in a loop until
# `benchmark_duration` seconds of wall-clock time have passed. For every
# image it records:
#   * the GPU temperature read via NVML just before generation, and
#   * the generation time measured with a pair of CUDA timing events.
# Each image is saved under `output_dir`, a per-image line is appended to a
# timestamped log file, and a summary is printed and appended at the end.
#
# Note: the time limit is only checked between images, so the run can
# overshoot `benchmark_duration` by up to one generation.
# ---------------------------------------------------------------------------

# Benchmark configuration
benchmark_duration = 10  # seconds
prompt = "a photo of an astronaut riding a horse on mars"
output_dir = "benchmark_output"

# Running totals
image_count = 0
total_gpu_time = 0  # milliseconds, summed over all generated images
temp_readings = []

# Timestamped log file name (written to the current working directory)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_filename = f"benchmark_{timestamp}.txt"

# Initialize NVIDIA Management Library for temperature monitoring
pynvml.nvmlInit()
try:
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)

    # Create output directory if needed
    os.makedirs(output_dir, exist_ok=True)

    # Start log file. The encoding is explicit because the log contains the
    # non-ASCII "°" character, which the locale default may not encode.
    with open(log_filename, "w", encoding="utf-8") as log:
        log.write(f"GPU Benchmark - {timestamp}\n")
        log.write(f"Device: {torch.cuda.get_device_name(0)}\n")
        log.write(f"Benchmark duration: {benchmark_duration} seconds\n")
        log.write(f"Prompt: {prompt}\n")
        log.write("-" * 50 + "\n\n")
        log.write("DETAILED LOG:\n")

    # Start the benchmark
    print(f"Starting benchmark for {benchmark_duration} seconds...")
    start_time = time.time()
    end_time = start_time + benchmark_duration

    # Run until time is up
    with tqdm() as pbar:
        while time.time() < end_time:
            # Temperature sample taken before this image is generated
            current_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
            temp_readings.append(current_temp)

            # CUDA timing events bracket the pipeline call
            start_event = torch.cuda.Event(enable_timing=True)
            end_event = torch.cuda.Event(enable_timing=True)

            # Drain pending GPU work so it is not attributed to this image
            torch.cuda.synchronize()
            start_event.record()

            # Generate image
            image = pipe(prompt).images[0]

            # Wait for the end event before reading the elapsed time
            end_event.record()
            torch.cuda.synchronize()

            gpu_time_ms = start_event.elapsed_time(end_event)
            total_gpu_time += gpu_time_ms

            # Save image
            image.save(f"{output_dir}/image_{image_count:03d}.png")

            # Append per-image line; reopening each time keeps the log
            # on disk up to date if a later iteration fails.
            with open(log_filename, "a", encoding="utf-8") as log:
                log.write(f"Image {image_count}: Time={time.time()-start_time:.2f}s, Temp={current_temp}°C, GenTime={gpu_time_ms:.2f}ms\n")

            # Update counter and progress
            image_count += 1
            pbar.update(1)
            pbar.set_description(f"Generated: {image_count} imgs | Current temp: {current_temp}°C")

    # Final temperature reading (guarantees temp_readings is non-empty)
    final_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
    temp_readings.append(final_temp)

    # Calculate results
    elapsed = time.time() - start_time
    avg_time_ms = total_gpu_time / image_count if image_count > 0 else 0
    avg_temp = sum(temp_readings) / len(temp_readings)
    max_temp = max(temp_readings)

    # Create summary. "GPU utilization" here is the share of wall-clock time
    # spent inside the timed generation calls, not a hardware counter.
    summary_lines = [
        "",
        "=" * 50,
        "BENCHMARK SUMMARY:",
        f"Benchmark completed in {elapsed:.2f} seconds",
        f"Images generated: {image_count}",
        f"Images per second: {image_count/elapsed:.2f}",
        f"Average GPU time per image: {avg_time_ms:.2f} ms",
        f"Total GPU processing time: {total_gpu_time/1000:.2f} seconds",
        f"GPU utilization: {(total_gpu_time/1000)/elapsed*100:.1f}%",
        "",
        "Temperature Statistics:",
        f"  Starting temperature: {temp_readings[0]}°C",
        f"  Ending temperature: {final_temp}°C",
        f"  Average temperature: {avg_temp:.1f}°C",
        f"  Maximum temperature: {max_temp}°C",
        f"  Temperature increase: {final_temp - temp_readings[0]}°C",
        "=" * 50,
    ]
    summary = "\n".join(summary_lines)

    # Print summary to console
    print(summary)

    # Add summary to log file
    with open(log_filename, "a", encoding="utf-8") as log:
        log.write(summary)
finally:
    # Clean up NVML even if generation, saving or logging failed part-way.
    pynvml.nvmlShutdown()

print(f"Log saved to {log_filename}")

Starting benchmark for 10 seconds...


100%|██████████| 50/50 [00:02<00:00, 22.14it/s]
100%|██████████| 50/50 [00:02<00:00, 22.31it/s]00:02,  2.43s/it]
100%|██████████| 50/50 [00:02<00:00, 22.31it/s]00:04,  2.41s/it]
100%|██████████| 50/50 [00:02<00:00, 22.32it/s]00:07,  2.40s/it]
100%|██████████| 50/50 [00:02<00:00, 22.33it/s]00:09,  2.40s/it]
Generated: 5 imgs | Current temp: 51°C: : 5it [00:11,  2.40s/it]


BENCHMARK SUMMARY:
Benchmark completed in 11.99 seconds
Images generated: 5
Images per second: 0.42
Average GPU time per image: 2331.35 ms
Total GPU processing time: 11.66 seconds
GPU utilization: 97.2%

Temperature Statistics:
  Starting temperature: 39°C
  Ending temperature: 51°C
  Average temperature: 47.8°C
  Maximum temperature: 51°C
  Temperature increase: 12°C
Log saved to benchmark_2025-04-17_17-29-17.txt



