# Playground for Comparing Pipeline Outputs and Investigating Pipeline Memory Leaks

In [1]:
from diffusers import StableDiffusionPipeline, DDIMScheduler
from diffusers.utils import make_image_grid
from IPython.display import Image, display
import torch

In [2]:
# Load the Stable Diffusion v1.4 checkpoint with the safety checker disabled.
pipe1 = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
# Device name reused by the later pipeline cell as well.
device = "cuda"
# Move the pipeline onto that device.
pipe1.to(device)


In [None]:

# Second copy of the same checkpoint, used as the comparison pipeline.
pipe2 = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", safety_checker=None)
pipe2.to(device)
# Swap in a DDIM scheduler built from the config of the scheduler it replaces.
pipe2.scheduler = DDIMScheduler.from_config(pipe2.scheduler.config)

In [5]:
# Show which scheduler class each pipeline currently uses, one per line.
print(type(pipe1.scheduler), type(pipe2.scheduler), sep="\n")

In [3]:
def _generate_images(pipeline: StableDiffusionPipeline, prompts: list[str], cols: int, seed: int) -> list:
    """Run ``pipeline`` on ``prompts`` and return the generated images.

    A new generator seeded with ``seed`` is created on the pipeline's device
    for every call, so each pipeline is driven by an identically seeded
    generator.
    """
    with torch.no_grad():
        generator = torch.Generator(device=pipeline.device).manual_seed(seed)
        return pipeline(prompts, num_images_per_prompt=cols, generator=generator).images


def run_grid(prompts: list[str], cols: int, pipeline1: StableDiffusionPipeline, pipeline2: StableDiffusionPipeline | None = None, seed: int = 42):
    """
    Generate a grid of images from multiple prompts using batch processing.
    If pipeline2 is provided, results are displayed side by side for comparison:
    each grid row holds pipeline1's images for a prompt followed by pipeline2's.

    Args:
        prompts: List of prompts to generate images from (one grid row each)
        cols: Number of images to generate per prompt
        pipeline1: First StableDiffusionPipeline to use for generation
        pipeline2: Optional second StableDiffusionPipeline to use for generation
        seed: Random seed for deterministic generation (used for both pipelines)

    Raises:
        ValueError: If ``prompts`` is empty or ``cols`` is smaller than 1.
    """
    # Fail early with a clear message instead of deep inside the pipeline or grid helper.
    if not prompts:
        raise ValueError("prompts must contain at least one prompt")
    if cols < 1:
        raise ValueError("cols must be at least 1")

    rows = len(prompts)

    print(f"Generating {cols} images for each of {rows} prompts with pipeline1")
    images1 = _generate_images(pipeline1, prompts, cols, seed)

    if pipeline2 is None:
        # Single pipeline: one row per prompt, ``cols`` images wide.
        grid = make_image_grid(images1, rows=rows, cols=cols)
    else:
        print(f"Generating {cols} images for each of {rows} prompts with pipeline2")
        images2 = _generate_images(pipeline2, prompts, cols, seed)

        # Interleave per prompt so each row reads: pipeline1 images, then pipeline2 images.
        # NOTE(review): assumes the pipeline returns the ``cols`` images of a prompt
        # consecutively, in prompt order -- confirm against the pipeline docs.
        combined_images = []
        for start in range(0, len(images1), cols):
            combined_images.extend(images1[start:start + cols])
            combined_images.extend(images2[start:start + cols])

        # Twice as many columns because both pipelines share a row.
        grid = make_image_grid(combined_images, rows=rows, cols=cols * 2)

    display(grid)

In [7]:
# Side-by-side comparison of both pipelines, three images per prompt.
prompts = [
    "a cat",
    "a dog",
    "a horse",
    "a bird",
    "trees",
    "a dog sunbathing",
    "a dog front profile",
    "a dog side profile",
]
run_grid(prompts, cols=3, pipeline1=pipe1, pipeline2=pipe2)

In [4]:
# Single-pipeline run: five images for each bear prompt.
prompts = [
    "A bear as a painter working on a landscape",
    "A bear as a soccer goalie making a save",
    "A bear in a trench coat as a private investigator",
    "A bear as a judge in a courtroom",
    "A bear wearing a toga as a philosopher",
    "A bear as a gardener tending to a rose garden",
    "A bear as a ship captain steering a boat",
]
run_grid(prompts, cols=5, pipeline1=pipe1)

In [14]:
# Drop this namespace's references to both pipelines.
# Popping from globals() instead of `if pipeX: del pipeX` keeps the cell
# re-runnable: a name that is already gone (or was never created) is skipped
# rather than raising NameError.
for _pipeline_name in ("pipe1", "pipe2"):
    globals().pop(_pipeline_name, None)

In [16]:

# Ask PyTorch's CUDA allocator to release cached memory it is no longer using.
torch.cuda.empty_cache()


In [8]:
def check_tensor_memory(namespace=None):
    """Print every CUDA tensor bound directly to a name in ``namespace``.

    For each matching tensor the name, shape, size in MB and device are
    printed, followed by the summed size of all listed tensors. Tensors nested
    inside containers or object attributes are not inspected.

    Args:
        namespace: Mapping of variable names to objects. When omitted, this
            module's ``globals()`` is used, looked up at call time.
    """
    if namespace is None:
        namespace = globals()

    print("GPU Tensors in current namespace:")
    print("-" * 50)

    total_memory = 0
    # Iterate over a snapshot so a namespace that changes mid-scan cannot break the loop.
    for name, obj in list(namespace.items()):
        # The report is about GPU memory, so CPU tensors are skipped.
        if isinstance(obj, torch.Tensor) and obj.is_cuda:
            size_mb = obj.numel() * obj.element_size() / 1024 / 1024
            total_memory += size_mb
            print(f"{name:20s} | Shape: {str(obj.shape):20s} | "
                  f"Size: {size_mb:8.2f} MB | Device: {obj.device}")

    print("-" * 50)
    print(f"Total memory used: {total_memory:.2f} MB")

# Usage
check_tensor_memory()

In [9]:
# Report every CUDA tensor that is bound directly to a global name.
# list(...) snapshots the items first: when this runs at cell/module level the
# loop binds `name` and `obj` as globals itself, which would otherwise change
# the dict while it is being iterated.
for name, obj in list(globals().items()): # Or locals() if in a function
    if torch.is_tensor(obj) and obj.is_cuda:
        print(f"Variable: {name}, Shape: {obj.shape}, Data type: {obj.dtype}, Memory (MB): {(obj.nelement() * obj.element_size()) / 1024**2:.4f}")

In [10]:
import torch
import gc

def show_gpu_tensors():
    # Get IPython instance to access user namespace
    try:
        from IPython import get_ipython
        ipython = get_ipython()
        user_ns = ipython.user_ns if ipython else globals()
    except:
        user_ns = globals()
    
    print("GPU Tensors in IPython namespace:")
    print("=" * 70)
    
    gpu_tensors = []
    
    # Check all variables in user namespace
    for var_name, obj in user_ns.items():
        if isinstance(obj, torch.Tensor) and obj.is_cuda:
            size_mb = obj.numel() * obj.element_size() / 1024 / 1024
            gpu_tensors.append({
                'name': var_name,
                'tensor': obj,
                'size_mb': size_mb
            })
        # Also check if it's a list/dict containing tensors
        elif isinstance(obj, (list, tuple)):
            for i, item in enumerate(obj):
                if isinstance(item, torch.Tensor) and item.is_cuda:
                    size_mb = item.numel() * item.element_size() / 1024 / 1024
                    gpu_tensors.append({
                        'name': f"{var_name}[{i}]",
                        'tensor': item,
                        'size_mb': size_mb
                    })
        elif isinstance(obj, dict):
            for key, item in obj.items():
                if isinstance(item, torch.Tensor) and item.is_cuda:
                    size_mb = item.numel() * item.element_size() / 1024 / 1024
                    gpu_tensors.append({
                        'name': f"{var_name}['{key}']",
                        'tensor': item,
                        'size_mb': size_mb
                    })
    
    # Sort by memory usage
    gpu_tensors.sort(key=lambda x: x['size_mb'], reverse=True)
    
    total_memory = sum(t['size_mb'] for t in gpu_tensors)
    
    for tensor_info in gpu_tensors:
        tensor = tensor_info['tensor']
        print(f"Variable: {tensor_info['name']:25s} | "
              f"Shape: {str(tuple(tensor.shape)):20s} | "
              f"Size: {tensor_info['size_mb']:8.2f} MB | "
              f"Type: {str(tensor.dtype):15s} | "
              f"Device: {tensor.device}")
    
    print("=" * 70)
    print(f"Total GPU tensors found: {len(gpu_tensors)}")
    print(f"Total memory used: {total_memory:.2f} MB ({total_memory/1024:.2f} GB)")
    
    # Show overall GPU memory status
    if torch.cuda.is_available():
        print(f"GPU Memory - Allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB, "
              f"Reserved: {torch.cuda.memory_reserved()/1e9:.2f} GB")

# Run it
show_gpu_tensors()

In [12]:
def find_all_gpu_tensors():
    """Print every CUDA tensor the garbage collector currently tracks.

    Unlike a namespace scan this also finds tensors that are not bound to any
    variable name. Tensors are listed largest first with their shape, size in
    MB, dtype and ``id()``, followed by the count and summed size.
    """
    print("All GPU tensors in memory (including unreferenced):")
    print("=" * 60)

    # (tensor, size in MB) for every CUDA tensor found.
    gpu_tensors = []

    # Use garbage collector to find all tensor objects
    for obj in gc.get_objects():
        try:
            is_cuda_tensor = isinstance(obj, torch.Tensor) and obj.is_cuda
        except Exception:
            # Some tracked objects raise when merely inspected (for example a
            # weakref proxy whose referent is gone); they cannot be tensors we
            # care about, so skip them instead of aborting the whole scan.
            continue
        if is_cuda_tensor:
            size_mb = obj.numel() * obj.element_size() / 1024 / 1024
            gpu_tensors.append((obj, size_mb))

    # Sort by size, largest first.
    gpu_tensors.sort(key=lambda entry: entry[1], reverse=True)

    total_memory = sum(size_mb for _, size_mb in gpu_tensors)

    print(f"{'Index':<6} {'Shape':<20} {'Size (MB)':<12} {'Type':<15} {'ID':<15}")
    print("-" * 75)

    for index, (tensor, size_mb) in enumerate(gpu_tensors, start=1):
        print(f"{index:<6} {str(tuple(tensor.shape)):<20} "
              f"{size_mb:<12.2f} {str(tensor.dtype):<15} "
              f"{id(tensor):<15}")

    print("-" * 75)
    print(f"Total tensors: {len(gpu_tensors)}, Total memory: {total_memory:.2f} MB")

find_all_gpu_tensors()

In [13]:
import torch
import gc
from IPython import get_ipython

def find_tensor_references_safe(tensor_ids=None):
    """Search the interactive namespace for variables that reference given CUDA tensors.

    Each public variable is searched, descending at most two container levels
    (lists, tuples, dicts; only the first 100 items of containers with fewer
    than 1000 entries). Every hit is printed with its access path, shape,
    size in MB and ``id()``.

    Args:
        tensor_ids: Iterable of ``id()`` values of the tensors to look for.
            Defaults to the three ids that were hard-coded in this cell.
            NOTE(review): those look like values copied from an earlier
            listing; ``id()`` values only identify objects within the
            session that produced them.
    """
    if tensor_ids is None:
        tensor_ids = (127537981066192, 127537981072672, 127537981065952)
    # A set gives constant-time membership tests during the recursive search.
    huge_tensor_ids = frozenset(tensor_ids)

    # Get the IPython namespace, falling back to module globals when IPython
    # is missing or no shell is active.
    try:
        from IPython import get_ipython
    except ImportError:
        ipython = None
    else:
        ipython = get_ipython()
    user_ns = ipython.user_ns if ipython is not None else globals()

    print("Searching for references to huge tensors...")
    print("=" * 60)

    # Create a snapshot of the namespace to avoid iteration issues
    namespace_snapshot = dict(user_ns)

    # Function to recursively search through objects
    def search_object(obj, path="", max_depth=2, current_depth=0):
        if current_depth > max_depth:
            return []

        references = []

        try:
            if isinstance(obj, torch.Tensor) and obj.is_cuda:
                if id(obj) in huge_tensor_ids:
                    size_mb = obj.numel() * obj.element_size() / 1024 / 1024
                    references.append((path, obj.shape, size_mb, id(obj)))

            elif isinstance(obj, (list, tuple)) and current_depth < max_depth and len(obj) < 1000:
                for i, item in enumerate(obj[:100]):  # Limit to first 100 items
                    new_path = f"{path}[{i}]" if path else f"item[{i}]"
                    references.extend(search_object(item, new_path, max_depth, current_depth + 1))

            elif isinstance(obj, dict) and current_depth < max_depth and len(obj) < 1000:
                for key, item in list(obj.items())[:100]:  # Limit and convert to list
                    new_path = f"{path}['{key}']" if path else f"dict['{key}']"
                    references.extend(search_object(item, new_path, max_depth, current_depth + 1))

        except Exception:
            # Deliberate best effort: an object that cannot be inspected is
            # skipped; hits collected before the failure are still returned.
            pass

        return references

    # Search through snapshot of variables
    all_references = []
    for var_name, obj in namespace_snapshot.items():
        if not var_name.startswith('_') and var_name not in ['In', 'Out']:  # Skip IPython internals
            try:
                references = search_object(obj, var_name)
                all_references.extend(references)
            except Exception as e:
                print(f"Skipped {var_name} due to error: {type(e).__name__}")

    # Display results
    if all_references:
        print("Found references to huge tensors:")
        for path, shape, size_mb, tensor_id in all_references:
            print(f"Variable: {path}")
            print(f"  Shape: {shape}")
            print(f"  Size: {size_mb:.2f} MB")
            print(f"  ID: {tensor_id}")
            print("-" * 40)
    else:
        print("No direct references found in user namespace.")

find_tensor_references_safe()

In [14]:
def check_autograd_tensors():
    """Print large CUDA tensors that require gradients.

    Every object tracked by the garbage collector is examined; CUDA tensors
    with ``requires_grad`` set and a size above 100 MB are listed largest
    first with their shape, size, ``grad_fn`` and ``id()``.
    """
    print("Checking for autograd/gradient tensors...")

    large_grad_tensors = []
    for obj in gc.get_objects():
        try:
            wanted = isinstance(obj, torch.Tensor) and obj.requires_grad and obj.is_cuda
        except Exception:
            # Some tracked objects raise when merely inspected (for example a
            # weakref proxy whose referent is gone); skip them rather than
            # aborting the scan.
            continue
        if not wanted:
            continue
        size_mb = obj.numel() * obj.element_size() / 1024 / 1024
        if size_mb > 100:  # Only show large ones
            large_grad_tensors.append((obj, size_mb))

    large_grad_tensors.sort(key=lambda x: x[1], reverse=True)

    for tensor, size_mb in large_grad_tensors:
        print(f"Gradient tensor: Shape {tensor.shape}, Size: {size_mb:.2f} MB")
        print(f"  requires_grad: {tensor.requires_grad}")
        print(f"  grad_fn: {tensor.grad_fn}")
        print(f"  ID: {id(tensor)}")
        print("-" * 30)

check_autograd_tensors()

In [15]:
def find_computation_graph_tensors(target_ids=None):
    """Print CUDA tensors that are attached to an autograd graph.

    Every object tracked by the garbage collector is examined. A CUDA tensor
    with a non-``None`` ``grad_fn`` is listed when it is larger than 100 MB or
    its ``id()`` is one of ``target_ids``; targets are marked in the output.

    Args:
        target_ids: Iterable of ``id()`` values to report regardless of size.
            Defaults to the three ids that were hard-coded in this cell.
            NOTE(review): ``id()`` values only identify objects within the
            session that produced them.
    """
    print("Looking for tensors in computation graphs...")

    if target_ids is None:
        target_ids = (127537981066192, 127537981072672, 127537981065952)
    target_ids = frozenset(target_ids)

    # Find tensors with grad_fn (part of computation graph)
    graph_tensors = []
    for obj in gc.get_objects():
        try:
            is_cuda_tensor = isinstance(obj, torch.Tensor) and obj.is_cuda
        except Exception:
            # Some tracked objects raise when merely inspected (for example a
            # weakref proxy whose referent is gone); skip them rather than
            # aborting the scan.
            continue
        if is_cuda_tensor and obj.grad_fn is not None:
            size_mb = obj.numel() * obj.element_size() / 1024 / 1024
            if id(obj) in target_ids or size_mb > 100:  # Large tensors or our targets
                graph_tensors.append((obj, size_mb))

    graph_tensors.sort(key=lambda x: x[1], reverse=True)

    for tensor, size_mb in graph_tensors:
        print(f"Graph tensor: Shape {tensor.shape}, Size: {size_mb:.2f} MB")
        print(f"  grad_fn: {type(tensor.grad_fn).__name__ if tensor.grad_fn else None}")
        print(f"  ID: {id(tensor)} {'<-- TARGET' if id(tensor) in target_ids else ''}")
        print("-" * 30)

find_computation_graph_tensors()

In [16]:
import torch
import gc

# List every CUDA tensor the garbage collector tracks, then print PyTorch's
# own CUDA memory summary (when CUDA is available).
print("Tensors on GPU:")
for obj in gc.get_objects():
    try:
        if torch.is_tensor(obj) and obj.is_cuda:
            print(f"  Size: {obj.size()}, Dtype: {obj.dtype}, Device: {obj.device}, Shape: {obj.shape}")
            # You can add more properties if needed, like obj.numel() for total elements
    except Exception:
        # Best effort: some tracked objects raise when inspected and are skipped.
        # `Exception` rather than a bare `except` so that Ctrl-C (KeyboardInterrupt)
        # can still interrupt a long scan.
        pass

if torch.cuda.is_available():
    print("\nCUDA Memory Summary:")
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
else:
    print("\nCUDA is not available.")
