# Troubleshooting and Best Practices

This notebook collects common issues, solutions, and best practices for using Promethium.

**Prerequisites:**
- Python 3.10+
- Some experience with Promethium
- `numpy`, `torch`, and `psutil` installed (imported by the diagnostics cells below)

**Topics Covered:**
- Common error patterns
- Diagnostic utilities
- Performance optimization
- Best practices

In [None]:
# Uncomment to install (%pip installs into the environment of the running kernel):
# %pip install promethium-seismic==1.0.3

In [None]:
# Confirm the package imports cleanly and record which version is in use.
import promethium
print(f"Promethium version: {promethium.__version__}")

## 1. Environment Diagnostics

In [None]:
import sys
import numpy as np
import torch

# Print the interpreter and core library versions, then CUDA details if usable.
print("System Information")
print("=" * 50)
print(f"Python version: {sys.version}")
print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only device index 0 is described; additional GPUs are not listed.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Memory check
# psutil is a third-party package; install it if this import fails.
import psutil

# Host RAM. Byte counts are divided by 1e9, so values are decimal gigabytes.
mem = psutil.virtual_memory()
print(f"RAM Total: {mem.total / 1e9:.1f} GB")
print(f"RAM Available: {mem.available / 1e9:.1f} GB")
print(f"RAM Used: {mem.percent:.1f}%")

# Relies on `torch` imported in the environment diagnostics cell.
if torch.cuda.is_available():
    # Total memory of device index 0 only.
    gpu_mem = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {gpu_mem / 1e9:.1f} GB")

## 2. Common Issues and Solutions

### Issue 1: Shape Mismatch

In [None]:
# Problem: Data has wrong dimensions
# Solution: Check and reshape

def check_data_shape(data, expected_ndim=2):
    """Print shape diagnostics for ``data`` and flag a dimensionality mismatch.

    Prints the shape, ndim and dtype, followed by either an OK line or a
    warning. A fix is suggested only when applying it would actually yield
    ``expected_ndim`` dimensions.

    Args:
        data: Array-like object exposing ``shape``, ``ndim`` and ``dtype``.
        expected_ndim: Number of dimensions the caller requires (default 2).

    Returns:
        None. All diagnostics are written to stdout.
    """
    print(f"Data shape: {data.shape}")
    print(f"Data ndim: {data.ndim}")
    print(f"Data dtype: {data.dtype}")

    if data.ndim == expected_ndim:
        print("OK: Shape is correct")
        return

    print(f"WARNING: Expected {expected_ndim}D, got {data.ndim}D")
    if data.ndim == 1 and expected_ndim == 2:
        # reshape(1, -1) always produces a 2D array, so only offer it when 2D is wanted.
        print("  Suggestion: data = data.reshape(1, -1)")
    elif data.ndim > expected_ndim:
        # squeeze() drops every length-1 axis, so it only helps when that
        # lands exactly on the expected number of dimensions.
        squeezed_ndim = sum(1 for size in data.shape if size != 1)
        if squeezed_ndim == expected_ndim:
            print("  Suggestion: data = data.squeeze()")
        else:
            print(
                f"  Suggestion: data.squeeze() would give {squeezed_ndim}D; "
                f"index or reshape explicitly to get {expected_ndim}D"
            )

# Example
# Generate synthetic traces and inspect their shape. The second return value
# is discarded; `data` is reused by later cells in this notebook.
from promethium import generate_synthetic_traces
data, _ = generate_synthetic_traces(n_traces=10, n_samples=100)
check_data_shape(data)

### Issue 2: NaN or Inf Values

In [None]:
def check_data_validity(data):
    """Count NaN and Inf entries in ``data`` and report the findings.

    Prints both counts, then either a warning with a suggested
    ``np.nan_to_num`` fix or an OK line.

    Args:
        data: Numeric array-like accepted by ``np.isnan`` / ``np.isinf``.

    Returns:
        A truthy value when no NaN or Inf entries were found, falsy otherwise.
    """
    nan_count = np.sum(np.isnan(data))
    inf_count = np.sum(np.isinf(data))
    # Counts are never negative, so "both zero" is the exact opposite of
    # "either one is positive".
    is_clean = nan_count == 0 and inf_count == 0

    print(f"NaN values: {nan_count}")
    print(f"Inf values: {inf_count}")

    if is_clean:
        print("OK: No invalid values")
    else:
        print("WARNING: Invalid values detected")
        print("  Solution: data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)")

    return is_clean

# Validate the synthetic traces generated earlier; the returned flag is shown as the cell output.
check_data_validity(data)

### Issue 3: GPU Memory Errors

In [None]:
def clear_gpu_memory():
    """Empty PyTorch's CUDA cache and print the memory still in use.

    When CUDA is unavailable, only a notice is printed. Otherwise
    ``torch.cuda.empty_cache()`` is called and the allocated and reserved
    byte counts are printed in decimal gigabytes (bytes / 1e9).
    """
    if not torch.cuda.is_available():
        print("No GPU available")
        return

    torch.cuda.empty_cache()
    print("GPU memory cache cleared")

    # Report current usage
    allocated = torch.cuda.memory_allocated() / 1e9
    reserved = torch.cuda.memory_reserved() / 1e9
    print(f"Allocated: {allocated:.2f} GB")
    print(f"Reserved: {reserved:.2f} GB")

# Clear the CUDA cache now and print current usage (prints a notice instead when no GPU is available).
clear_gpu_memory()

## 3. Performance Optimization

In [None]:
import time

def benchmark_operation(func, *args, n_runs=5, **kwargs):
    """Time repeated calls of ``func`` and print summary statistics.

    Each call is timed with ``time.perf_counter``; the mean, standard
    deviation, minimum and maximum wall-clock times are printed in
    milliseconds. No warm-up run and no device synchronization is performed,
    so asynchronously launched work (e.g. CUDA kernels) may not be fully
    reflected in the timings.

    Args:
        func: Callable to benchmark.
        *args: Positional arguments forwarded to ``func`` on every run.
        n_runs: Number of timed calls; must be at least 1.
        **kwargs: Keyword arguments forwarded to ``func`` on every run.

    Returns:
        The return value of the final call to ``func``.

    Raises:
        ValueError: If ``n_runs`` is less than 1.
    """
    # Fail fast: with zero runs there is nothing to average and no result to
    # return, and NumPy would otherwise fail later with an unrelated message.
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    times = []
    result = None

    for _ in range(n_runs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        times.append(end - start)

    print(f"Mean time: {np.mean(times)*1000:.2f} ms")
    print(f"Std time: {np.std(times)*1000:.2f} ms")
    print(f"Min time: {np.min(times)*1000:.2f} ms")
    print(f"Max time: {np.max(times)*1000:.2f} ms")

    return result

# Example benchmark
# Times the filter on data[0], the first entry of the synthetic traces generated earlier.
# NOTE(review): lowcut/highcut/fs are presumably in Hz — confirm against the bandpass_filter docs.
# The result is assigned to `_` so it is not displayed as the cell output.
from promethium import bandpass_filter
print("Benchmarking bandpass filter:")
_ = benchmark_operation(bandpass_filter, data[0], lowcut=5, highcut=50, fs=250)

## 4. Best Practices Summary

### Data Handling
1. Always validate data shape and dtype before processing
2. Use float32 for memory efficiency
3. Check for NaN/Inf values after operations

### Memory Management
1. Process large datasets in chunks
2. Delete intermediate variables with `del`
3. Call `torch.cuda.empty_cache()` after deleting large GPU tensors to release cached memory that is no longer in use

### Reproducibility
1. Always call `set_seed()` at the start
2. Document library versions
3. Save both data and configuration

### Performance
1. Use GPU when available
2. Process multiple traces in batches
3. Profile before optimizing

In [None]:
# Template for reproducible experiments
# The template is printed as text, not executed. load_your_data, expected_shape
# and ground_truth appear to be placeholders for the reader to supply.
# NOTE(review): the template uses np, SeismicRecoveryPipeline and
# evaluate_reconstruction without importing them — add those imports when adapting it.
print("Reproducible Experiment Template:")
print("""
# 1. Set random seeds
from promethium import set_seed
set_seed(42)

# 2. Document versions
import promethium
print(f"Promethium: {promethium.__version__}")

# 3. Configure device
from promethium import get_device
device = get_device()

# 4. Load and validate data
data = load_your_data()
assert data.shape == expected_shape
assert not np.any(np.isnan(data))

# 5. Run pipeline
pipeline = SeismicRecoveryPipeline.from_preset('...')
result = pipeline.run(data)

# 6. Evaluate and save
metrics = evaluate_reconstruction(ground_truth, result)
np.save('results.npy', result)
""")