# SciTeX Decorators Module Tutorial

This notebook demonstrates the powerful decorator utilities in SciTeX for type conversion, batch processing, caching, and more.

## 1. Setup and Imports

In [None]:
import os
import time
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

import scitex as stx

# Optional imports
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    print("PyTorch not available. Some examples will be skipped.")

try:
    import xarray as xr
    XARRAY_AVAILABLE = True
except ImportError:
    XARRAY_AVAILABLE = False
    print("Xarray not available. Some examples will be skipped.")

# Seed the global RNGs once, up front, so that "Restart Kernel -> Run All"
# prints the same random example data in every cell below.
SEED = 42
np.random.seed(SEED)
if TORCH_AVAILABLE:
    torch.manual_seed(SEED)

# Enable auto-ordering for decorators (recommended)
stx.decorators.enable_auto_order()
print("Auto-ordering enabled for decorators")

## 2. Type Conversion Decorators

### 2.1 NumPy Function Decorator

In [None]:
# numpy_fn in its simplest form
@stx.decorators.numpy_fn
def compute_statistics(x):
    """Summarise ``x`` by its mean, standard deviation and shape.

    The body calls ``x.mean()``, ``x.std()`` and reads ``x.shape``, so ``x``
    must offer that interface by the time the body runs (presumably the
    ``numpy_fn`` decorator converts the caller's input -- confirm against the
    scitex documentation).

    Returns:
        dict with the keys ``'mean'``, ``'std'`` and ``'shape'``.
    """
    average = x.mean()
    spread = x.std()
    return dict(mean=average, std=spread, shape=x.shape)

# Feed the same decorated function three differently-typed inputs
print("=== numpy_fn Decorator ===")

# 1) plain Python list
list_data = [1, 2, 3, 4, 5]
result = compute_statistics(list_data)
print(f"\nList input: {list_data}")
print(f"Result: {result}")

# 2) pandas Series
series_values = [10, 20, 30, 40, 50]
series_data = pd.Series(series_values)
result = compute_statistics(series_data)
print("\nPandas Series input:")
print(series_data)
print(f"Result: {result}")

# 3) an array that is already NumPy
array_data = np.random.randn(3, 4)
result = compute_statistics(array_data)
input_shape = array_data.shape
print(f"\nNumPy array input shape: {input_shape}")
print(f"Result: {result}")

### 2.2 PyTorch Function Decorator

In [None]:
if not TORCH_AVAILABLE:
    print("PyTorch not available, skipping torch_fn examples")
else:
    print("=== torch_fn Decorator ===")

    @stx.decorators.torch_fn
    def neural_computation(x, dim=-1):
        """Return ``torch.softmax`` of ``x`` along ``dim`` (last axis by default)."""
        probabilities = torch.softmax(x, dim=dim)
        return probabilities

    # --- NumPy array input ---
    np_data = np.random.randn(5, 3)
    result = neural_computation(np_data)
    print(f"NumPy input shape: {np_data.shape}")
    print(f"Result type: {type(result)}")
    print(f"Result shape: {result.shape}")
    print(f"Result device: {result.device}")

    # --- Nested Python list input ---
    # NOTE(review): these values are integers; softmax needs a floating dtype,
    # so this presumably relies on torch_fn casting the input -- confirm.
    list_data = [[1, 2, 3], [4, 5, 6]]
    result = neural_computation(list_data)
    print(f"\nList input: {list_data}")
    print(f"Result:\n{result}")

    # --- NumPy-style ``axis`` keyword forwarded to torch's ``dim`` ---
    @stx.decorators.torch_fn
    def sum_along_axis(x, axis=None):
        """Sum every element of ``x``, or only along ``axis`` when one is given."""
        return x.sum() if axis is None else x.sum(dim=axis)

    data = np.array([[1, 2, 3], [4, 5, 6]])
    print(f"\nOriginal data:\n{data}")
    column_sums = sum_along_axis(data, axis=0)
    print(f"Sum along axis 0: {column_sums}")
    row_sums = sum_along_axis(data, axis=1)
    print(f"Sum along axis 1: {row_sums}")

### 2.3 Pandas Function Decorator

In [None]:
print("=== pandas_fn Decorator ===")

@stx.decorators.pandas_fn
def analyze_dataframe(df):
    """Build a summary report for ``df``.

    The body uses the DataFrame API (``shape``, ``columns``, ``dtypes``,
    ``isnull``, ``describe``); presumably the ``pandas_fn`` decorator turns the
    caller's input into a DataFrame first -- confirm against scitex docs.

    Returns:
        dict with keys ``'shape'``, ``'columns'`` (list), ``'dtypes'`` (dict),
        ``'missing'`` (per-column null counts as a dict) and ``'summary'``
        (the result of ``df.describe()``).
    """
    report = {}
    report['shape'] = df.shape
    report['columns'] = df.columns.tolist()
    report['dtypes'] = df.dtypes.to_dict()
    null_counts = df.isnull().sum()
    report['missing'] = null_counts.to_dict()
    report['summary'] = df.describe()
    return report

# Exercise analyze_dataframe with two non-DataFrame inputs

# 1) dictionary of columns (column 'C' contains one missing value)
dict_data = dict(
    A=[1, 2, 3, 4],
    B=[5.5, 6.5, 7.5, 8.5],
    C=['x', 'y', 'z', None],
)
result = analyze_dataframe(dict_data)
print("Dictionary input analysis:")
for label, key in (("Shape", 'shape'), ("Columns", 'columns'), ("Missing values", 'missing')):
    print(f"  {label}: {result[key]}")

# 2) 2-D NumPy array
array_data = np.random.randn(10, 3)
result = analyze_dataframe(array_data)
print("\nNumPy array input analysis:")
print(f"  Shape: {result['shape']}")
summary_table = result['summary']
print(f"  Summary:\n{summary_table}")

## 3. Batch Processing Decorator

### 3.1 Basic Batch Processing

In [None]:
print("=== batch_fn Decorator ===")

# Minimal batch_fn example
@stx.decorators.batch_fn
def process_sample(x):
    """Evaluate the polynomial ``x**2 + 2*x + 1`` for ``x``."""
    quadratic_term = x ** 2
    linear_term = 2 * x
    return quadratic_term + linear_term + 1

# Run the decorated function on several samples at once
samples = np.array([1, 2, 3, 4, 5])
results = process_sample(samples)
print(f"Input samples: {samples}")
print(f"Processed results: {results}")

# batch_fn on 2-D input
@stx.decorators.batch_fn
def normalize_vector(v):
    """Scale ``v`` to unit Euclidean norm.

    When the norm is not strictly positive, ``v`` is returned unchanged so
    that no division by zero takes place.
    """
    length = np.linalg.norm(v)
    if not length > 0:
        return v
    return v / length

# Five random 3-dimensional vectors, normalised in one call
vectors = np.random.randn(5, 3)
normalized = normalize_vector(vectors)
print(f"\nOriginal vectors shape: {vectors.shape}")
print(f"Normalized vectors shape: {normalized.shape}")
row_norms = np.linalg.norm(normalized, axis=1)
print(f"Norms after normalization: {row_norms}")

### 3.2 Batch Processing with Multiple Dimensions

In [None]:
# batch_fn configured through a keyword argument
# NOTE(review): ``matrices`` below has a single leading batch axis (shape
# (batch_size, 2, 2)); confirm that n_batch_dims=2 is what batch_fn expects here.
@stx.decorators.batch_fn(n_batch_dims=2)
def process_matrix(M):
    """Return the determinant of ``M`` as computed by ``np.linalg.det``."""
    determinant = np.linalg.det(M)
    return determinant

# Build a stack of random 2x2 matrices and run it through the decorated function
batch_size = 4
matrices = np.random.randn(batch_size, 2, 2)
determinants = process_matrix(matrices)

print("Batch processing of matrices:")
for idx in range(batch_size):
    value = determinants[idx]
    print(f"  Matrix {idx} determinant: {value:.4f}")

# batch_fn where each item produces a scalar label
@stx.decorators.batch_fn
def classify_point(point):
    """Return the quadrant number (1-4) of a two-element ``point``.

    Points on an axis count towards the non-negative side: ``x >= 0, y >= 0``
    is quadrant 1, ``x < 0, y >= 0`` is 2, ``x < 0, y < 0`` is 3, and anything
    else is 4.
    """
    x, y = point
    if x >= 0 and y >= 0:
        return 1  # Quadrant I
    if x < 0 and y >= 0:
        return 2  # Quadrant II
    if x < 0 and y < 0:
        return 3  # Quadrant III
    return 4  # Quadrant IV

# Ten random 2-D points, classified in one call
points = np.random.randn(10, 2)
quadrants = classify_point(points)

print("\nPoint classification:")
for idx, pair in enumerate(zip(points, quadrants)):
    point, quad = pair
    print(f"  Point {idx}: {point} -> Quadrant {quad}")

## 4. Combined Decorators

### 4.1 Auto-Ordering in Action

In [None]:
print("=== Combined Decorators with Auto-Ordering ===")

# Auto-ordering was switched on in the setup cell; this tutorial's premise is
# that the stacking order written below therefore does not matter.
@stx.decorators.batch_fn  # expected to be reordered so that it applies second
@stx.decorators.numpy_fn  # expected to be reordered so that it applies first
def compute_features(x):
    """Return ``[mean, std, min, max, median]`` of ``x`` as a NumPy array."""
    summary = (
        x.mean(),
        x.std(),
        x.min(),
        x.max(),
        np.median(x),
    )
    return np.array(summary)

# Three samples given as a plain list of lists
batch_data = [
    [1, 2, 3, 4, 5],
    [10, 20, 30, 40, 50],
    [5, 5, 5, 5, 5],
]

features = compute_features(batch_data)
print("Extracted features:")
print(f"Shape: {features.shape}")

# Render one table row per sample; the five columns follow compute_features' order
print("\nFeature matrix:")
print("Sample | Mean  | Std   | Min   | Max   | Median")
print("-" * 50)
for row_idx, feat in enumerate(features):
    cells = " | ".join(f"{feat[col]:5.1f}" for col in range(5))
    print(f"{row_idx:6d} | {cells}")

### 4.2 PyTorch Batch Processing

In [None]:
if TORCH_AVAILABLE:
    print("=== PyTorch Batch Processing ===")

    # Use pre-combined decorator
    @stx.decorators.batch_torch_fn
    def apply_activation(x, temperature=1.0):
        """Apply temperature-scaled softmax over the last axis of ``x``.

        Args:
            x: logits; divided by ``temperature`` before the softmax.
            temperature: values below 1 sharpen the distribution, values
                above 1 flatten it.
        """
        return torch.softmax(x / temperature, dim=-1)

    # Test with batch
    logits = np.random.randn(5, 10)  # 5 samples, 10 classes

    # Different temperatures
    for temp in [0.5, 1.0, 2.0]:
        probs = apply_activation(logits, temperature=temp)
        # 1e-8 keeps log() finite should a probability underflow to zero
        entropy = -(probs * torch.log(probs + 1e-8)).sum(dim=-1).mean()
        print(f"\nTemperature {temp}:")
        print(f"  Output shape: {probs.shape}")
        print(f"  Average entropy: {entropy:.3f}")
        print(f"  Max probability: {probs.max(dim=-1)[0].mean():.3f}")
else:
    # Say so explicitly instead of leaving the cell output empty, matching
    # the torch_fn cell in section 2.2.
    print("PyTorch not available, skipping PyTorch batch processing example")

## 5. Caching Decorators

### 5.1 Memory Caching

In [None]:
print("=== cache_mem Decorator ===")

# A deliberately slow function wrapped in the in-memory cache decorator
@stx.decorators.cache_mem
def expensive_computation(n):
    """Return the sum of ``i**2`` for ``i`` in ``range(n)``.

    Prints a message and sleeps for one second on every real execution, which
    makes it visible whether a call actually ran the body.
    """
    print(f"Computing for n={n}...")
    time.sleep(1)  # Simulate work
    total = 0
    for i in range(n):
        total += i**2
    return total

def _timed_call(fn, *args):
    """Call ``fn(*args)`` and return ``(result, elapsed_seconds)``.

    Uses ``time.perf_counter``, the monotonic high-resolution clock meant for
    measuring short intervals (``time.time`` can be coarse or jump).
    """
    started = time.perf_counter()
    value = fn(*args)
    return value, time.perf_counter() - started


# First calls (slow)
print("First calls (computed):")
result1, time1 = _timed_call(expensive_computation, 1000)
print(f"  n=1000: {result1} (took {time1:.3f}s)")

result2, time2 = _timed_call(expensive_computation, 2000)
print(f"  n=2000: {result2} (took {time2:.3f}s)")

# Repeated calls (cached, fast)
print("\nRepeated calls (cached):")
result1_cached, time1_cached = _timed_call(expensive_computation, 1000)
print(f"  n=1000: {result1_cached} (took {time1_cached:.6f}s)")

result2_cached, time2_cached = _timed_call(expensive_computation, 2000)
print(f"  n=2000: {result2_cached} (took {time2_cached:.6f}s)")

# A cached call can finish within one clock tick; guard the division so the
# cell cannot die with ZeroDivisionError.
if time1_cached > 0:
    print(f"\nSpeedup: {time1/time1_cached:.0f}x")
else:
    print("\nSpeedup: cached call was too fast to measure")

### 5.2 Disk Caching

In [None]:
print("=== cache_disk Decorator ===")

# Large computation with disk caching
@stx.decorators.cache_disk
def generate_large_dataset(size, seed=42):
    """Generate a reproducible random dataset of shape ``(size, 100)``.

    Args:
        size: number of rows to generate.
        seed: seed for the random generator.

    Returns:
        dict with keys ``'data'`` (the array), ``'mean'``, ``'std'`` and
        ``'size'``.

    The numbers come from a private ``RandomState`` rather than from
    ``np.random.seed`` on the global generator, so calling this function does
    not silently reset the random stream used by the rest of the notebook.
    ``RandomState(seed).randn`` produces the same values as the legacy
    ``np.random.seed(seed); np.random.randn(...)`` pair.
    """
    print(f"Generating dataset of size {size} with seed {seed}...")
    rng = np.random.RandomState(seed)
    data = rng.randn(size, 100)
    return {
        'data': data,
        'mean': data.mean(),
        'std': data.std(),
        'size': size
    }

# First call (generates and caches)
print("First call (generating):")
start = time.perf_counter()  # monotonic clock intended for interval timing
dataset1 = generate_large_dataset(10000)
time1 = time.perf_counter() - start
print(f"  Generated in {time1:.3f}s")
print(f"  Mean: {dataset1['mean']:.6f}")

# Second call (loads from cache)
print("\nSecond call (from cache):")
start = time.perf_counter()
dataset2 = generate_large_dataset(10000)
time2 = time.perf_counter() - start
print(f"  Loaded in {time2:.3f}s")
print(f"  Mean: {dataset2['mean']:.6f}")
print(f"  Data identical: {np.array_equal(dataset1['data'], dataset2['data'])}")

# Different parameters (new computation)
print("\nDifferent parameters:")
dataset3 = generate_large_dataset(5000)  # Different size
print(f"  Size: {dataset3['size']}")

# Show cache location
# NOTE(review): this path is assumed to be where cache_disk stores its
# entries -- confirm against the scitex documentation.
cache_dir = Path.home() / ".cache" / "scitex" / "cache"
print(f"\nCache directory: {cache_dir}")
if cache_dir.exists():
    # rglob("*") also yields sub-directories; count regular files only so the
    # number printed really is a file count.
    cache_files = [entry for entry in cache_dir.rglob("*") if entry.is_file()]
    print(f"Cache files: {len(cache_files)} files")

## 6. Utility Decorators

### 6.1 Timeout Decorator

In [None]:
print("=== timeout Decorator ===")

# A function guarded by a 2-second timeout with a custom error message
@stx.decorators.timeout(seconds=2, error_message="Function took too long!")
def potentially_slow_function(sleep_time):
    """Sleep for ``sleep_time`` seconds, then return a completion message."""
    print(f"Processing for {sleep_time} seconds...")
    time.sleep(sleep_time)
    message = f"Completed after {sleep_time}s"
    return message

# Case 1: one second of work, below the 2-second limit
try:
    result = potentially_slow_function(1)
except Exception as exc:
    print(f"Error: {exc}")
else:
    print(f"Fast execution: {result}")

# Case 2: three seconds of work, above the limit -- the failure is the demo,
# so it is caught and displayed rather than left to abort the notebook
print("\nTrying slow execution...")
try:
    result = potentially_slow_function(3)
except Exception as exc:
    print(f"Timeout error: {exc}")
else:
    print(f"Slow execution: {result}")

### 6.2 Deprecation Decorator

In [None]:
print("=== deprecated Decorator ===")

# The decorator argument is the migration hint attached to the function
@stx.decorators.deprecated("Use new_function() instead")
def old_function(x):
    """Return ``x * 2``; kept only to demonstrate the deprecation decorator."""
    doubled = x * 2
    return doubled

# Using deprecated function shows warning.
# ``warnings`` is imported in the notebook's import cell, not here.
with warnings.catch_warnings(record=True) as caught:
    # "always" disables the once-per-location filter so the warning is
    # recorded even when this cell is re-run.
    warnings.simplefilter("always")
    result = old_function(5)
    print(f"Result: {result}")
    if caught:
        print(f"Warning: {caught[0].message}")

# Replacement for old_function; it carries no deprecation decorator
def new_function(x):
    """Return ``x * 2 + 1``."""
    doubled = x * 2
    return doubled + 1

result = new_function(5)
print(f"\nNew function result: {result} (no warning)")

## 7. Real-World Examples

### 7.1 Data Processing Pipeline

In [None]:
# End-to-end example: preprocessing, feature extraction and a timeout guard
class DataProcessor:
    """Small pipeline whose steps are each wrapped in scitex decorators."""

    # NOTE(review): cache_mem is stacked on a method that receives ``self`` and
    # an array; whether such arguments can serve as cache keys depends on the
    # cache_mem implementation -- confirm.
    @stx.decorators.cache_mem
    @stx.decorators.numpy_fn
    def load_and_preprocess(self, data, normalize=True):
        """Centre ``data`` column-wise and optionally scale to unit variance.

        Args:
            data: array-like whose ``mean(axis=0)`` is subtracted.
            normalize: when true, additionally divide by the per-column
                standard deviation (columns with zero deviation are divided
                by 1 instead).

        Returns:
            The centred array, scaled when ``normalize`` is true.
        """
        centered = data - data.mean(axis=0)
        if not normalize:
            return centered

        scale = centered.std(axis=0)
        scale[scale == 0] = 1  # constant columns: avoid dividing by zero
        return centered / scale

    @stx.decorators.batch_fn
    @stx.decorators.numpy_fn
    def extract_features(self, sample):
        """Return a flat feature vector describing ``sample``.

        The features are, in order: mean, standard deviation, the 25th, 50th
        and 75th percentiles, the range (max - min) and the mean absolute
        value.
        """
        parts = [
            sample.mean(),
            sample.std(),
            np.percentile(sample, [25, 50, 75]),
            sample.max() - sample.min(),
            np.abs(sample).mean(),
        ]
        # Array-valued parts (the percentiles) contribute one entry per
        # element; everything else contributes a single entry.
        flat = [
            value
            for part in parts
            for value in (part if isinstance(part, np.ndarray) else (part,))
        ]
        return np.array(flat)

    @stx.decorators.timeout(seconds=5)
    def process_dataset(self, raw_data):
        """Run preprocessing followed by feature extraction on ``raw_data``.

        Prints the shape of each intermediate result and returns a dict with
        the keys ``'preprocessed'``, ``'features'``, ``'n_samples'`` and
        ``'n_features'``.
        """
        preprocessed = self.load_and_preprocess(raw_data)
        print(f"Preprocessed shape: {preprocessed.shape}")

        features = self.extract_features(preprocessed)
        print(f"Features shape: {features.shape}")

        summary = dict(
            preprocessed=preprocessed,
            features=features,
            n_samples=len(preprocessed),
            n_features=features.shape[1],
        )
        return summary

# Use the processor
processor = DataProcessor()

# Generate sample data
raw_data = np.random.randn(100, 20)  # 100 samples, 20 dimensions

# Process data
print("Processing dataset...")
results = processor.process_dataset(raw_data)

print("\nProcessing complete:")
print(f"  Original shape: {raw_data.shape}")
print(f"  Preprocessed shape: {results['preprocessed'].shape}")
print(f"  Features per sample: {results['n_features']}")

# Second call uses cache
# NOTE(review): only load_and_preprocess carries cache_mem; the "(cached)"
# label below presumes that cache is actually hit for this input -- confirm.
print("\nSecond processing (using cache)...")
start = time.perf_counter()  # monotonic clock intended for interval timing
results2 = processor.process_dataset(raw_data)
print(f"Completed in {time.perf_counter() - start:.4f}s (cached)")

### 7.2 Machine Learning Pipeline

In [None]:
if TORCH_AVAILABLE:
    print("=== ML Pipeline with Decorators ===")

    class MLPipeline:
        """Machine learning pipeline with decorators."""

        # NOTE(review): the body draws fresh random numbers, so with cache_mem
        # a repeated call presumably returns the first call's embeddings
        # instead of new ones -- confirm this is intended.
        @stx.decorators.torch_fn
        @stx.decorators.cache_mem
        def create_embeddings(self, data, embedding_dim=64):
            """Create random embeddings (simulated), one row per item in ``data``.

            Only ``len(data)`` is used; the values are drawn with
            ``torch.randn`` and every row is divided by its own L2 norm.
            """
            n_samples = len(data)
            # Simulate embedding generation
            embeddings = torch.randn(n_samples, embedding_dim)
            return embeddings / embeddings.norm(dim=1, keepdim=True)

        @stx.decorators.batch_torch_fn(n_batch_dims=1)
        def compute_similarity(self, embedding1, embedding2):
            """Compute cosine similarity between two embeddings along dim 0."""
            return torch.cosine_similarity(embedding1, embedding2, dim=0)

        @stx.decorators.numpy_fn
        def cluster_embeddings(self, embeddings, n_clusters=3):
            """Cluster ``embeddings`` with scikit-learn's KMeans.

            Returns:
                dict with keys ``'clusters'`` (label per row), ``'centers'``
                and ``'inertia'``.

            Raises:
                ImportError: if scikit-learn is not installed; it is imported
                    lazily so the rest of the pipeline works without it.
            """
            from sklearn.cluster import KMeans

            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            clusters = kmeans.fit_predict(embeddings)

            return {
                'clusters': clusters,
                'centers': kmeans.cluster_centers_,
                'inertia': kmeans.inertia_
            }

    # Use the pipeline
    pipeline = MLPipeline()

    # Generate data
    data = np.random.randn(50, 10)

    # Create embeddings
    embeddings = pipeline.create_embeddings(data, embedding_dim=32)
    print(f"Embeddings shape: {embeddings.shape}")
    print(f"Embeddings normalized: {torch.allclose(embeddings.norm(dim=1), torch.ones(len(embeddings)))}")

    # Compute pairwise similarities
    n_samples = 5
    similarities = torch.zeros(n_samples, n_samples)

    for i in range(n_samples):
        for j in range(n_samples):
            similarities[i, j] = pipeline.compute_similarity(
                embeddings[i], embeddings[j]
            )

    print(f"\nSimilarity matrix (first {n_samples} samples):")
    print(similarities.numpy().round(3))

    # Cluster embeddings. scikit-learn is an optional dependency like torch
    # and xarray, so a missing install skips this step instead of aborting.
    try:
        cluster_results = pipeline.cluster_embeddings(embeddings.numpy())
    except ImportError:
        print("\nscikit-learn not available, skipping clustering example")
    else:
        print("\nClustering results:")
        print(f"  Unique clusters: {np.unique(cluster_results['clusters'])}")
        print(f"  Cluster sizes: {np.bincount(cluster_results['clusters'])}")
        print(f"  Inertia: {cluster_results['inertia']:.2f}")
else:
    # Say so explicitly instead of leaving the cell output empty, matching
    # the torch_fn cell in section 2.2.
    print("PyTorch not available, skipping ML pipeline example")

## 8. Advanced Patterns

### 8.1 Custom Decorator Combinations

In [None]:
# A decorator factory that bundles several scitex decorators
def robust_processor(timeout_seconds=10):
    """Build a decorator that stacks disk caching, a timeout, NumPy conversion
    and batching around a function.

    Args:
        timeout_seconds: value forwarded as ``seconds`` to the timeout
            decorator.

    Returns:
        A decorator. It applies, innermost first: ``cache_disk``,
        ``timeout(seconds=timeout_seconds)``, ``numpy_fn``, ``batch_fn``.
    """
    def decorator(func):
        # Ordered innermost -> outermost. The timeout layer is wrapped in a
        # lambda so its factory is only called once the layer is applied.
        layers = (
            lambda f: stx.decorators.cache_disk(f),
            lambda f: stx.decorators.timeout(seconds=timeout_seconds)(f),
            lambda f: stx.decorators.numpy_fn(f),
            lambda f: stx.decorators.batch_fn(f),
        )
        wrapped = func
        for apply_layer in layers:
            wrapped = apply_layer(wrapped)
        return wrapped
    return decorator

# Apply the bundled decorator with a 5-second timeout
@robust_processor(timeout_seconds=5)
def analyze_signal(signal):
    """Analyze signal with FFT.

    Returns:
        dict with
        ``'dominant_freq'`` -- frequency of the strongest non-DC bin in the
        first half of the spectrum. ``np.fft.fftfreq`` is called with its
        default sample spacing, so the value is in cycles per sample, not Hz;
        ``'total_power'`` -- sum of ``|fft|**2`` over all bins;
        ``'dc_component'`` -- magnitude of bin 0.
    """
    n_samples = len(signal)
    spectrum = np.fft.fft(signal)
    bin_freqs = np.fft.fftfreq(n_samples)

    power = np.abs(spectrum)**2
    # Search bins 1 .. n_samples//2 - 1 (bin 0, the DC term, is skipped); the
    # +1 maps the slice-relative argmax back to an absolute bin index.
    peak_bin = np.argmax(power[1:n_samples//2]) + 1

    return {
        'dominant_freq': bin_freqs[peak_bin],
        'total_power': power.sum(),
        'dc_component': np.abs(spectrum[0]),
    }

# Generate test signals: one second sampled at SAMPLE_RATE_HZ.
# endpoint=False makes the sample spacing exactly 1 / SAMPLE_RATE_HZ (with the
# endpoint included it would be 1/999 s) and lets each sinusoid complete a
# whole number of periods, so its energy falls into a single FFT bin.
SAMPLE_RATE_HZ = 1000
t = np.linspace(0, 1, SAMPLE_RATE_HZ, endpoint=False)
signals = [
    np.sin(2 * np.pi * 5 * t),    # 5 Hz
    np.sin(2 * np.pi * 10 * t),   # 10 Hz
    np.sin(2 * np.pi * 5 * t) + np.sin(2 * np.pi * 15 * t),  # Mixed
]

# Analyze batch of signals
# NOTE(review): the loop below assumes the decorated call yields one result
# dict per input signal -- confirm against batch_fn's handling of dict returns.
print("Analyzing signals...")
results = analyze_signal(signals)

print("\nSignal analysis results:")
for i, result in enumerate(results):
    # analyze_signal reports the frequency in cycles per sample (fftfreq with
    # its default spacing); multiply by the sample rate to obtain Hz.
    dominant_hz = result['dominant_freq'] * SAMPLE_RATE_HZ
    print(f"  Signal {i+1}:")
    print(f"    Dominant frequency: {dominant_hz:.2f} Hz")
    print(f"    Total power: {result['total_power']:.2f}")

### 8.2 Decorator Introspection

In [None]:
# Examine decorator behavior
def analyze_decorator_chain(func):
    """Print a short report on the ``__wrapped__`` chain of ``func``.

    The report names the callable and its module, then walks the chain of
    ``__wrapped__`` attributes and prints one line per wrapper layer.

    Args:
        func: any callable. It need not have ``__name__`` or ``__module__``
            (for example an instance with ``__call__``); fallbacks are
            printed instead of raising ``AttributeError``.

    Returns:
        None. The report is printed to stdout.
    """
    print(f"Function: {getattr(func, '__name__', repr(func))}")
    print(f"Module: {getattr(func, '__module__', 'Unknown')}")

    if not hasattr(func, '__wrapped__'):
        print("No decorators detected")
        return

    print("Decorators detected:")
    visited = set()  # ids already walked; stops a cyclic __wrapped__ chain
    current = func
    depth = 0
    while hasattr(current, '__wrapped__') and id(current) not in visited:
        visited.add(id(current))
        label = type(current).__name__
        # functools.wraps copies __name__ from the wrapped function, so every
        # layer would look the same; the code object's name still identifies
        # the wrapper that was actually defined.
        defined_as = getattr(getattr(current, '__code__', None), 'co_name', None)
        if defined_as:
            label = f"{label} (defined as {defined_as!r})"
        print(f"  Level {depth}: {label}")
        current = current.__wrapped__
        depth += 1

# A doubly-decorated function to inspect
@stx.decorators.cache_mem
@stx.decorators.numpy_fn
def test_function(x):
    """Return ``x.mean()``."""
    average = x.mean()
    return average

analyze_decorator_chain(test_function)

# Call it once with a plain list to show that it still works when decorated
print("\nTest execution:")
data = [1, 2, 3, 4, 5]
result = test_function(data)
print(f"Result: {result}")

## 9. Best Practices Summary

### Key Takeaways

1. **Auto-Ordering**: Enable with `enable_auto_order()` to avoid manual ordering
2. **Type Safety**: Decorators handle type conversions automatically
3. **Performance**: Use caching decorators for expensive computations
4. **Batch Processing**: Process multiple samples efficiently
5. **Robustness**: Combine decorators for production-ready code

### Best Practices

1. **Enable Auto-Ordering**:
   ```python
   from scitex.decorators import enable_auto_order
   enable_auto_order()  # At the start of your script
   ```

2. **Choose the Right Cache**:
   ```python
   @cache_mem   # For small, frequently accessed data
   def lookup(key): ...

   @cache_disk  # For large computations or persistent cache
   def simulate(params): ...
   ```

3. **Combine Decorators Wisely**:
   ```python
   @batch_fn      # Process in batches
   @torch_fn      # Convert to PyTorch
   @cache_mem     # Cache results
   def process(x):
       return model(x)
   ```

4. **Handle Timeouts Gracefully**:
   ```python
   @timeout(seconds=30, error_message="Custom timeout message")
   def long_computation():
       # Add checkpoints for partial results
       pass
   ```

In [None]:
# Closing banner followed by the suggested next steps, one line per print
closing_lines = (
    "\nDecorators module tutorial completed!",
    "\nNext steps:",
    "1. Enable auto-ordering in your projects",
    "2. Use type conversion decorators for seamless integration",
    "3. Add caching to expensive computations",
    "4. Batch process data for better performance",
)
for line in closing_lines:
    print(line)