# End-to-End Case Study: Synthetic Data

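This notebook walks through a complete recovery workflow on synthetic seismic data: generating a clean dataset, corrupting it with noise and missing traces, preprocessing, reconstructing, and evaluating the result.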

**Prerequisites:** Python 3.10+, notebook 01 completed

**Estimated Runtime:** 10 minutes

In [None]:
# Optional: uncomment the next line to install the pinned promethium-seismic
# release (1.0.1) if the package is not already available in this environment.
# !pip install promethium-seismic==1.0.1

In [None]:
import promethium
from promethium import (
    SeismicRecoveryPipeline,
    evaluate_reconstruction,
    generate_synthetic_traces,
    add_noise,
    bandpass_filter,
    set_seed,
    get_device,
)
from promethium.utils.synthetic import create_missing_traces

import numpy as np
import matplotlib.pyplot as plt

# Seed through Promethium's helper so repeated runs are comparable.
# NOTE(review): which RNGs set_seed covers is defined by the library — confirm.
set_seed(42)
# Ask the library which compute device it picked; in the cells shown here the
# value is only reported, not passed on explicitly.
device = get_device()
print(f"Promethium {promethium.__version__} | Device: {device}")

## 1. Generate Synthetic Dataset

In [None]:
# Generate layered synthetic data.
# The generator settings live in one mapping so they are easy to read and tweak;
# they are forwarded unchanged as keyword arguments.
synthetic_params = dict(
    n_traces=128,
    n_samples=512,
    sample_rate=250.0,
    frequencies=[5.0, 15.0, 30.0, 50.0],
    seed=42,
)
clean_data, metadata = generate_synthetic_traces(**synthetic_params)

# Report the array dimensions and the duration recorded in the metadata.
print(f"Dataset: {clean_data.shape[0]} traces x {clean_data.shape[1]} samples")
print(f"Duration: {metadata['duration']:.2f} s")

## 2. Corrupt Data

In [None]:
# Corruption settings, named once so the function calls and the printed summary
# below can never drift apart when someone tunes a value.
NOISE_LEVEL = 0.25
MISSING_RATIO = 0.2
CORRUPTION_SEED = 42

# Add noise to the clean reference data.
noisy_data = add_noise(clean_data, noise_level=NOISE_LEVEL, seed=CORRUPTION_SEED)

# Create missing traces; the returned mask is 0 where data was removed
# (that is what the count in the summary relies on).
corrupted_data, mask = create_missing_traces(
    noisy_data, missing_ratio=MISSING_RATIO, seed=CORRUPTION_SEED
)

# The percentages are formatted from the actual parameters (0.25 -> "25%").
print(f"Added {NOISE_LEVEL:.0%} noise")
print(f"Missing traces: {int(np.sum(mask == 0))} ({MISSING_RATIO:.0%})")

## 3. Preprocessing

In [None]:
def _bandpass_trace(trace):
    """Band-limit a single trace with the notebook's 2.0–80.0 passband.

    The sampling rate is read from ``metadata['sample_rate']`` on each call.
    """
    return bandpass_filter(trace, lowcut=2.0, highcut=80.0, fs=metadata['sample_rate'])


# Apply the bandpass filter trace by trace to reduce noise, then re-stack the
# filtered traces into a single array.
preprocessed = np.array([_bandpass_trace(trace) for trace in corrupted_data])

print(f"Preprocessed shape: {preprocessed.shape}")

## 4. Recovery Pipeline

In [None]:
# Run recovery.
# The preset pipeline is preferred. If it cannot be built or run, fall back to
# plain Gaussian smoothing so the notebook still completes — but say so loudly,
# otherwise the metrics in the evaluation section would be mistaken for the
# model's result.
try:
    pipeline = SeismicRecoveryPipeline.from_preset('unet_denoise_v1')
    reconstructed = pipeline.run(preprocessed)
except Exception as exc:
    # Imported lazily: SciPy is only needed on this fallback path.
    from scipy.ndimage import gaussian_filter1d

    print(
        f"WARNING: recovery pipeline failed ({type(exc).__name__}: {exc}); "
        "falling back to Gaussian smoothing (sigma=1.5)."
    )
    # Smooth each trace independently along its own axis.
    reconstructed = np.array([gaussian_filter1d(t, sigma=1.5) for t in preprocessed])

print(f"Reconstructed: {reconstructed.shape}")

## 5. Evaluation

In [None]:
# Score the degraded input and the final output against the clean reference.
metrics_corrupted = evaluate_reconstruction(clean_data, corrupted_data)
metrics_recovered = evaluate_reconstruction(clean_data, reconstructed)

# Right-aligned comparison table: header, rule, then one row per stage.
print(f"{'Stage':>15} {'SNR (dB)':>12} {'SSIM':>10}")
print("-" * 40)
for stage_label, stage_metrics in (
    ('Corrupted', metrics_corrupted),
    ('Recovered', metrics_recovered),
):
    print(f"{stage_label:>15} {stage_metrics['snr']:>12.2f} {stage_metrics['ssim']:>10.4f}")

In [None]:
# Visualization: one panel per processing stage in a 2x2 grid.
panels = [
    ('Original', clean_data),
    ('Corrupted', corrupted_data),
    ('Preprocessed', preprocessed),
    ('Reconstructed', reconstructed),
]

# Shared symmetric colour limits taken from the clean data (99th percentile of
# the absolute amplitude) so all four panels are directly comparable.
clip = np.percentile(np.abs(clean_data), 99)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

for panel_ax, (panel_title, section) in zip(axes.ravel(), panels):
    # Transpose so traces run along x, and show only the first 200 rows
    # (samples, given the traces-by-samples layout printed earlier).
    panel_ax.imshow(
        section.T[:200],
        aspect='auto',
        cmap='seismic',
        vmin=-clip,
        vmax=clip,
    )
    panel_ax.set(title=panel_title, xlabel='Trace', ylabel='Sample')

plt.tight_layout()
plt.show()

## Summary

Complete workflow demonstrated:
1. Data generation
2. Corruption (noise + missing traces)
3. Preprocessing (filtering)
4. Recovery (`unet_denoise_v1` pipeline preset, with a Gaussian-smoothing fallback if the pipeline fails)
5. Quantitative evaluation