# End-to-End Case Study: Real-World Data

Workflow for processing real seismic data files.

**Prerequisites:** SEG-Y or miniSEED data file

**Data Sources:**
- IRIS Data Services (https://ds.iris.edu)
- USGS Earthquake Hazards (https://earthquake.usgs.gov)
- Public seismic datasets on Kaggle

In [None]:
# !pip install promethium-seismic==1.0.3

In [None]:
import promethium
from promethium import (
    load_segy,
    load_miniseed,
    SeismicRecoveryPipeline,
    evaluate_reconstruction,
    bandpass_filter,
    set_seed,
)

import numpy as np
import matplotlib.pyplot as plt
import os

# Seed with a fixed value before any data is generated or processed.
# NOTE(review): presumably this seeds the RNGs promethium relies on so that
# repeated runs of the notebook match - confirm against the set_seed docs.
set_seed(42)
# Show the installed library version so it can be compared with the version
# pinned in the (commented-out) install cell at the top of the notebook.
print(f"Promethium {promethium.__version__}")

## 1. Load Data

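Replace the path with the location of your actual data file. Files ending in `.sgy` or `.segy` are read as SEG-Y; any other extension is read as miniSEED. If the file does not exist, the cell falls back to noisy synthetic traces so that the remaining cells can still be run.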

In [None]:
# Example paths - replace with your data
DATA_PATH = './data/survey.sgy'  # or .mseed

if os.path.exists(DATA_PATH):
    # Pick the reader from the file extension. The comparison is done on a
    # lower-cased copy of the path so that upper-case names such as
    # SURVEY.SGY or LINE01.SEGY are still read as SEG-Y instead of being
    # handed to the miniSEED reader. Any other extension is treated as
    # miniSEED.
    if DATA_PATH.lower().endswith(('.sgy', '.segy')):
        data = load_segy(DATA_PATH)
    else:
        data = load_miniseed(DATA_PATH)
    print(f"Loaded: {data.shape}")
else:
    # No file at DATA_PATH: build a noisy synthetic section instead so the
    # rest of the notebook can still be executed end to end.
    print(f"Data file not found: {DATA_PATH}")
    print("Using synthetic data for demonstration")
    from promethium import generate_synthetic_traces, add_noise
    clean, meta = generate_synthetic_traces(n_traces=100, n_samples=1000, seed=42)
    data = add_noise(clean, noise_level=0.3, seed=42)
    print(f"Generated: {data.shape}")

## 2. Quality Control

In [None]:
# Basic QC: print the array shape followed by a few summary statistics.
print("Data Statistics:")
print(f"  Shape: {data.shape}")
# Each reduction is evaluated right before its line is printed, in the
# order Min, Max, Mean, Std.
for stat_label, reduce_fn in (
    ("Min", np.min),
    ("Max", np.max),
    ("Mean", np.mean),
    ("Std", np.std),
):
    print(f"  {stat_label}: {reduce_fn(data):.4f}")

# Check for issues: count the non-finite samples (NaN and +/-Inf).
nan_count = np.isnan(data).sum()
inf_count = np.isinf(data).sum()
print(f"  NaN values: {nan_count}")
print(f"  Inf values: {inf_count}")

## 3. Preprocessing

In [None]:
# Apply preprocessing
fs = 250.0  # Sampling rate - adjust based on your data

# Band-pass corner frequencies, in the same units as fs (presumably Hz).
# A digital filter cannot have a corner at or above the Nyquist frequency
# (fs / 2), so the upper corner is capped at 80% of Nyquist. With the default
# fs of 250 this cap equals 100.0, i.e. the band is unchanged; it only takes
# effect when fs is lowered for data sampled more slowly.
nyquist = fs / 2.0
lowcut = 1.0
highcut = min(100.0, 0.8 * nyquist)
if highcut <= lowcut:
    raise ValueError(
        f"Sampling rate fs={fs} is too low for a band-pass starting at {lowcut}"
    )

# Filter one trace (one row of `data`) at a time and restack into an array.
processed = np.array([
    bandpass_filter(trace, lowcut=lowcut, highcut=highcut, fs=fs)
    for trace in data
])

print(f"Preprocessed: {processed.shape}")

## 4. Run Pipeline

In [None]:
# Run the 'unet_denoise_v1' preset of the recovery pipeline on the filtered
# traces.
try:
    pipeline = SeismicRecoveryPipeline.from_preset('unet_denoise_v1')
    result = pipeline.run(processed)
except Exception as e:
    # Deliberate best-effort fallback so the remaining cells still run, but
    # say so explicitly: otherwise the plots and the saved file would look
    # like pipeline output when they are only Gaussian-smoothed traces.
    print(f"Pipeline failed ({type(e).__name__}: {e})")
    print("Falling back to Gaussian smoothing for demonstration")
    from scipy.ndimage import gaussian_filter1d
    result = np.array([gaussian_filter1d(t, sigma=1.5) for t in processed])

print(f"Result: {result.shape}")

## 5. Visualization

In [None]:
# Draw the input and the pipeline output side by side.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Both panels share one symmetric colour range, taken from the 99th
# percentile of the absolute input amplitudes.
clip = np.percentile(np.abs(data), 99)

panels = (
    (data, 'Input Data'),
    (result, 'Processed Result'),
)
for ax, (section, title) in zip(axes, panels):
    # Transpose so traces run along x and samples along y, then keep only
    # the first 500 rows of the transposed array.
    ax.imshow(section.T[:500], aspect='auto', cmap='seismic', vmin=-clip, vmax=clip)
    ax.set_title(title)
    ax.set_xlabel('Trace')
    ax.set_ylabel('Sample')

plt.tight_layout()
plt.show()

## 6. Save Results

In [None]:
# Store the result array as a .npy file inside ./output, creating the
# directory first if it does not exist yet.
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

output_file = os.path.join(output_dir, 'processed_data.npy')
np.save(output_file, result)
print(f"Saved to {output_dir}/processed_data.npy")

## Summary

Demonstrated workflow for real data:
1. Data loading (SEG-Y, miniSEED)
2. Quality control
3. Preprocessing (filtering)
4. Recovery pipeline
5. Visualization and export