# 1. Download LAZ Data - Sierra Nevada Giant Forest

This notebook downloads LiDAR point cloud data (LAZ format) from the USGS 3DEP program for the Giant Forest area in Sequoia National Park.

**Data Source:** [USGS 3DEP - CA Sierra Nevada 2022](https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/LPC/Projects/CA_SierraNevada_B22/CA_SierraNevada_9_2022/)

**Target Area:** Giant Forest, Sequoia National Park
- Center: 36.56°N, 118.75°W (longitude −118.75 in signed decimal degrees)
- Contains General Sherman Tree (world's largest tree by volume)

## Setup

In [None]:
import os
import re
import json
import requests
import subprocess
from pathlib import Path
from datetime import datetime
import numpy as np
from pyproj import Transformer

# Output layout: <home>/data-store/data/output/sierra-nevada/raw/laz
DATA_ROOT = Path.home().joinpath("data-store", "data", "output", "sierra-nevada")
RAW_DIR = DATA_ROOT.joinpath("raw")
LAZ_DIR = RAW_DIR.joinpath("laz")

# USGS project folder holding the LAZ tiles and the download-link list
BASE_URL = "https://rockyweb.usgs.gov/vdelivery/Datasets/Staged/Elevation/LPC/Projects/CA_SierraNevada_B22/CA_SierraNevada_9_2022"

# Creating the deepest directory also creates DATA_ROOT and RAW_DIR
LAZ_DIR.mkdir(parents=True, exist_ok=True)

print(f"Data root: {DATA_ROOT}")
print(f"LAZ directory: {LAZ_DIR}")

## Define Target Area: Giant Forest

The Giant Forest area in Sequoia National Park. We'll define a bounding box that captures:
- General Sherman Tree
- Congress Trail area
- Crescent Meadow
- Giant Forest Museum area

In [None]:
# Giant Forest bounding box in WGS84 (lat/lon, signed decimal degrees).
# North latitude and east longitude are positive, so the western-hemisphere
# longitudes below are negative.
BBOX_WGS84 = {
    "north": 36.60,   # North boundary
    "south": 36.52,   # South boundary
    "east": -118.70,  # East boundary
    "west": -118.80   # West boundary
}

# Key landmarks (for reference), stored as (latitude, longitude)
LANDMARKS = {
    "General Sherman Tree": (36.5819, -118.7514),
    "Giant Forest Museum": (36.5642, -118.7511),
    "Moro Rock": (36.5458, -118.7647),
    "Crescent Meadow": (36.5536, -118.7450),
    "Congress Trail": (36.5800, -118.7550)
}


def format_degrees(value, positive, negative):
    """
    Format a signed coordinate as its magnitude plus a hemisphere letter.

    value: signed decimal degrees.
    positive / negative: letter used for non-negative / negative values
    (e.g. 'N'/'S' for latitude, 'E'/'W' for longitude).
    Returns e.g. '118.7°W' for -118.7, so the sign is never shown twice.
    """
    hemisphere = positive if value >= 0 else negative
    return f"{abs(value)}°{hemisphere}"


print("Bounding box (WGS84):")
print(f"  North: {format_degrees(BBOX_WGS84['north'], 'N', 'S')}")
print(f"  South: {format_degrees(BBOX_WGS84['south'], 'N', 'S')}")
print(f"  East: {format_degrees(BBOX_WGS84['east'], 'E', 'W')}")
print(f"  West: {format_degrees(BBOX_WGS84['west'], 'E', 'W')}")

# Rough extent: ~111 km per degree of latitude; a degree of longitude is
# shorter by cos(latitude), evaluated at the middle of the box.
mid_lat = (BBOX_WGS84['north'] + BBOX_WGS84['south']) / 2
height_km = abs(BBOX_WGS84['north'] - BBOX_WGS84['south']) * 111
width_km = abs(BBOX_WGS84['east'] - BBOX_WGS84['west']) * 111 * np.cos(np.radians(mid_lat))
print(f"\nArea: ~{height_km:.1f}km x ~{width_km:.1f}km")

In [None]:
# Convert to UTM Zone 11N (EPSG:32611) for tile matching.
# always_xy=True means transform() takes (lon, lat) and returns (easting, northing).
transformer = Transformer.from_crs("EPSG:4326", "EPSG:32611", always_xy=True)

# A lat/lon rectangle is not axis-aligned once projected to UTM (meridians
# converge and parallels curve away from the zone's central meridian), so the
# extreme eastings/northings are not guaranteed to sit on the SW and NE
# corners. Project all four corners and take the min/max of each axis.
corner_lonlat = [
    (BBOX_WGS84["west"], BBOX_WGS84["south"]),
    (BBOX_WGS84["west"], BBOX_WGS84["north"]),
    (BBOX_WGS84["east"], BBOX_WGS84["south"]),
    (BBOX_WGS84["east"], BBOX_WGS84["north"]),
]
corner_utm = [transformer.transform(lon, lat) for lon, lat in corner_lonlat]
corner_eastings = [easting for easting, _ in corner_utm]
corner_northings = [northing for _, northing in corner_utm]

west_utm, east_utm = min(corner_eastings), max(corner_eastings)
south_utm, north_utm = min(corner_northings), max(corner_northings)

# Axis-aligned UTM envelope of the WGS84 bounding box (meters)
BBOX_UTM = {
    "min_easting": west_utm,
    "max_easting": east_utm,
    "min_northing": south_utm,
    "max_northing": north_utm
}

print("Bounding box (UTM Zone 11N / EPSG:32611):")
print(f"  Easting: {BBOX_UTM['min_easting']:.0f} to {BBOX_UTM['max_easting']:.0f}")
print(f"  Northing: {BBOX_UTM['min_northing']:.0f} to {BBOX_UTM['max_northing']:.0f}")

# Convert landmarks to UTM (LANDMARKS stores (lat, lon); the transformer wants lon first)
print("\nLandmarks in UTM:")
for name, (lat, lon) in LANDMARKS.items():
    e, n = transformer.transform(lon, lat)
    print(f"  {name}: E={e:.0f}, N={n:.0f}")

## Download and Parse File List

Download the master file list to identify available tiles and their naming convention.

In [None]:
# Download file list (kept on disk so re-running the cell reuses the cached copy)
file_list_path = RAW_DIR / "0_file_download_links.txt"

if not file_list_path.exists():
    print("Downloading file list...")
    url = f"{BASE_URL}/0_file_download_links.txt"
    # (connect, read) timeouts in seconds. Without a timeout requests will
    # wait indefinitely on a stalled connection and the cell never finishes.
    response = requests.get(url, timeout=(10, 120))
    # Fail before writing anything so an HTTP error page is never cached
    response.raise_for_status()
    file_list_path.write_text(response.text)
    print(f"Downloaded to {file_list_path}")
else:
    print(f"File list already exists: {file_list_path}")

# Read file list, dropping blank lines and surrounding whitespace
with open(file_list_path, 'r') as f:
    all_links = [line.strip() for line in f if line.strip()]

print(f"Total links in file: {len(all_links)}")

In [None]:
# Keep only the entries that point at LAZ point-cloud files
laz_links = list(filter(lambda entry: entry.endswith('.laz'), all_links))
print(f"Total LAZ files available: {len(laz_links)}")

# Print the first few basenames so the naming convention can be inspected
print("\nSample LAZ filenames:")
preview_links = laz_links[:10]
for link in preview_links:
    print(f"  {os.path.basename(link)}")

In [None]:
# Inspect how tile IDs are encoded in the filenames.
# Conventions being probed:
# - USGS_LPC_<project>_<tileid>.laz
# - <tileid> may be a USNG grid reference (e.g., 11SKA1234) or UTM coordinates

sample_names = list(map(os.path.basename, laz_links[:50]))

# Candidate encodings: "<easting>_<northing>" in meters, or a USNG reference
coord_pattern = re.compile(r'(\d{6,7})_(\d{7})')
usng_pattern = re.compile(r'(\d{2}[A-Z]{3})(\d{4,5})(\d{4,5})')

print("Analyzing tile naming patterns...\n")

for name in sample_names[:20]:
    # The UTM-style pattern takes precedence over the USNG one
    coord_match = coord_pattern.search(name)
    if coord_match:
        easting, northing = coord_match.groups()
        print(f"  {name}")
        print(f"    -> UTM coords: E={easting}, N={northing}")
        continue

    usng_match = usng_pattern.search(name)
    if usng_match:
        zone, e, n = usng_match.groups()
        print(f"  {name}")
        print(f"    -> USNG: {zone} {e} {n}")
        continue

    print(f"  {name} (pattern not matched)")

## Filter Tiles for Giant Forest Area

In [None]:
# Filename patterns, compiled once instead of on every call.
# Pattern 1: "<6-digit easting>_<7-digit northing>" in meters.
_UTM_NAME_RE = re.compile(r'(\d{6})_(\d{7})')
# Pattern 2: USNG/MGRS reference in grid zone 11S: two 100 km square letters
# followed by an even number of digits (4 to 10), split half easting / half northing.
_USNG_NAME_RE = re.compile(r'11S([A-Z])([A-Z])((?:\d\d){2,5})(?!\d)')

# MGRS 100 km square lettering. UTM zone 11 uses column letters J-R (no O) for
# eastings 100 km .. 800 km; odd zones letter rows A-V (no I, O) starting at
# northing 0 and repeating every 2,000 km.
_ZONE11_COLUMN_LETTERS = "JKLMNPQR"
_MGRS_ROW_LETTERS = "ABCDEFGHJKLMNPQRSTUV"


def extract_tile_coords(filename):
    """
    Extract UTM coordinates from tile filename.

    Handles two naming conventions:
      1. UTM coordinates embedded in the name,
         e.g. USGS_LPC_CA_SierraNevada_9_2022_333000_4045000.laz
      2. A USNG/MGRS grid reference in zone 11S, e.g. 11SLA3300045000.laz or
         11SLA4247.laz (any even digit count from 4 to 10).

    Returns (easting, northing) as ints in meters (UTM zone 11N), or None if
    the filename matches neither convention or names a 100 km square that
    cannot exist in grid zone 11S.
    """
    match = _UTM_NAME_RE.search(filename)
    if match:
        return (int(match.group(1)), int(match.group(2)))

    match = _USNG_NAME_RE.search(filename)
    if match:
        col_letter, row_letter, digits = match.groups()
        col_index = _ZONE11_COLUMN_LETTERS.find(col_letter)
        row_index = _MGRS_ROW_LETTERS.find(row_letter)
        if col_index < 0 or row_index < 0:
            return None

        # Latitude band S (32-40 deg N) spans northings of roughly 3.5-4.5 Mm,
        # which resolves the 2,000 km ambiguity of the row letter:
        # rows R-V fall in the 2 Mm cycle, rows A-E in the 4 Mm cycle.
        if row_index >= 15:
            square_northing = 2_000_000 + row_index * 100_000
        elif row_index <= 4:
            square_northing = 4_000_000 + row_index * 100_000
        else:
            return None
        square_easting = (col_index + 1) * 100_000

        # Fewer than 5 digits per axis means coarser precision; scale to meters.
        half = len(digits) // 2
        scale = 10 ** (5 - half)
        e = int(digits[:half]) * scale
        n = int(digits[half:]) * scale
        return (square_easting + e, square_northing + n)

    return None

# Split the LAZ links into tiles whose coordinates could be parsed from the
# filename and filenames that matched no known pattern.
tiles_with_coords, unmatched_tiles = [], []

for tile_url in laz_links:
    tile_name = os.path.basename(tile_url)
    parsed = extract_tile_coords(tile_name)
    if not parsed:
        unmatched_tiles.append(tile_name)
        continue
    tile_e, tile_n = parsed[0], parsed[1]
    tiles_with_coords.append({
        'url': tile_url,
        'filename': tile_name,
        'easting': tile_e,
        'northing': tile_n
    })

print(f"Tiles with parsed coordinates: {len(tiles_with_coords)}")
print(f"Tiles with unmatched patterns: {len(unmatched_tiles)}")

# Show a few unparsed names to help diagnose the naming convention
if unmatched_tiles:
    print(f"\nSample unmatched tiles:")
    unmatched_preview = unmatched_tiles[:5]
    for t in unmatched_preview:
        print(f"  {t}")

In [None]:
# Compare the coordinate extent of all parsed tiles with the target box
if tiles_with_coords:
    eastings = []
    northings = []
    for parsed_tile in tiles_with_coords:
        eastings.append(parsed_tile['easting'])
        northings.append(parsed_tile['northing'])

    # Extent of the whole dataset
    print(f"Dataset coordinate ranges:")
    print(f"  Easting: {min(eastings):,} to {max(eastings):,}")
    print(f"  Northing: {min(northings):,} to {max(northings):,}")

    # Extent we are looking for (unbuffered UTM bounding box)
    print(f"\nTarget area (Giant Forest):")
    print(f"  Easting: {BBOX_UTM['min_easting']:,.0f} to {BBOX_UTM['max_easting']:,.0f}")
    print(f"  Northing: {BBOX_UTM['min_northing']:,.0f} to {BBOX_UTM['max_northing']:,.0f}")

In [None]:
# Filter tiles that intersect our bounding box
# TILE_SIZE is the assumed edge length of one tile (typically 1000m x 1000m);
# BUFFER is the margin added on every side of the bounding box before filtering.
TILE_SIZE = 1000  # meters
BUFFER = TILE_SIZE  # Include adjacent tiles (margin of one tile width)

def tile_intersects_bbox(tile, bbox, tile_size=1000):
    """
    Report whether a square tile overlaps (or touches) a bounding box.

    tile: dict with 'easting' and 'northing'; the tile is taken to extend
        tile_size units up and to the right of that point.
    bbox: dict with 'min_easting', 'max_easting', 'min_northing', 'max_northing'.
    tile_size: edge length of the tile, in the same units as the coordinates.
    Edges are inclusive, so a tile that only touches the box counts as a hit.
    """
    west, south = tile['easting'], tile['northing']
    east, north = west + tile_size, south + tile_size

    # The two axes are independent: both ranges must overlap
    overlaps_in_easting = west <= bbox['max_easting'] and east >= bbox['min_easting']
    if not overlaps_in_easting:
        return False
    return south <= bbox['max_northing'] and north >= bbox['min_northing']

# Grow the UTM box by BUFFER on every side: lower bounds move down, upper bounds move up
bbox_buffered = {
    key: (value - BUFFER if key.startswith('min_') else value + BUFFER)
    for key, value in BBOX_UTM.items()
}

# Keep every parsed tile that overlaps the buffered box
giant_forest_tiles = []
for candidate in tiles_with_coords:
    if tile_intersects_bbox(candidate, bbox_buffered, TILE_SIZE):
        giant_forest_tiles.append(candidate)

print(f"Tiles intersecting Giant Forest area: {len(giant_forest_tiles)}")

if giant_forest_tiles:
    # Coordinate extent of the selection
    print(f"\nTile coordinate range:")
    eastings = []
    northings = []
    for selected in giant_forest_tiles:
        eastings.append(selected['easting'])
        northings.append(selected['northing'])
    print(f"  Easting: {min(eastings):,} to {max(eastings):,}")
    print(f"  Northing: {min(northings):,} to {max(northings):,}")

    print(f"\nSample tiles:")
    selection_preview = giant_forest_tiles[:10]
    for t in selection_preview:
        print(f"  {t['filename']}")

In [None]:
# Report the match count, or - when nothing matched - list the tiles nearest
# to the target so the actual coordinate patterns can be examined.

if giant_forest_tiles:
    print(f"\n✓ Found {len(giant_forest_tiles)} tiles for Giant Forest area")
else:
    print("No tiles matched. Examining all tile coordinates...")

    # Centre of the (unbuffered) UTM bounding box
    target_e = (BBOX_UTM['min_easting'] + BBOX_UTM['max_easting']) / 2
    target_n = (BBOX_UTM['min_northing'] + BBOX_UTM['max_northing']) / 2

    print(f"\nTarget center: E={target_e:.0f}, N={target_n:.0f}")

    if tiles_with_coords:
        # Annotate every tile in place with its straight-line distance to the centre
        for candidate in tiles_with_coords:
            offset_e = candidate['easting'] - target_e
            offset_n = candidate['northing'] - target_n
            candidate['distance'] = np.sqrt(offset_e**2 + offset_n**2)

        # Nearest first; sort a copy so tiles_with_coords keeps its order
        sorted_tiles = list(tiles_with_coords)
        sorted_tiles.sort(key=lambda entry: entry['distance'])

        print(f"\n10 Closest tiles to target:")
        nearest = sorted_tiles[:10]
        for t in nearest:
            print(f"  {t['filename']}")
            print(f"    E={t['easting']:,}, N={t['northing']:,}, dist={t['distance']/1000:.1f}km")

## Save Tile List for Download

In [None]:
# Persist the selection: a plain URL list for wget plus a JSON summary
tiles_to_download = giant_forest_tiles or []

if not tiles_to_download:
    print("No tiles to save. Please check coordinate matching above.")
else:
    # One URL per line, the layout `wget -i` reads
    download_list_path = RAW_DIR / "giant_forest_tiles.txt"
    with open(download_list_path, 'w') as f:
        f.writelines(entry['url'] + '\n' for entry in tiles_to_download)
    print(f"Saved {len(tiles_to_download)} tile URLs to {download_list_path}")

    # Per-tile records keep only the name and coordinates (no URL)
    tile_records = []
    for t in tiles_to_download:
        tile_records.append({
            'filename': t['filename'],
            'easting': t['easting'],
            'northing': t['northing']
        })

    # float() makes sure the UTM bounds are plain JSON-serializable numbers
    metadata = {
        'project': 'Sierra Nevada Giant Forest LiDAR',
        'source': BASE_URL,
        'bbox_wgs84': BBOX_WGS84,
        'bbox_utm': {k: float(v) for k, v in BBOX_UTM.items()},
        'crs': 'EPSG:32611',
        'tile_count': len(tiles_to_download),
        'tiles': tile_records,
        'created': datetime.now().isoformat()
    }

    metadata_path = RAW_DIR / "giant_forest_metadata.json"
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)
    print(f"Saved metadata to {metadata_path}")

## Download LAZ Tiles

Download the identified tiles. This may take a while depending on network speed and number of tiles.

In [None]:
# Find out which tiles are already on disk
def get_downloaded_tiles():
    """Return the set of filenames of all '*.laz' files directly inside LAZ_DIR."""
    return {laz_path.name for laz_path in LAZ_DIR.glob('*.laz')}

downloaded = get_downloaded_tiles()
print(f"Already downloaded: {len(downloaded)} tiles")

# Whatever is selected but not yet on disk still has to be fetched
tiles_needed = []
for pending in tiles_to_download:
    if pending['filename'] in downloaded:
        continue
    tiles_needed.append(pending)
print(f"Tiles to download: {len(tiles_needed)}")

In [None]:
def download_tile(url, output_dir, max_retries=3):
    """
    Download a single LAZ tile using wget.

    url: tile URL; its basename becomes the local filename.
    output_dir: pathlib.Path of the destination directory.
    max_retries: number of wget attempts before giving up.

    Returns True if the tile is present and non-empty afterwards (already
    there or freshly downloaded), False if every attempt failed.

    wget writes to '<name>.part' and the file is renamed into place only after
    wget succeeds. `wget -O` creates its output file even when the transfer
    fails or times out, so writing straight to the final name would leave a
    truncated file that later runs mistake for a completed download.
    """
    filename = os.path.basename(url)
    output_path = output_dir / filename
    partial_path = output_dir / (filename + ".part")

    # An empty file is treated as a leftover from a failed run, not as a tile
    if output_path.exists() and output_path.stat().st_size > 0:
        print(f"  Already exists: {filename}")
        return True

    for attempt in range(max_retries):
        try:
            result = subprocess.run(
                ['wget', '-q', '--show-progress', '-O', str(partial_path), url],
                capture_output=False,
                timeout=600  # 10 minute timeout per file
            )
            if (result.returncode == 0 and partial_path.exists()
                    and partial_path.stat().st_size > 0):
                partial_path.replace(output_path)
                return True
            print(f"  wget failed (exit code {result.returncode}) on attempt {attempt + 1}")
        except subprocess.TimeoutExpired:
            print(f"  Timeout on attempt {attempt + 1}")
        except Exception as e:
            print(f"  Error on attempt {attempt + 1}: {e}")

        # Discard whatever the failed attempt left behind before retrying
        try:
            partial_path.unlink()
        except FileNotFoundError:
            pass

    return False

# Alternative: Download all at once using wget -i
def download_all_tiles(tile_list_path, output_dir):
    """
    Download all tiles from a URL list file using a single wget invocation.

    tile_list_path: text file with one URL per line (passed to `wget -i`).
    output_dir: directory the files are saved into (passed to `wget -P`).

    Returns True if wget exits with status 0, False otherwise (including when
    the wget executable is not installed).
    """
    # --continue resumes partially downloaded files and leaves complete ones
    # alone. --no-clobber is deliberately not used alongside it: it makes wget
    # skip every file that already exists, so a truncated file from an
    # interrupted run would never be completed.
    cmd = [
        'wget',
        '-i', str(tile_list_path),
        '-P', str(output_dir),
        '--continue',
        '--progress=dot:giga'
    ]

    print(f"Running: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd)
    except FileNotFoundError:
        # Raised by subprocess when the 'wget' binary cannot be found
        print("wget executable not found. Install wget or download tiles one by one.")
        return False
    return result.returncode == 0

In [None]:
# Download tiles. NOTE: Option 1 below is live code - running this cell starts
# the download. This may take significant time depending on the number and
# size of tiles.

if tiles_needed:
    print(f"Starting download of {len(tiles_needed)} tiles...")
    print(f"Output directory: {LAZ_DIR}")
    print("\n" + "="*60)

    # Option 1: Download using list file (recommended for many files)
    download_list_path = RAW_DIR / "giant_forest_tiles.txt"
    if download_list_path.exists():
        success = download_all_tiles(download_list_path, LAZ_DIR)
        if success:
            print("\n✓ Download complete!")
        else:
            print("\n⚠ Download may have issues. Check logs above.")
    else:
        # Without this branch the cell would finish silently having downloaded nothing
        print(f"\n⚠ URL list not found: {download_list_path}")
        print("  Run the 'Save Tile List for Download' cell first.")

    # Option 2: Download one by one (use if Option 1 fails)
    # for i, tile in enumerate(tiles_needed):
    #     print(f"[{i+1}/{len(tiles_needed)}] Downloading {tile['filename']}...")
    #     success = download_tile(tile['url'], LAZ_DIR)
    #     if not success:
    #         print(f"  ✗ Failed to download {tile['filename']}")
else:
    print("No tiles need downloading. All tiles already present or no tiles identified.")

## Verify Downloads

In [None]:
# Check final download status
downloaded_files = list(LAZ_DIR.glob('*.laz'))
total_size = sum(f.stat().st_size for f in downloaded_files)

print("Download Summary:")
print(f"  Total LAZ files: {len(downloaded_files)}")
print(f"  Total size: {total_size / (1024**3):.2f} GB")

if tiles_to_download:
    # Compare by filename, not by count: LAZ_DIR may hold files that are not
    # part of this selection, and a raw count would let them mask missing tiles.
    downloaded_names = {f.name for f in downloaded_files}
    missing = [t for t in tiles_to_download if t['filename'] not in downloaded_names]

    expected = len(tiles_to_download)
    actual = expected - len(missing)  # expected tiles that are actually on disk
    if not missing:
        print(f"\n✓ All expected tiles downloaded ({actual}/{expected})")
    else:
        print(f"\n⚠ Missing tiles: {len(missing)} of {expected}")

        # List the first few missing tiles
        print("Missing tiles:")
        for t in missing[:10]:
            print(f"  - {t['filename']}")
        if len(missing) > 10:
            print(f"  ... and {len(missing) - 10} more")

In [None]:
# Show the first 20 downloaded files (sorted by path) with their sizes
print("Downloaded LAZ files:")
print("-" * 60)

for position, f in enumerate(sorted(downloaded_files)):
    if position == 20:
        break
    size_mb = f.stat().st_size / (1024**2)
    print(f"  {f.name}: {size_mb:.1f} MB")

# Anything beyond the 20th entry is only counted, not listed
if downloaded_files[20:]:
    print(f"  ... and {len(downloaded_files) - 20} more files")

## Save Download Status

Update the metadata with download status for tracking across sessions.

In [None]:
# Record the current download state in the metadata file
metadata_path = RAW_DIR / "giant_forest_metadata.json"

# Start from the existing metadata when there is one, otherwise from a stub
if not metadata_path.exists():
    metadata = {'project': 'Sierra Nevada Giant Forest LiDAR'}
else:
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

# Snapshot of what is on disk right now; replaces any earlier status entry
status_snapshot = {
    'total_files': len(downloaded_files),
    'total_size_gb': total_size / (1024**3),
    'last_updated': datetime.now().isoformat(),
    'downloaded_files': [f.name for f in downloaded_files]
}
metadata['download_status'] = status_snapshot

with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"Updated metadata at {metadata_path}")

## Next Steps

After downloading the LAZ files:

1. **Run `2_laz_to_raster.ipynb`** to process LAZ → DEM/DSM/CHM
2. **Run `3_chm_exploration.ipynb`** to analyze the generated CHM
3. **Run `4_fractal_analysis.ipynb`** to compute fractal dimensions

See `PLAN.md` for the complete project workflow.