# NAIP Imagery Download and Processing

This notebook downloads NAIP (National Agriculture Imagery Program) imagery from Microsoft's Planetary Computer STAC API, merges the tiles into a mosaic, reprojects the mosaic to EPSG:6339, clips it to the area of interest, and finally removes the intermediate files.

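## Process Overview:
1. **Setup**: Configure parameters and load AOI
2. **Search**: Find NAIP imagery intersecting the area of interest
3. **Download**: Retrieve individual tiles and validate that they can be opened
4. **Mosaic**: Merge tiles into a single raster
5. **Reproject**: Convert to target coordinate system
6. **Clip**: Cut the reprojected mosaic to the AOI outline
7. **Clean up**: Delete the downloaded tiles and the intermediate mosaics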

## Requirements:
- Internet connection for data download
- Sufficient disk space for imagery storage
- Area of interest defined as GeoJSON file

## 1. Configuration and Setup

In [1]:
# Standard library imports
import os
import shutil
import sys
from contextlib import ExitStack
from pathlib import Path

# Third-party imports
import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import requests
from pystac_client import Client
from rasterio.enums import Resampling
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject
from shapely.geometry import mapping
from tqdm import tqdm

# Print version information so a saved run records the environment that produced it.
print(f"geopandas version: {gpd.__version__}")
print(f"pandas version: {pd.__version__}")
print(f"folium version: {folium.__version__}")
# sys.version begins with the version number followed by build details. Take the first
# whitespace-separated token instead of a fixed-width slice, which would truncate a
# version such as 3.11.10 and keep a trailing space for one such as 3.9.7.
print(f"python version: {sys.version.split()[0]}")
print(f"rasterio version: {rasterio.__version__}")

geopandas version: 1.1.1
pandas version: 2.3.3
folium version: 0.20.0
python version: 3.11.1
rasterio version: 1.4.3


In [2]:
# Move into the shared data directory, located at "../data" relative to the directory the
# kernel started in. That starting directory is remembered the first time this cell runs,
# so re-running the cell resolves "../data" from the same place instead of climbing one
# level further up on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = Path.cwd()
os.chdir(NOTEBOOK_DIR / ".." / "data")

In [3]:
# ===== CONFIGURATION =====
# Meadow AOI name (change as needed); it is also used as the per-meadow folder name
# and as part of the clipped output filename.
MEADOW_NAME = "Lacey"  # Options: "Lacey", "Humbug", "subb"

# STAC API configuration
STAC_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
COLLECTION = "naip"
YEAR = "2018"  # used as the STAC datetime filter and embedded in the output filenames
MAX_ITEMS = 100

# Processing parameters
TARGET_CRS = "EPSG:6339"
DOWNLOAD_CHUNK_SIZE = 8192  # bytes (must be integer)
DTYPE = rasterio.float32  # dtype of the merged mosaic and of the raster written from it

# File paths and directories
# Relative paths: they are resolved against whatever the working directory is at the
# moment they are used, and the notebook changes directory before using them.
SHAPEFILE_PATH = "meadow_extent.geojson"
OUTPUT_DIR = Path(MEADOW_NAME)

# Output filenames are derived from YEAR and TARGET_CRS so that changing either setting
# cannot produce files labelled with a stale year or CRS code.
MOSAIC_OUTPUT = f"naip_mosaic_{YEAR}.tif"
REPROJECTED_OUTPUT = f"naip_mosaic_{YEAR}_{TARGET_CRS.replace(':', '').lower()}.tif"

print(f"Configuration loaded for meadow: {MEADOW_NAME}")
print(f"Output directory: {OUTPUT_DIR}")
print(f"Target year: {YEAR}, Target CRS: {TARGET_CRS}")

Configuration loaded for meadow: Lacey
Output directory: Lacey
Target year: 2018, Target CRS: EPSG:6339


In [4]:
# Change to the meadow-specific data directory.
# OUTPUT_DIR is a relative path, so a plain chdir would only work once: a second run
# would look for "<meadow>/<meadow>". The directory this cell was first run from is
# remembered and used as the anchor, which also lets MEADOW_NAME be changed and this
# cell re-run without restarting the kernel.
if "DATA_ROOT_DIR" not in globals():
    DATA_ROOT_DIR = Path.cwd()
os.chdir(DATA_ROOT_DIR / OUTPUT_DIR)
print(f"Working directory set to: {os.getcwd()}")

Working directory set to: /media/grendel/7db216a7-836f-4e8d-b439-e4f999cedb23/USGS/meadow_assessment/data/Lacey


## 2. Load Area of Interest (AOI)

In [5]:
try:
    # Read the AOI file and report what it contains in its native CRS
    aoi = gpd.read_file(SHAPEFILE_PATH)
    feature_count = len(aoi)
    print(f"Loaded AOI with {feature_count} feature(s)")
    print(f"Original CRS: {aoi.crs}")

    # The STAC search below is given the AOI in WGS84 (EPSG:4326)
    aoi = aoi.to_crs("EPSG:4326")
    print(f"Reprojected to: {aoi.crs}")

    # Merge all features into one shape, then express it as a GeoJSON-like mapping
    merged_shape = aoi.union_all()
    aoi_geom = mapping(merged_shape)
    print(f"AOI bounds: {aoi.total_bounds}")

except FileNotFoundError:
    print(f"Error: Could not find shapefile at {SHAPEFILE_PATH}")
    raise
except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error loading AOI: {exc}")
    raise

Loaded AOI with 1 feature(s)
Original CRS: EPSG:32610
Reprojected to: EPSG:4326
AOI bounds: [-120.43489574   39.4642625  -120.40901002   39.48237965]


## 3. Connect to STAC API and Search for Imagery

In [6]:
try:
    # Open the STAC catalog at the configured endpoint
    client = Client.open(STAC_URL)
    print(f"Connected to STAC API: {STAC_URL}")

    # Enumerating the collections doubles as a check that the service responds
    collections = [collection for collection in client.get_collections()]
    collection_count = len(collections)
    print(f"Available collections: {collection_count}")

except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error connecting to STAC API: {exc}")
    raise

Connected to STAC API: https://planetarycomputer.microsoft.com/api/stac/v1
Available collections: 126


In [7]:
try:
    # Assemble the query for NAIP imagery that intersects the AOI in the configured year
    search_params = {
        "collections": [COLLECTION],
        "intersects": aoi_geom,
        "datetime": YEAR,
        "max_items": MAX_ITEMS,
    }
    search = client.search(**search_params)

    # Echo the settings that were used
    print(f"Search parameters:")
    reported_settings = (
        ("Collection", COLLECTION),
        ("Year", YEAR),
        ("Max items", MAX_ITEMS),
    )
    for label, value in reported_settings:
        print(f"  {label}: {value}")

except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error creating search: {exc}")
    raise

Search parameters:
  Collection: naip
  Year: 2018
  Max items: 100


In [8]:
try:
    # Run the search and keep every returned item in a list
    items = list(search.items())
    print(f"Found {len(items)} NAIP items intersecting AOI")

    if items:
        print(f"Date range of found items:")
        # Items without a datetime are left out of the range report
        dates = [item.datetime.strftime("%Y-%m-%d") for item in items if item.datetime]
        if dates:
            # ISO-formatted dates sort chronologically, so the ends of the sorted
            # list are the earliest and latest acquisition dates
            ordered_dates = sorted(dates)
            print(f"  From: {ordered_dates[0]} to {ordered_dates[-1]}")
    else:
        print("Warning: No NAIP items found for the specified criteria")

except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error retrieving search items: {exc}")
    raise

Found 2 NAIP items intersecting AOI
Date range of found items:
  From: 2018-09-16 to 2018-09-16


## 4. Download NAIP Imagery

In [9]:
# Folder (relative to the current working directory) that receives the raw NAIP tiles.
# exist_ok=True keeps this cell re-runnable: os.mkdir would raise FileExistsError when
# the folder is already there from an earlier run.
NAIP_DOWNLOADS_DIR = Path("naip_downloads")
NAIP_DOWNLOADS_DIR.mkdir(exist_ok=True)

In [10]:
# Download the "image" asset of every item found by the search.
# Results are collected in two lists that later cells and the summary read:
#   downloaded_files - Paths of tiles that are present and complete
#   failed_downloads - ids of items that could not be retrieved
downloaded_files = []
failed_downloads = []

# Check directory exists before starting
print(f"Download directory: {NAIP_DOWNLOADS_DIR}")
print(f"Directory exists: {NAIP_DOWNLOADS_DIR.exists()}")
print(f"Directory is writable: {os.access(NAIP_DOWNLOADS_DIR, os.W_OK)}")

for item in tqdm(items, desc="Downloading NAIP tiles"):
    # Paths are derived before the try block so every handler below can refer to them.
    filename = NAIP_DOWNLOADS_DIR / f"{item.id}.tif"
    # Data is streamed into a ".part" sibling and renamed to the final name only once the
    # transfer is known to be complete. An interrupted run therefore never leaves a
    # truncated ".tif" behind that the "already exists" check would accept on the next run.
    partial_file = filename.with_name(f"{filename.name}.part")

    try:
        asset = item.assets["image"]
        # The href is used exactly as returned by the search; no signing step is applied.
        asset_href = asset.href

        print(f"\nProcessing item: {item.id}")
        print(f"Target file: {filename}")
        print(f"URL: {asset_href}")

        if filename.exists():
            # Verify file size before skipping
            file_size = filename.stat().st_size
            if file_size > 1000:  # Basic size check (1KB minimum)
                print(f"File {filename.name} already exists ({file_size:,} bytes). Skipping.")
                downloaded_files.append(filename)
                continue
            else:
                print(f"File {filename.name} exists but is too small. Re-downloading.")
                filename.unlink()  # Remove corrupted file

        print(f"Starting download...")
        # The context manager releases the connection even if streaming fails part-way.
        with requests.get(asset_href, stream=True, timeout=30) as response:
            response.raise_for_status()

            # 0 means the server did not announce a size, so completeness cannot be checked
            total_size = int(response.headers.get('content-length', 0))
            print(f"Expected size: {total_size:,} bytes")

            downloaded_size = 0
            with open(partial_file, "wb") as f:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
                        downloaded_size += len(chunk)

        print(f"Downloaded: {downloaded_size:,} bytes")

        # Verify download completed
        if total_size > 0 and downloaded_size != total_size:
            print(f"Warning: Download size mismatch for {filename.name}")
            print(f"  Expected: {total_size:,} bytes, Got: {downloaded_size:,} bytes")
            if downloaded_size < total_size:
                # Fewer bytes than announced: the transfer was cut short, so the tile
                # is reported as failed instead of being kept as a truncated raster.
                failed_downloads.append(item.id)
                continue

        if downloaded_size == 0:
            print(f"Error: File was not created or is empty: {filename}")
            failed_downloads.append(item.id)
            continue

        # Publish the finished download under its final name
        partial_file.replace(filename)
        downloaded_files.append(filename)
        print(f"Successfully downloaded: {filename.name}")

    except requests.exceptions.RequestException as e:
        print(f"Network error downloading {item.id}: {e}")
        failed_downloads.append(item.id)
    except PermissionError as e:
        print(f"Permission error downloading {item.id}: {e}")
        print(f"Check write permissions for: {filename.parent}")
        failed_downloads.append(item.id)
    except Exception as e:
        print(f"Error downloading {item.id}: {type(e).__name__}: {e}")
        failed_downloads.append(item.id)
    finally:
        # After a successful rename the ".part" file no longer exists; on every other
        # path (failure, short read, interruption) this removes the leftover.
        partial_file.unlink(missing_ok=True)

print(f"\n{'='*50}")
print(f"DOWNLOAD SUMMARY")
print(f"{'='*50}")
print(f"Successfully downloaded: {len(downloaded_files)} files")
print(f"Failed downloads: {len(failed_downloads)} files")
if downloaded_files:
    print(f"Downloaded files:")
    for f in downloaded_files:
        print(f"  - {f.name} ({f.stat().st_size:,} bytes)")
if failed_downloads:
    print(f"Failed items: {failed_downloads}")

Download directory: naip_downloads
Directory exists: True
Directory is writable: True


Downloading NAIP tiles:   0%|          | 0/2 [00:00<?, ?it/s]


Processing item: ca_m_3912037_nw_10_060_20180916_20190210
Target file: naip_downloads/ca_m_3912037_nw_10_060_20180916_20190210.tif
URL: https://naipeuwest.blob.core.windows.net/naip/v002/ca/2018/ca_060cm_2018/39120/m_3912037_nw_10_060_20180916_20190210.tif
Starting download...
Expected size: 499,813,616 bytes


Downloading NAIP tiles:  50%|█████     | 1/2 [10:15<10:15, 615.48s/it]

Downloaded: 499,813,616 bytes
Successfully downloaded: ca_m_3912037_nw_10_060_20180916_20190210.tif

Processing item: ca_m_3912037_ne_10_060_20180916_20190210
Target file: naip_downloads/ca_m_3912037_ne_10_060_20180916_20190210.tif
URL: https://naipeuwest.blob.core.windows.net/naip/v002/ca/2018/ca_060cm_2018/39120/m_3912037_ne_10_060_20180916_20190210.tif
Starting download...
Expected size: 496,185,831 bytes


Downloading NAIP tiles: 100%|██████████| 2/2 [20:24<00:00, 612.35s/it]

Downloaded: 496,185,831 bytes
Successfully downloaded: ca_m_3912037_ne_10_060_20180916_20190210.tif

DOWNLOAD SUMMARY
Successfully downloaded: 2 files
Failed downloads: 0 files
Downloaded files:
  - ca_m_3912037_nw_10_060_20180916_20190210.tif (499,813,616 bytes)
  - ca_m_3912037_ne_10_060_20180916_20190210.tif (496,185,831 bytes)





In [11]:
# Validate downloaded files before mosaicking.
# The glob result is sorted because directory listing order depends on the filesystem.
# valid_files is the list the merge receives, so a stable order keeps the mosaic
# reproducible wherever tiles overlap.
tif_files = sorted(NAIP_DOWNLOADS_DIR.glob("*.tif"))
print(f"Found {len(tif_files)} TIF files for mosaicking")

if not tif_files:
    raise ValueError("No TIF files found for mosaicking. Check download step.")

# Keep only the files that rasterio can open and that report a non-empty raster
valid_files = []
for tif_file in tif_files:
    try:
        with rasterio.open(tif_file) as src:
            # Basic validation: at least one column, one row and one band
            if src.width > 0 and src.height > 0 and src.count > 0:
                valid_files.append(tif_file)
            else:
                print(f"Warning: Invalid raster dimensions in {tif_file.name}")
    except Exception as e:
        # An unreadable tile is reported and skipped rather than aborting the run
        print(f"Warning: Cannot open {tif_file.name}: {e}")

print(f"Valid files for mosaicking: {len(valid_files)}")
if not valid_files:
    raise ValueError("No valid TIF files found for mosaicking.")

Found 2 TIF files for mosaicking
Valid files for mosaicking: 2


## 5. Create Mosaic from Downloaded Tiles

In [12]:
# Merge the validated tiles into one in-memory mosaic.
# Produces three names used by the following cell:
#   mosaic        - merged pixel array, indexed (band, row, column)
#   out_transform - transform returned by the merge for that array
#   out_meta      - metadata for writing the mosaic to disk
try:
    # Every dataset opened below is registered with the ExitStack, which closes all of
    # them when the block is left - normally or through an exception - and does not
    # silently discard errors raised while closing.
    with ExitStack() as open_datasets:
        src_files_to_mosaic = []

        print("Opening raster files...")
        for tif_file in tqdm(valid_files, desc="Opening files"):
            try:
                src = open_datasets.enter_context(rasterio.open(tif_file))
                src_files_to_mosaic.append(src)
            except Exception as e:
                # A tile that cannot be opened is reported and left out of the mosaic
                print(f"Error opening {tif_file.name}: {e}")

        if not src_files_to_mosaic:
            raise ValueError("No files could be opened for mosaicking")

        print(f"Successfully opened {len(src_files_to_mosaic)} files")
        print("Creating mosaic...")

        # Create mosaic, converting the pixel values to the configured DTYPE
        mosaic, out_transform = merge(src_files_to_mosaic, dtype=DTYPE)

        print(f"Mosaic created:")
        print(f"  Shape: {mosaic.shape}")
        print(f"  Data type: {mosaic.dtype}")
        print(f"  Bands: {mosaic.shape[0]}")

        # Start from the first tile's metadata and override what the merge changed
        out_meta = src_files_to_mosaic[0].meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": mosaic.shape[1],
            "width": mosaic.shape[2],
            "dtype": DTYPE,
            "transform": out_transform,
            "compress": "lzw"  # Add compression to reduce file size
        })

        print(f"Output metadata updated")

except Exception as e:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error during mosaicking: {e}")
    raise

Opening raster files...


Opening files: 100%|██████████| 2/2 [00:00<00:00, 297.50it/s]

Successfully opened 2 files
Creating mosaic...





Mosaic created:
  Shape: (4, 12590, 18760)
  Data type: float32
  Bands: 4
Output metadata updated


In [13]:
try:
    # Persist the in-memory mosaic using the metadata prepared in the previous cell
    print(f"Writing mosaic to: {MOSAIC_OUTPUT}")
    with rasterio.open(MOSAIC_OUTPUT, "w", **out_meta) as mosaic_writer:
        mosaic_writer.write(mosaic)

    # Re-open the file that was just written and report what is on disk
    with rasterio.open(MOSAIC_OUTPUT) as written:
        print(f"Mosaic written successfully:")
        size_mb = Path(MOSAIC_OUTPUT).stat().st_size / (1024*1024)
        print(f"  File size: {size_mb:.1f} MB")
        print(f"  Dimensions: {written.width} x {written.height}")
        print(f"  Bands: {written.count}")
        print(f"  CRS: {written.crs}")
        print(f"  Bounds: {written.bounds}")

except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error writing mosaic: {exc}")
    raise

Writing mosaic to: naip_mosaic_2018.tif
Mosaic written successfully:
  File size: 1305.0 MB
  Dimensions: 18760 x 12590
  Bands: 4
  CRS: EPSG:26910
  Bounds: BoundingBox(left=714810.0, bottom=4368156.0, right=726066.0, top=4375710.0)


## 6. Reproject to Target Coordinate System

In [14]:
try:
    print(f"Reprojecting mosaic from source CRS to {TARGET_CRS}")

    with rasterio.open(MOSAIC_OUTPUT) as src:
        print(f"Source CRS: {src.crs}")
        print(f"Source dimensions: {src.width} x {src.height}")

        # Let rasterio derive the output grid (transform and pixel dimensions)
        # that covers the source bounds in the target CRS
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src.crs, TARGET_CRS, src.width, src.height, *src.bounds
        )

        print(f"Target dimensions: {dst_width} x {dst_height}")

        # Output metadata: the source metadata with the grid-related entries replaced
        dst_profile = {
            **src.meta,
            "crs": TARGET_CRS,
            "transform": dst_transform,
            "width": dst_width,
            "height": dst_height,
            "compress": "lzw",
        }

        print(f"Writing reprojected mosaic to: {REPROJECTED_OUTPUT}")

        with rasterio.open(REPROJECTED_OUTPUT, "w", **dst_profile) as dst:
            # Bands are numbered from 1; each one is warped straight from the
            # source file into the destination file
            band_numbers = range(1, src.count + 1)
            for band_number in tqdm(band_numbers, desc="Reprojecting bands"):
                reproject(
                    source=rasterio.band(src, band_number),
                    destination=rasterio.band(dst, band_number),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=dst_transform,
                    dst_crs=TARGET_CRS,
                    resampling=Resampling.nearest
                )

    # Re-open the result and report what is on disk
    with rasterio.open(REPROJECTED_OUTPUT) as reprojected:
        print(f"\nReprojection completed successfully:")
        size_mb = Path(REPROJECTED_OUTPUT).stat().st_size / (1024*1024)
        print(f"  File size: {size_mb:.1f} MB")
        print(f"  Final CRS: {reprojected.crs}")
        print(f"  Final dimensions: {reprojected.width} x {reprojected.height}")
        print(f"  Final bounds: {reprojected.bounds}")

except Exception as exc:
    # Report the problem, then let the original exception stop the notebook
    print(f"Error during reprojection: {exc}")
    raise

Reprojecting mosaic from source CRS to EPSG:6339
Source CRS: EPSG:26910
Source dimensions: 18760 x 12590
Target dimensions: 18760 x 12590
Writing reprojected mosaic to: naip_mosaic_2018_epsg6339.tif


Reprojecting bands: 100%|██████████| 4/4 [00:29<00:00,  7.48s/it]



Reprojection completed successfully:
  File size: 1305.0 MB
  Final CRS: EPSG:6339
  Final dimensions: 18760 x 12590
  Final bounds: BoundingBox(left=714809.2856363466, bottom=4368156.486669507, right=726065.2232289115, top=4375710.444787332)


In [22]:
# Final report for the run.
# The banner claims success only when both output rasters are actually on disk, so a
# failed or partially executed run is not reported as completed.
expected_outputs = [
    ("Original mosaic", MOSAIC_OUTPUT),
    ("Reprojected mosaic", REPROJECTED_OUTPUT),
]
all_outputs_present = all(Path(path).exists() for _, path in expected_outputs)

print("="*60)
if all_outputs_present:
    print("NAIP PROCESSING COMPLETED SUCCESSFULLY")
else:
    print("NAIP PROCESSING INCOMPLETE - OUTPUT FILE(S) MISSING")
print("="*60)
print(f"Meadow: {MEADOW_NAME}")
print(f"Year: {YEAR}")
# The search/download cells may not have been run in this kernel; report N/A in that case
print(f"Items found: {len(items) if 'items' in globals() else 'N/A'}")
print(f"Files downloaded: {len(downloaded_files) if 'downloaded_files' in globals() else 'N/A'}")
print()
print("Output files created:")
for label, path in expected_outputs:
    print(f"  {label}: {path}")
    if Path(path).exists():
        print(f"    Size: {Path(path).stat().st_size / (1024*1024):.1f} MB")
    else:
        print("    (file not found)")
print(f"  Working directory: {os.getcwd()}")
print("="*60)

NAIP PROCESSING COMPLETED SUCCESSFULLY
Meadow: Lacey
Year: 2018
Items found: 2
Files downloaded: 2

Output files created:
  Original mosaic: naip_mosaic_2018.tif
    Size: 1305.0 MB
  Reprojected mosaic: naip_mosaic_2018_epsg6339.tif
    Size: 1305.0 MB
  Working directory: /media/grendel/7db216a7-836f-4e8d-b439-e4f999cedb232/USGS/meadow_temp/data/Lacey


In [18]:
# Clip the reprojected mosaic to the AOI outline and write it as naip_<MEADOW_NAME>.tif.
with rasterio.open(REPROJECTED_OUTPUT) as src:
    # Reproject a copy of the AOI into the raster's CRS. `aoi` itself is left untouched,
    # so this cell gives the same result when re-run and does not change what other
    # cells see in `aoi`.
    aoi_in_raster_crs = aoi.to_crs(src.crs)

    # crop=True shrinks the output to the AOI extent; pixels outside the AOI shape are
    # filled with NaN. NaN needs a floating-point raster - the mosaic is written with
    # DTYPE, which the configuration sets to float32.
    clipped, clipped_transform = mask(
        src, aoi_in_raster_crs.geometry, crop=True, nodata=np.nan
    )
    clipped_meta = src.meta.copy()
    clipped_meta.update({
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": clipped_transform,
        # Record the fill value in the file so readers treat the masked pixels as missing
        "nodata": np.nan,
        # Same compression as the other rasters written by this notebook
        "compress": "lzw"
    })

# Write out the clipped raster
clipped_output = f"naip_{MEADOW_NAME}.tif"
with rasterio.open(clipped_output, "w", **clipped_meta) as dest:
    dest.write(clipped)

print(f"Clipped raster written to: {clipped_output}")

Clipped raster written to: naip_Lacey.tif


In [19]:
# Clean up everything that was only needed on the way to the clipped raster.

# Remove the naip_downloads directory and all its contents
if NAIP_DOWNLOADS_DIR.exists():
    shutil.rmtree(NAIP_DOWNLOADS_DIR)
    print(f"Deleted folder and all files: {NAIP_DOWNLOADS_DIR}")
else:
    print(f"Directory does not exist: {NAIP_DOWNLOADS_DIR}")

# Delete the intermediate rasters: the full mosaic and its reprojected copy.
# A missing file is reported rather than treated as an error, so the cell can be re-run.
for intermediate_file in (MOSAIC_OUTPUT, REPROJECTED_OUTPUT):
    if os.path.exists(intermediate_file):
        os.remove(intermediate_file)
        print(f"Deleted file: {intermediate_file}")
    else:
        print(f"File not found: {intermediate_file}")

Directory does not exist: naip_downloads
Deleted file: naip_mosaic_2018.tif
Deleted file: naip_mosaic_2018_epsg6339.tif
