# Elevation Comparison for AGU 2025

## CO_WestCentral_2019

Use 3DEP 1m seamless product + STV products

Approach: reproject and regrid every other dataset onto the grid of the reference lidar (here, the 3DEP 1 m seamless DEM).

In [None]:
from __future__ import annotations

# Reload edited project modules automatically so code changes are picked up
# without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# Temporary setup logging

import os

# Necessary to expose PROJ-level logs
# NOTE(review): this cell runs before the pyproj / rasterio / GDAL imports in the
# next cell — presumably so PROJ sees the variable when it initializes; confirm
# before reordering cells.
os.environ["PROJ_DEBUG"] = "2"

In [None]:
import common_functions

# NOTE: move these into coincident?
import cql2
import numpy as np
import pyproj
import rasterio
import xarray as xr
from osgeo import gdal

import coincident
from coincident import pcd_fixtures

In [None]:
# Configure logging to only show rasterio debug messages
import logging

# Create a custom logger for rasterio
rasterio_logger = logging.getLogger("rasterio")
rasterio_logger.setLevel(logging.DEBUG)

# Keep this cell idempotent: without this cleanup every re-run would attach
# another console/file handler pair to the same logger (each message printed
# N times) and leave the previously opened log file handle dangling.
for stale_handler in list(rasterio_logger.handlers):
    rasterio_logger.removeHandler(stale_handler)
    stale_handler.close()

# Create handlers: one for the notebook output, one for a log file in the
# current working directory
console_handler = logging.StreamHandler()
file_handler = logging.FileHandler("rasterio.log")

# Set level for handlers
console_handler.setLevel(logging.DEBUG)
file_handler.setLevel(logging.DEBUG)

# Create formatter (shared by both handlers)
formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter)

# Add handlers to rasterio logger
rasterio_logger.addHandler(console_handler)
rasterio_logger.addHandler(file_handler)

# Prevent propagation to root logger to avoid duplicate messages
rasterio_logger.propagate = False

# Set root logger to WARNING to suppress other debug messages
logging.getLogger().setLevel(logging.WARNING)

In [None]:
# Render figures inline in the notebook, using the high-resolution 'retina' format
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

### Overview plots 

In [None]:
# Site identifier; reused below in plot titles
site = "CO_WestCentral_2019"

# Metadata for this site from coincident's pcd_fixtures
# NOTE(review): exact structure of site_meta is not visible here — see pcd_fixtures.read_pcd_site
site_meta = pcd_fixtures.read_pcd_site(site)

In [None]:
# common_functions.interactive_site_map(site_meta, title=site)

In [None]:
# Static overview map of dataset coverage for the site; the return value is kept as `ax`
ax = common_functions.static_site_map(site_meta, title=f"{site} - Dataset Coverage")

In [None]:
# Timeline of data availability for the site
common_functions.plot_timeline(site_meta, title=f"Data Availability for Site: {site}")

## Standard product comparison

As a baseline, we compare standard DEM products against ICESat-2 (IS2) and GEDI altimetry.


In [None]:
# Split the site metadata into per-dataset GeoDataFrames.
# NOTE(review): judging by the names these are ALS lidar, Maxar, ICESat-2 and GEDI
# footprints plus their common overlap — confirm in common_functions.load_geodataframes.
# gf_overlap serves as the area of interest for the searches and subsets below.
gf_als, gf_maxar, gf_is2, gf_gedi, gf_overlap = common_functions.load_geodataframes(
    site_meta
)

## Get 1m 3DEP Seamless DEM (reference)

In [None]:
# Need to use a cql2 filter to search our custom USGS 3DEP dataset:
# restrict results to this project's collection and to tiles intersecting the overlap AOI
cql2_json = cql2.parse_text("collection='CO_WestCentral_2019_A19'").to_json()
gf_1m = coincident.search.search(
    dataset="3dep-1m", filter=cql2_json, intersects=gf_overlap
)
gf_1m.head()

In [None]:
# 3DEP 1m tiles on a UTM zone boundary are duplicated (once per zone),
# so keep the tiles of a single zone only.

# Tally how many tiles each projection code contributes
zone_counts = gf_1m["proj:code"].value_counts()
print(f"UTM zone counts:\n{zone_counts}")

# The zone with the most tiles becomes the working zone
primary_zone = zone_counts.idxmax()
print(f"\nPrimary zone: {primary_zone}")

# Discard tiles from every other zone; copy so the result is an independent frame
gf_1m = gf_1m.loc[gf_1m["proj:code"].eq(primary_zone)].copy()
# print(f"\nFiltered to {len(gf_1m)} tiles in {primary_zone}")

In [None]:
# Sorted list of the elevation-asset URLs of all tiles to mosaic
url_list = sorted(assets["elevation"]["href"] for assets in gf_1m["assets"])
url_list

In [None]:
# Overwrite metadata CRS to use correct 3D CRS
# Built from the tiles' projection code (all rows share one code after the zone
# filter above), with EPSG:6318 passed as a_srs and GEOID18 as the geoid.
# NOTE(review): how a_srs / geoid are combined is internal to
# construct_custom_utm_crs — inspect the displayed CRS to verify.
correct_crs = coincident.io.proj.construct_custom_utm_crs(
    gf_1m["proj:code"].iloc[0], a_srs="EPSG:6318", geoid="GEOID18"
)
correct_crs

In [None]:
# Our best bet I think is to use coincident to download these tiles locally first (then they are cached unlike GDAL HTTP driver)
# NOTE(review): output_dir is hard-coded to /tmp/ — consider a configurable data directory
paths = coincident.io.download.download_files(url_list, output_dir="/tmp/")

In [None]:
# Build a VRT mosaic from the list of downloaded tiffs
# NOTE: this also crops to AOI and assigns corrected 3D CRS
vrt_path = coincident.io.gdal.create_vrt(
    paths,
    # AOI bounds expressed in the corrected CRS
    outputBounds=gf_overlap.to_crs(correct_crs).total_bounds,
    a_srs=correct_crs.to_wkt(),
    # Inputs are local files downloaded above, so no /vsicurl/ prefix
    prepend_vsicurl=False,
    # Force cleanly tapped result
    custom_res=1.0,
    # NOTE: if coarsening with custom_res, prepending /vsicurl/ is actually much better
    # because single overview might get fetched...
    # prepend_vsicurl=False
    # custom_res=10.0,
    # resolution='user',
    # prepend_vsicurl=True
)

In [None]:
%%time

# NOTE: this is in NAD83(2011) / UTM zone 13N + NAVD88(GEOID18)
# Want to just check timing

# CPU times: user 15 s, sys: 4.11 s, total: 19.1 s
# Wall time: 1min 27s

# NOTE: this will be slow on wifi 200MB per tile @~10MB/s ~20s per tile
# Much better performance in aws us-west-2 where the data is hosted!

# Unfortunately not cached by http driver... will need to re-download all of them

# with rasterio.Env(cpl_debug="ON", GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR"):
#     da_lidar_orig = xr.open_dataarray(vrt_path, engine='rasterio').squeeze().compute()

In [None]:
# Save a tiled tiff for local use
# da_lidar_orig.rio.to_raster(f"{site}_3DEP_1m_UTM.tif", tiled=True, compress='lzw')

In [None]:
# Save a cog for later cloud use? ~2x slower
# OVERVIEWS=2,4,8,16
# Set GDAL creation options for COG
# creation_options = {
#     'driver': 'COG',
#     'compress': 'deflate',
#     'OVERVIEW_RESAMPLING': 'AVERAGE'
# }
# OVERVIEW_COMPRESS='LZW' # defaults to auto, so should use same as main image...
# da_lidar_orig.rio.to_raster(f"{site}_3DEP_1m_UTM_cog.tif", driver='COG', OVERVIEW_COUNT=4, compress='lzw')

In [None]:
# Construct correct transform to go from NAD83(2011) / UTM zone 13N + NAVD88(GEOID18) to
# ITRF2014 (EPSG:7912) / UTM zone 13N ellipsoid heights
# Read back the projection string that the VRT carries (assigned in create_vrt above)
with gdal.Open(vrt_path) as ds:
    srs = ds.GetProjection()

# Target CRS: same UTM code as the selected tiles, built on EPSG:7912
utm_7912 = coincident.io.proj.construct_custom_utm_crs(primary_zone, a_srs="EPSG:7912")
# Transformation from the source (VRT) CRS to the target CRS
transformer = coincident.io.proj.get_proj_transform(pyproj.CRS(srs), utm_7912)

In [None]:
# Display the VRT path returned by create_vrt
vrt_path

In [None]:
# Using the above transform create a warped VRT to reproject upon loading
# Arguments: input path, source CRS string, target CRS, and the PROJ string of the transform
infile = vrt_path
# infile = '/Users/scotthenderson/GitHub/uw-cryo/test.tif'
warped_vrt_path = coincident.io.gdal.warp_with_pipeline(
    infile, srs, utm_7912, transformer.to_proj4()
)

In [None]:
# GDAL_OF_THREAD_SAFE is driver-dependent (GeoTIFF and Cloud Optimized GeoTIFF drivers support it; some others do not)
# Display the path of the warped VRT created above
warped_vrt_path

In [None]:
%%time

# Finally, load our reference lidar!
# Opening the warped VRT reprojects on read; .squeeze() drops length-1 dimensions
# and .compute() loads the result into memory.
# NOTE: surprisingly slow... maybe cubic resampling? needing to pull shift grid locally?
# Try on command line
# NOTE: how to use warp-memory and do this in parallel?
# No PROJ messages? in any case 38s is ok, but 2x slower than GDAL CLI
# NOTE: GDAL_NUM_THREADS is key
# warped_vrt_path = '/var/folders/1v/k85p3x5d10zb9mhqx88lb3240000gn/T/tmp2zr7zush.vrt'

# GDAL options apply only inside this block: debug output on, all CPUs for threaded work
with rasterio.Env(CPL_DEBUG=True, GDAL_NUM_THREADS="ALL_CPUS"):
    da_lidar = xr.open_dataarray(warped_vrt_path, engine="rasterio").squeeze().compute()
da_lidar

In [None]:
# da_lidar.rio.to_raster('warped-rasterio.tif')
# da_gdal = xr.open_dataarray('/tmp/test7.tif', engine='rasterio').squeeze()
# da_gdal

In [None]:
# Floating point differences in WKT values? gdal includes  AREA_OR_POINT: Area as attribute
# xr.testing.assert_identical(da_lidar, da_gdal)
# xr.testing.assert_equal(da_lidar, da_gdal) # Cool.

In [None]:
%%time

# NOTE: at full resolution this can take a while and use a lot of memory
# Can be better to run compute on unprojected data first
# da_lidar = da_lidar.compute()

In [None]:
# Save a local copy for faster loading next time
# da_lidar.rio.to_raster("CO_WestCentral_2019_1m_UTM13N_ITRF2014.tif", compress='lzw')

## Load additional datasets

In [None]:
# NOTE: also super slow going to 1m... better to load w/ same approach of
# isolating TIF_LIST first?
# Basically, does not work well for increasing resolution

# da_cop = coincident.io.xarray.load_dem_7912("cop30", aoi=gf_overlap)

# ds_cop_r = (
#     da_cop.rio.reproject_match(
#         da_lidar,
#         resampling=Resampling.bilinear,
#     )
#     .where(da_lidar.notnull())
#     .to_dataset(name="elevation")
# )

# ds_cop_r

In [None]:
# NOTE: for plotting, don't need such high res...
# Create hillshade variables for plot backgrounds
# This function expects Datasets

# 4m 33 seconds?! something is up.

# ds_lidar = da_lidar.to_dataset(name="elevation")

# ds_lidar["hillshade"] = coincident.io.gdal.gdaldem(
#     ds_lidar.elevation, "hillshade",
# )

In [None]:
## Load altimeters
# Get GEDI
# Subset over the overlap AOI; the include_* flags request WorldCover and 3DEP
# samples alongside each point (the "3dep.value" column is used below)
data_gedi = coincident.io.sliderule.subset_gedi02a(
    gf_gedi, gf_overlap, include_worldcover=True, include_3dep=True
)

In [None]:
# data_gedi.to_parquet("data_gedi.parquet")
# Preview the first rows of the GEDI subset
data_gedi.head()

In [None]:
# NOTE: effectively same results as 10m sampling !
import matplotlib.pyplot as plt

# Difference between the GEDI elevation and the 3DEP value sampled by Sliderule
diff_sliderule = data_gedi["elevation_lm"] - data_gedi["3dep.value"]
coincident.plot.plot_diff_hist(diff_sliderule)
plt.title("GEDI elevation_lm - 3DEP 1m (Sliderule)")

In [None]:
# What if we do our own sampling
# NOTE(review): to_3d presumably adds elevation_lm as the z coordinate of each point geometry — confirm
gfG = coincident.io.sliderule.to_3d(data_gedi, z_col="elevation_lm")
# NOTE: nothing dynamic here... would only apply to if helmerts are present (so changing ITRF or going to static CRS etc.)
t = pyproj.Transformer.from_crs(pyproj.CRS(gfG.crs), utm_7912, always_xy=True)
# Show the PROJ string of the transformation pyproj selected
t.to_proj4()

In [None]:
# data_gedi_r = gfG.to_crs(utm_7912) # da_lidar.rio.crs?
# Reproject the GEDI points into the CRS of the reference lidar raster
data_gedi_r = gfG.to_crs(da_lidar.rio.crs)
print(data_gedi_r.crs)  # prints as projjson
data_gedi_r.head()

In [None]:
# Display the lidar DataArray wrapped as a Dataset with an "elevation" variable
da_lidar.to_dataset(name="elevation")

In [None]:
# Sample our raster at these points
# The returned object's elev_diff attribute is plotted in the next cell
result = coincident.plot.utils.get_elev_diff(
    source=data_gedi_r,
    reference=da_lidar.to_dataset(name="elevation"),
    source_col="elevation_lm",
)

In [None]:
# Two stacked histograms on shared axes: our own sampling (top) vs Sliderule's
# 3DEP sampling (bottom), both limited to +/-10
fig, axes = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(8, 6))
ax = axes[0]
coincident.plot.plot_diff_hist(result.elev_diff, range=(-10, 10), ax=ax)
ax.set_title("GEDI elevation_lm - 3DEP 1m (Coincident)")
ax = axes[1]
coincident.plot.plot_diff_hist(diff_sliderule, range=(-10, 10), ax=ax)
ax.set_title("GEDI elevation_lm - 3DEP 1m (Sliderule)");

# NOTE: no idea why sliderule returns so many NaNs for GEDI, in any case, similar distribution!

In [None]:
# 4D transforms must go via proj...
# transformer.transform(xx=3496737.2679, yy=743254.4507, zz=5264462.9620, tt=2019.0)

In [None]:
# Get ICESat-2
# Previously seen error (kept for reference):
# AttributeError: 'GeoDataFrame' object has no attribute 'atl06_quality_summary'
# NOTE: much slower to sample 3DEP... switch to new endpoint / parquet sampling?
data_is2 = coincident.io.sliderule.subset_atl06(
    gf_is2, gf_overlap, include_worldcover=True, include_3dep=True
)

In [None]:
# Preview the first rows of the ICESat-2 subset
data_is2.head()

In [None]:
# Noticeable improvement over 10m sampling
# Difference between ICESat-2 h_li and the 3DEP value sampled by Sliderule
diff_is2_sliderule = data_is2["h_li"] - data_is2["3dep.value"]
coincident.plot.plot_diff_hist(diff_is2_sliderule)
plt.title("IS2 ATL06 h_li - 3DEP 1m (Sliderule)")

In [None]:
# So here we need to correctly re-project the altimetry points to DEM
# NAIVE 2D reprojection
# NOTE(review): unlike the GEDI path above, the points are not passed through
# to_3d first — confirm whether h_li needs a matching vertical transformation.
data_is2_r = data_is2.to_crs(da_lidar.rio.crs)
data_is2_r.head()

In [None]:
# Number of ICESat-2 points in the reprojected subset
len(data_is2_r)

In [None]:
# Sample the reference lidar at the reprojected ICESat-2 points (our own sampling)
result = coincident.plot.utils.get_elev_diff(
    source=data_is2_r,
    reference=da_lidar.to_dataset(name="elevation"),
    source_col="h_li",
)


# Two stacked histograms on shared axes: our own sampling (top) vs Sliderule's
# 3DEP sampling (bottom), both limited to +/-10
fig, axes = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(8, 6))
ax = axes[0]
coincident.plot.plot_diff_hist(result.elev_diff, range=(-10, 10), ax=ax)
ax.set_title("ICESat-2 h_li - 3DEP 1m (Coincident)")
ax = axes[1]
# Plot the ICESat-2 Sliderule differences here; diff_sliderule holds the GEDI
# differences and would not match this panel's title
coincident.plot.plot_diff_hist(diff_is2_sliderule, range=(-10, 10), ax=ax)
ax.set_title("ICESat-2 h_li - 3DEP 1m (Sliderule)");

In [None]:
# ds_lidar = da_lidar.to_dataset(name="elevation")
# Quick check: does the CRS survive a 10x10 block-mean coarsening?
test = da_lidar.coarsen(dict(x=10, y=10), boundary="trim").mean()
test.rio.crs

In [None]:
# reduce to 10m for plotting
# (mean over 10x10 pixel blocks; incomplete blocks at the edges are trimmed)
ds_lidar = (
    da_lidar.coarsen(dict(x=10, y=10), boundary="trim")
    .mean()
    .to_dataset(name="elevation")
)
# Inputs for compare_dems: DEMs by label, and altimetry as label -> (points, elevation column)
dems = {"3DEP": ds_lidar}  # , 'COP': ds_cop_r}
altimeters = {"ICESat-2": (data_is2_r, "h_li"), "GEDI": (data_gedi_r, "elevation_lm")}
# altimeters = {"GEDI": (data_gedi_r, "elevation_lm")}

In [None]:
# Search for 2021 WorldCover items intersecting the overlap AOI
gf_wc = coincident.search.search(
    dataset="worldcover",
    intersects=gf_overlap,
    datetime=["2021"],
    # NOTE: 2020 throwing an error...
)

In [None]:
# TODO: set requested resolution based on AOI / DEM resolution?
# Or always keep at native 10m resolution ?
# Load the "map" band for the AOI and compute it eagerly
ds_wc = coincident.io.xarray.to_dataset(
    gf_wc,
    bands=["map"],
    aoi=gf_overlap,
).compute()

In [None]:
# Distinct class values present in the WorldCover map over the AOI
np.unique(ds_wc["map"])

In [None]:
# Full WorldCover class map as provided by coincident's dataset definition
wc = coincident.datasets.planetary_computer.WorldCover()
classmap = wc.classmap
classmap

In [None]:
# Class values that actually occur in the AOI
unique_values = np.unique(ds_wc["map"].to_numpy())
# Keep only the class-map entries whose (integer) key occurs in the AOI
classmap = {
    code: entry
    for code, entry in classmap.items()
    if int(code) in unique_values
}
classmap

In [None]:
# Plot the ESA WorldCover map for the AOI
coincident.plot.plot_esa_worldcover(ds_wc)

In [None]:
# Comparison panel over the full AOI, using the 10m-coarsened lidar and both altimeters
# Previously seen error (kept for reference):
# UnboundLocalError: cannot access local variable 'diff_mappable' where it is not associated with a value
ax_dict = coincident.plot.compare_dems(
    dems,
    altimeters,
    # add_hillshade=True,
    altimetry_basemap="Esri.WorldImagery",
    # altimetry_basemap='hillshade',
    # elevation_clim=(1000, 4000),
    elevation_cmap="plasma",
    # NOTE: best size depends on aspect ratios and number of columns
    figsize=(8.5, 11),
    suptitle=f"Elevation comparisons for {site}",
)

In [None]:
# Zoom into a particular area and plot at full resolution
# Box coordinates are hard-coded for this site.
# NOTE(review): presumably expressed in the raster's own CRS (projected meters) — confirm
dem_subset = da_lidar.rio.clip_box(minx=3.43e5, miny=4.31e6, maxx=3.6e5, maxy=4.32e6)
# Hillshade of the subset, used as a plot background below
da_hillshade = coincident.io.gdal.gdaldem(
    dem_subset,
    "hillshade",
)

In [None]:
# Plot the full-resolution subset together with its hillshade
coincident.plot.plot_dem(dem_subset, da_hillshade=da_hillshade);

In [None]:
# Need to resample 3DEP at 1m res!
# Bundle the subset elevation and its hillshade into one Dataset for compare_dems
ds_subset = dem_subset.to_dataset(name="elevation")
ds_subset["hillshade"] = da_hillshade

In [None]:
# NOTE: this works, but altimetry samples are still at 10m
ax_dict = coincident.plot.compare_dems(
    {"3DEP": ds_subset},
    altimeters,
    # add_hillshade=True,
    # altimetry_basemap='Esri.WorldImagery',
    # altimetry_basemap='hillshade',
    # elevation_clim=(1000, 4000),
    elevation_cmap="plasma",
    # NOTE: best size depends on aspect ratios and number of columns
    # figsize=(8.5, 11),
    figsize=(8.5, 6),
    altimetry_pointsize=3.0,
    suptitle=f"Elevation comparisons for {site}",
)

In [None]:
import geopandas as gpd
from shapely.geometry import box

# Turn the bounding box of the zoomed DEM subset into a one-row GeoDataFrame
# and reproject it to EPSG:4326 so it can serve as the AOI for the subsets below
bounds = dem_subset.rio.bounds()
sub_aoi = gpd.GeoDataFrame(
    geometry=[box(*bounds)],
    crs=dem_subset.rio.crs,
).to_crs("EPSG:4326")
# sub_aoi.explore()

In [None]:
# Get GEDI
# NOTE: this overwrites the full-AOI data_gedi from above with the zoomed-subset
# points, and (unlike the earlier call) does not request 3DEP samples
data_gedi = coincident.io.sliderule.subset_gedi02a(
    gf_gedi,  # already have granule list
    sub_aoi,
    include_worldcover=True,
)
data_gedi

In [None]:
# Get ICESat-2 for the zoomed subset
# NOTE: this overwrites the full-AOI data_is2 from above, and (unlike the earlier
# call) does not request 3DEP samples
data_is2 = coincident.io.sliderule.subset_atl06(
    gf_is2,  # already have granule list
    sub_aoi,
    include_worldcover=True,
)

In [None]:
# Reproject the zoomed-subset altimetry points into the DEM subset's CRS
# NOTE(review): naming is asymmetric — the GEDI result goes to a new name
# (da_gedi_r, leaving the full-AOI data_gedi_r untouched) while data_is2_r
# overwrites the full-AOI ICESat-2 frame. Later cells must pick the right one.
da_gedi_r = data_gedi.to_crs(dem_subset.rio.crs)
data_is2_r = data_is2.to_crs(dem_subset.rio.crs)

In [None]:
# Comparison panel for the zoomed subset, using the subset altimetry points
# drawn over the hillshade
altimeters = {"ICESat-2": (data_is2_r, "h_li"), "GEDI": (da_gedi_r, "elevation_lm")}
ax_dict = coincident.plot.compare_dems(
    {"3DEP": ds_subset},
    altimeters,
    add_hillshade=True,
    # altimetry_basemap='Esri.WorldImagery',
    altimetry_basemap="hillshade",
    elevation_cmap="plasma",
    figsize=(8.5, 8),
    diff_clim=(-5, 5),
    diff_cmap="RdYlBu_r",  # avoid central white if on shaded relief; 'managua', 'Spectral' are other options
    altimetry_pointsize=6.0,
    suptitle=f"Elevation comparisons for {site}",
)

In [None]:
# Recall date of observations
# TODO: add dates as labels directly on plot?
# np.unique(data_is2_r.index.date)
# np.unique(da_gedi_r.index.date)
fmt = "%Y-%m-%d"
print(
    f"ALS acquisition: {gf_als['start_datetime'].iloc[0].strftime(fmt)} - {gf_als['end_datetime'].iloc[0].strftime(fmt)}"
)
print(
    f"ICESat-2 acquisition dates: {[d.strftime(fmt) for d in np.unique(data_is2_r.index.date)]}"
)
# Use the zoomed-subset GEDI points (da_gedi_r) so both altimeters report dates
# for the same area as the panel above; data_gedi_r still holds the full-AOI
# points, whereas data_is2_r was overwritten with the subset.
print(
    f"GEDI acquisition dates: {[d.strftime(fmt) for d in np.unique(da_gedi_r.index.date)]}"
)

## STV-generated products


1-meter posting generated with https://github.com/uw-cryo/lidar_tools

In [None]:
# Requires `stv-user` credentials
import os

# Show which AWS profile (if any) is set in the environment, then list the
# processed products for this site in the bucket
print(os.environ.get("AWS_PROFILE"))
!aws s3 ls --human-readable s3://uw-cryo-stv/usgs_pcd_products/CO_WestCentral_2019_processing/

In [None]:
# Inspect the DSM mosaic's metadata directly on S3, using the stv-user profile for this command only
!AWS_PROFILE=stv-user gdalinfo /vsis3/uw-cryo-stv/usgs_pcd_products/CO_WestCentral_2019_processing/CO_WestCentral_2019-DSM_mos.tif

In [None]:
# Same comparison panel with custom lidar and stereo
# import coincident  # Seems to set global rasterio env
import rasterio
import xarray as xr

href = "s3://uw-cryo-stv/usgs_pcd_products/CO_WestCentral_2019_processing/CO_WestCentral_2019-DSM_mos.tif"


# Open the DSM mosaic from S3 with the stv-user AWS profile, reading a
# reduced-resolution overview (overview_level=3) instead of the full raster.
# NOTE(review): no .compute()/.load() is called inside the `with` block; if the
# array is read lazily, later reads run outside this Env and may not use the
# stv-user profile — confirm.
with rasterio.Env(profile_name="stv-user"):
    da_stv_lidar = xr.open_dataarray(
        href,
        engine="rasterio",
        # NOTE(review): the comment below mentions uint8, but a DSM is presumably
        # floating point — confirm mask_and_scale=False is intended here.
        mask_and_scale=False,  # otherwise uint8 -> float32!
        backend_kwargs={"open_kwargs": {"overview_level": 3}},
    )
da_stv_lidar