### Full processing for:
(1) quantifying the spatial variation of the lake surface height;  
(2) correcting the orthometric height.

In [1]:
import numpy as np
import xarray as xr
from glob import glob
import geopandas as gpd
from utils.functions import meter2deg
from utils.pixc2raster import pixc2raster
from utils.functions import pixc_geophy_cor
from utils.functions import sample_from_raster 
from utils.swot_data_mask import swot_pixc_mask
from utils.swot_data_filter import iter_IQR, pixc_height_local_filtering


In [2]:
## Lake to process and the locations of its input data.
lake_name = 'dianchi'
dir_pixc = f'data/{lake_name}-lake/swot-pixc'
path_lake_vec = f'data/{lake_name}-lake/hydrolake_{lake_name}_edit.gpkg'

## Keep only the original PIXC .nc files: skip every path containing one of the
## tags carried by the files this notebook derives from them.
derived_tags = ('_masked', 'filter', 'height')
paths_pixc = sorted(
    path_nc
    for path_nc in glob(dir_pixc + '/SWOT*.nc')
    if not any(tag in path_nc for tag in derived_tags)
)
print(len(paths_pixc))


9


In [3]:
## Read the lake outline from the vector file at `path_lake_vec` into a GeoDataFrame.
lake_gdf = gpd.read_file(path_lake_vec)
## Bare last expression so that the notebook displays the attribute table.
lake_gdf


Unnamed: 0,Hylak_id,Lake_name,Country,Continent,Poly_src,Lake_type,Grand_id,Lake_area,Shore_len,Shore_dev,...,Vol_src,Depth_avg,Dis_avg,Res_time,Elevation,Slope_100,Wshd_area,Pour_long,Pour_lat,geometry
0,1483,Dian Chi,China,Asia,SWBD,1,0,298.34,141.84,2.32,...,3,19.7,44.051,1540.3,1886,2.71,2875.6,102.603578,24.782792,"MULTIPOLYGON (((102.64217 24.9598, 102.64891 2..."


In [None]:
## Shrink the lake polygon by 200 m (interior buffer). The buffer distance is in
## metres, so the geometry is projected to its UTM zone first and converted back
## to geographic coordinates (EPSG:4326) afterwards.
lake_decrease_gdf = lake_gdf.copy()
lake_bounds = lake_decrease_gdf.bounds   # columns: minx, miny, maxx, maxy
## Average only the matching pair of bounds: averaging all four columns mixes
## longitude with latitude and selects the wrong UTM zone.
lon_center = lake_bounds[['minx', 'maxx']].mean(axis=1).values
lat_mid = lake_bounds[['miny', 'maxy']].mean(axis=1).values
## UTM zones are 6 degrees wide and numbered 1..60 starting at 180W.
utm_zone = np.clip(np.floor(lon_center / 6) + 31, 1, 60)
## EPSG 326xx = WGS 84 / UTM north, 327xx = south; the zone number is zero-padded.
epsg_prefix = '326' if lat_mid[0] >= 0 else '327'
epsg_code = f'{epsg_prefix}{int(utm_zone[0]):02d}'
lake_decrease_gdf = lake_decrease_gdf.to_crs(epsg=epsg_code)
lake_decrease_gdf['geometry'] = lake_decrease_gdf.geometry.buffer(-200)  # interior buffer
lake_decrease_gdf = lake_decrease_gdf.to_crs(epsg=4326)


### data mask

In [5]:
## Mask every PIXC file with the shrunken lake polygon and keep the results in memory.
pixcs_masked = []
## Variables passed to swot_pixc_mask as `vars_sel`.
vars_sel = ['latitude', 'longitude', 'height',
            'solid_earth_tide', 'pole_tide',
            'load_tide_fes', 'iono_cor_gim_ka', 'geoid',
            ]
for i, path in enumerate(paths_pixc):
    print(f'input file: {i} {path}')
    # Define the output path: strip only the file extension (the last dot), so
    # that a dot elsewhere in the path (e.g. './data/...') does not truncate it.
    path_masked = path.rsplit('.', 1)[0] + '_masked.nc'
    pixc_nc = xr.open_dataset(path, group='pixel_cloud')
    pixc_masked = swot_pixc_mask(pixc_nc=pixc_nc,
                                 vars_sel=vars_sel,
                                 region_gdf=lake_decrease_gdf,
                                 path_masked=path_masked)
    pixcs_masked.append(pixc_masked)


input file: 0 data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_009_049_198L_20240105T211648_20240105T211659_PIC0_01.nc
file written to data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_009_049_198L_20240105T211648_20240105T211659_PIC0_01_masked.nc
input file: 1 data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_010_049_198L_20240126T180154_20240126T180205_PIC0_01.nc
file written to data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_010_049_198L_20240126T180154_20240126T180205_PIC0_01_masked.nc
input file: 2 data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_012_049_198L_20240308T113203_20240308T113214_PIC0_01.nc
file written to data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_012_049_198L_20240308T113203_20240308T113214_PIC0_01_masked.nc
input file: 3 data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_014_049_198L_20240419T050213_20240419T050224_PIC0_01.nc
file written to data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_014_049_198L_20240419T050213_20240419T050224_PIC0_01_masked.nc
input file: 4 data/dianchi-lake/swot-pixc/SWOT_L2_HR

### data filter 

In [7]:
## Filter the masked heights of every PIXC file and save one dataset per file.
pixcs_filtered_ds = []
for i, pixc_masked in enumerate(pixcs_masked):
    pixc_ht_filter_ds = pixc_masked[['geoid', 'height']]
    ## 1. geophysical correction for height, and convert to orthometric height
    pixc_ht_cor = pixc_geophy_cor(pixc_nc=pixc_masked)
    pixc_ht_ortho = pixc_ht_cor - pixc_masked.geoid.values
    pixc_ht_filter_ds = pixc_ht_filter_ds.assign({'ht_ortho': (("points",), pixc_ht_ortho)})
    pixc_ht_filter_ds.ht_ortho.attrs['description'] = 'PIXC height data with geophysical correction and orthometric correction (use geoid variable)'

    ## 2. height filtering
    ## 2.1 height filtering in global region using IQR method
    pixc_ht_ortho_filter1, IQR = iter_IQR(pixc_ht_ortho, IQR_thre=0.3, iter_max=5)
    ## the masked array is converted to a plain array with NaN at the masked entries
    pixc_ht_ortho_filter1 = pixc_ht_ortho_filter1.filled(np.nan)
    pixc_ht_filter_ds = pixc_ht_filter_ds.assign({'ht_ortho_filter1': (("points",), pixc_ht_ortho_filter1)})
    pixc_ht_filter_ds.ht_ortho_filter1.attrs['description'] = 'PIXC height data with global filtering using IQR method'
    ## 2.2 height filtering in local region
    pixc_ht_ortho_filter2 = pixc_height_local_filtering(pixc_height=pixc_ht_ortho_filter1,
                                              pixc_lonlat=(pixc_ht_filter_ds.longitude.values, pixc_ht_filter_ds.latitude.values),
                                              thre=0.2,
                                              radius_m=500)
    pixc_ht_filter_ds = pixc_ht_filter_ds.assign({'ht_ortho_filter2': (("points",), pixc_ht_ortho_filter2)})
    pixc_ht_filter_ds.ht_ortho_filter2.attrs['description'] = 'PIXC height with both global and local filtering'
    ## boolean flag per point: True where the height is not NaN after both filters
    pixc_ht_filter_ds = pixc_ht_filter_ds.assign({'ht_valid_ids': (("points",), ~np.isnan(pixc_ht_ortho_filter2))})
    pixc_ht_filter_ds.ht_valid_ids.attrs['description'] = 'PIXC height indices of valid data'
    pixcs_filtered_ds.append(pixc_ht_filter_ds)

    ## 3. save the filtered data
    ## strip only the file extension (the last dot), so that a dot elsewhere in
    ## the path (e.g. './data/...') does not truncate the output path.
    path_pixc_filtered = paths_pixc[i].rsplit('.', 1)[0] + '_masked_filtered.nc'
    pixc_ht_filter_ds.to_netcdf(path_pixc_filtered, mode='w', format='NETCDF4')  ## save the filtered data
    print(f'Filtered data saved to: {path_pixc_filtered}')


Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_009_049_198L_20240105T211648_20240105T211659_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_010_049_198L_20240126T180154_20240126T180205_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_012_049_198L_20240308T113203_20240308T113214_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_014_049_198L_20240419T050213_20240419T050224_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_016_049_198L_20240530T223221_20240530T223232_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_018_049_198L_20240711T160229_20240711T160240_PIC0_01_masked_filtered.nc
Filtered data saved to: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_019_049_198L_20240801T124734_20240801T124745_PIC0_02_masked_filtered.nc
Filtered data saved 

### calculate the height variation of lake surface

In [8]:
## Rasterize the spatial variation of the lake surface height.
## Raster grid: lake bounds padded by 0.01 degree, with cells of about 500 m.
xmin, ymin, xmax, ymax = lake_gdf.geometry[0].buffer(0.01).bounds
raster_extent = (xmin, xmax, ymin, ymax)
lat_center = pixcs_filtered_ds[0]['geoid'].latitude.mean().values
res_lon, res_lat = meter2deg(meter=500, lat=lat_center)

## 1. per-file spatial variation of the ellipsoidal height
pixcs_spavar_ls = []
for pixc_filtered_ds in pixcs_filtered_ds:
    ## ellipsoidal height = filtered orthometric height + geoid
    pixc_ht_ellip = pixc_filtered_ds['ht_ortho_filter2'].values + pixc_filtered_ds['geoid'].values
    ## height anomaly of each point relative to the mean height of this file
    pixc_ht_ellip_spavar = pixc_ht_ellip - np.nanmean(pixc_ht_ellip)
    ## corrected geoid = mean geoid of this file + height anomaly
    geoid_mean = np.nanmean(pixc_filtered_ds['geoid'])
    pixc_geoid_cor = geoid_mean + pixc_ht_ellip_spavar

    ## collect the new variables in one dataset per file
    pixc_spavar_xr = pixc_filtered_ds[['geoid', 'ht_valid_ids']].assign({
        'geoid_cor': (("points",), pixc_geoid_cor),
        'ht_ellip_spavar': (("points",), pixc_ht_ellip_spavar),
    })
    pixc_spavar_xr['geoid_cor'].attrs['description'] = 'corrected geoid of PIXC points'
    pixc_spavar_xr['ht_ellip_spavar'].attrs['description'] = 'spatial variation of lake surface height'
    pixcs_spavar_ls.append(pixc_spavar_xr)

## 2. merge all files along the point dimension and keep only the height anomaly
pixcs_spavar_xr = xr.concat(pixcs_spavar_ls, dim='points')[['ht_ellip_spavar']]
pixcs_lonlat = (pixcs_spavar_xr.longitude.values, pixcs_spavar_xr.latitude.values)
raster_ht_ellip_spavar = pixc2raster(pixc_var=pixcs_spavar_xr['ht_ellip_spavar'],
                                     raster_extent=raster_extent,
                                     pixc_lonlat=pixcs_lonlat,
                                     resolution=(res_lon, res_lat),
                                     agg_method='median')
raster_ht_ellip_spavar.name = "height_spavar_smoothed"
## 3. normalization: remove the NaN-aware mean of the raster
raster_mean = np.nanmean(raster_ht_ellip_spavar.values)
raster_ht_ellip_spavar.values = raster_ht_ellip_spavar.values - raster_mean
raster_ht_ellip_spavar.attrs['description'] = 'the swot data-derived spatial variation of ellipsoidal height of the lake'
## 4. wrap in a dataset and save as NetCDF file
raster_height_spavar_ds = xr.Dataset({
        'ht_spavar_smoothed': raster_ht_ellip_spavar.rename('ht_spavar_smoothed')
        })
path_raster_height_spavar = dir_pixc + '/raster_height_spavar.nc'
raster_height_spavar_ds.to_netcdf(path_raster_height_spavar)
print(f'data saved to: {path_raster_height_spavar}')


data saved to: data/dianchi-lake/swot-pixc/raster_height_spavar.nc


### calculate pixc-based height_ortho and corrected height_ortho 

In [10]:
## For every PIXC file: sample the height spatial variation at each point, build
## the corrected geoid from it, and save the original and corrected orthometric heights.
for i, pixc_filtered in enumerate(pixcs_filtered_ds):
    pixc_ht_ds = pixc_filtered[['geoid', 'ht_ortho_filter2']]
    ## 1. sample the smoothed spatial variation of lake height at the pixc points
    pixc_ht_spavar = sample_from_raster(
        raster_value = raster_height_spavar_ds.ht_spavar_smoothed.values,
        raster_x = raster_height_spavar_ds.x.values,
        raster_y = raster_height_spavar_ds.y.values,
        points_x = pixc_filtered.longitude.values,
        points_y = pixc_filtered.latitude.values
    )
    pixc_ht_ds = pixc_ht_ds.assign({'ht_spavar': (("points",), pixc_ht_spavar)})
    pixc_ht_ds['ht_spavar'].attrs['description'] = 'spatial variation of lake height'
    ## 2. calculate pixc height with corrected geoid
    ## corrected geoid = mean geoid of this file + sampled spatial variation
    geoid_cor = pixc_filtered.geoid.mean(dim='points').values + pixc_ht_ds['ht_spavar'].values
    ## Rebuild the geophysically corrected ellipsoidal height from the filtered
    ## orthometric height (ht_ortho_filter2 + geoid). The raw `height` variable
    ## carries no geophysical correction, so subtracting the corrected geoid from
    ## it would make ht_ortho_cor inconsistent with ht_ortho.
    pixc_ht_ellip = pixc_filtered['ht_ortho_filter2'].values + pixc_filtered['geoid'].values
    pixc_ht_ortho_cor = pixc_ht_ellip - geoid_cor
    pixc_ht_ortho_cor[~pixc_filtered['ht_valid_ids'].values] = np.nan   ## mask invalid values
    pixc_ht_ds = pixc_ht_ds.assign({'ht_ortho_cor': (("points",), pixc_ht_ortho_cor)})
    pixc_ht_ds['ht_ortho_cor'].attrs['description'] = 'orthometric height using corrected geoid height'
    pixc_ht_ds = pixc_ht_ds.rename({'ht_ortho_filter2': 'ht_ortho'})
    pixc_ht_ds['ht_ortho'].attrs['description'] = 'orthometric height (after filtering) using geoid height'
    ### 3. save the pixc heights to a new NetCDF file
    path_pixc_height = paths_pixc[i].replace('.nc', '_height.nc')
    pixc_ht_ds.to_netcdf(path_pixc_height)
    print(f"Path to save: {path_pixc_height}")



Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_009_049_198L_20240105T211648_20240105T211659_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_010_049_198L_20240126T180154_20240126T180205_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_012_049_198L_20240308T113203_20240308T113214_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_014_049_198L_20240419T050213_20240419T050224_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_016_049_198L_20240530T223221_20240530T223232_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_018_049_198L_20240711T160229_20240711T160240_PIC0_01_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_019_049_198L_20240801T124734_20240801T124745_PIC0_02_height.nc
Path to save: data/dianchi-lake/swot-pixc/SWOT_L2_HR_PIXC_021_049_198L_20240912T061744_20240912T061755_PIC0_01_height.nc
Path to save: data/dianchi-lake/