In [None]:
import os
import platform
from pathlib import Path

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray  # imported for its side effect: registers the `.rio` accessor on xarray objects
import xarray as xr
import yaml
from ascat.cell import CellGridFiles
from ascat.read_native.ragged_array_ts import CellFileCollection
from dask.diagnostics import ProgressBar
from pyproj import Transformer
from rasterio.warp import transform
from shapely.geometry import Polygon, MultiPoint
from tqdm import tqdm

## Map Sentinel-1 and ASCAT

In [None]:
def process_sentinel_and_ascat(sentinel_path, output_path, sentinel_crs):
    """
    Collocate one Sentinel-1 tile with ASCAT H121 observations and save the result to NetCDF.

    The Sentinel-1 Zarr store is opened and tagged with ``sentinel_crs``. The valid
    (non-NaN) pixels of the last ``VV`` time step are reprojected to EPSG:4326 and
    their convex hull is used to select the ASCAT data to read. For every ASCAT
    observation the nearest Sentinel-1 sample (in x, y and time) is looked up and
    stored on the ASCAT dataset as ``s1-VV`` and ``s1-VH``.

    Parameters:
        sentinel_path (str or Path): Path to Sentinel-1 Zarr file
        output_path (str or Path): Path to save the resulting NetCDF file;
            missing parent directories are created
        sentinel_crs (str): CRS of the Sentinel-1 data (e.g., "EPSG:27704")

    Raises:
        ValueError: If the last VV time step contains no valid pixel, so no
            footprint polygon can be built.
        FileNotFoundError: If the ASCAT data directory does not exist.
    """
    print("Loading Sentinel-1 data...")
    sentinel_ds = xr.open_zarr(sentinel_path)
    sentinel_ds = sentinel_ds.rio.write_crs(sentinel_crs)

    # Only the last VV time step is needed for the footprint, so reproject that
    # single slice instead of every variable and every time step of the tile.
    footprint_latlon = sentinel_ds['VV'].isel(time=-1).rio.reproject("EPSG:4326")
    valid_mask = ~np.isnan(footprint_latlon.values)
    if not valid_mask.any():
        raise ValueError(
            f"No valid VV pixels in the last time step of {sentinel_path}; "
            "cannot derive a footprint polygon"
        )

    lats = footprint_latlon['y'].values
    lons = footprint_latlon['x'].values
    lon2d, lat2d = np.meshgrid(lons, lats)
    valid_points = np.column_stack([lon2d[valid_mask], lat2d[valid_mask]])
    gdf_points = gpd.GeoDataFrame(geometry=gpd.points_from_xy(valid_points[:,0], valid_points[:,1]), crs="EPSG:4326")
    tile_polygon = gdf_points.geometry.union_all().convex_hull  # creating a polygon for selecting the ascat points
    print("Done")

    print("Loading ASCAT data...")
    # The data root is machine specific and is looked up in ../paths.yml by OS name.
    paths = yaml.safe_load(Path("../paths.yml").read_text())
    system_key = "windows" if platform.system() == "Windows" else "linux"
    root = Path(paths[system_key]).expanduser()
    cell_source = root / "datasets/scat_ard/ascat_ssm_cdr_12.5km_h121"
    # Explicit exception instead of `assert`, which is skipped under `python -O`.
    if not cell_source.exists():
        raise FileNotFoundError(f"ASCAT data path does not exist: {cell_source}")

    h121_reader = CellGridFiles.from_product_id(cell_source, "H121")
    ascat_ds = h121_reader.read(geom = tile_polygon)
    print("Done")

    print("Processing...")
    # Express the ASCAT observation coordinates in the Sentinel-1 CRS so they can
    # be used to index the Sentinel-1 grid directly.
    points_ds = ascat_ds.cf_geom.to_point_array()
    x, y = Transformer.from_crs("EPSG:4326", sentinel_crs, always_xy=True).transform(points_ds.lon, points_ds.lat)
    points_ds['e7x'] = ('obs', x)
    points_ds['e7y'] = ('obs', y)

    # NOTE(review): method="nearest" is used without a tolerance, so each
    # observation is matched to the closest pixel/time step however far away it
    # is (e.g. points inside the convex hull but outside the tile) - confirm
    # this is intended.
    ascat_sentinel = sentinel_ds.sel(
        x=points_ds.e7x.load(),
        y=points_ds.e7y.load(),
        time=points_ds.time.load(),
        method="nearest"
    )

    ascat_ds['s1-VH'] = ("obs", ascat_sentinel['VH'].data)
    ascat_ds['s1-VV'] = ("obs", ascat_sentinel['VV'].data)
    print("Done")

    print(f"Saving output to {output_path}...")
    # Make sure the target directory exists; to_netcdf does not create it.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    ascat_ds['subsurface_scattering_probability'].encoding.update({'dtype': 'float32'}) # Ensure correct dtype as NANs are a problem otherwise
    with ProgressBar():
        ascat_ds.to_netcdf(output_path)
    print(f"Done! Collocated dataset saved to {output_path}")


In [None]:
# Collocate every Sentinel-1 tile of each region with ASCAT.
# All regions write below the same output root ("ascat_h121_sentinel-1"), which is
# also the location read in the "Checking the Results" section.
SENTINEL_ROOT = "../data/Sentinel-1"
OUTPUT_ROOT = "../data/ascat_h121_sentinel-1"

# Region name -> CRS of the Sentinel-1 tiles in that region
REGION_CRS = {
    "Europe": "EPSG:27704",
    "Africa": "EPSG:27701",
}

for region, region_crs in REGION_CRS.items():
    sentinel_dir = f"{SENTINEL_ROOT}/{region}"
    output_dir = f"{OUTPUT_ROOT}/{region}"
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that the processing order is reproducible across runs/machines.
    zarr_files = sorted(d for d in os.listdir(sentinel_dir) if d.endswith(".zarr"))

    for d in tqdm(zarr_files, desc=f"Processing {region}"):
        sentinel_path = os.path.join(sentinel_dir, d)
        output_path = os.path.join(output_dir,
                                   "ascat_h121_" +  d.replace(".zarr", ".nc"))

        print(f"Processing {region}: {sentinel_path} -> {output_path}")
        process_sentinel_and_ascat(
            sentinel_path=sentinel_path,
            output_path=output_path,
            sentinel_crs=region_crs
        )

## Checking the Results

In [None]:
# Build a one-row-per-file overview of the collocated NetCDF outputs:
# dimension sizes, covered time span and monthly observation coverage.
folders = [
    "../data/ascat_h121_sentinel-1/Africa",
    "../data/ascat_h121_sentinel-1/Europe"
]

summary_columns = [
    "path", "n_locations", "n_obs", "n_timesteps", "first_time", "last_time",
    "min_obs_per_month", "months_covered"
]
summary = []

for folder in folders:
    for filename in sorted(os.listdir(folder)):
        # Skip anything that is not a NetCDF output (e.g. hidden or temporary files).
        if not filename.endswith(".nc"):
            continue
        filepath = os.path.join(folder, filename)

        # The context manager closes the file handle once the metadata has been read.
        with xr.open_dataset(filepath) as file_ds:
            sizes = file_ds.sizes
            n_obs = sizes.get("obs", None)
            n_locations = sizes.get("locations", None)
            time_values = pd.to_datetime(file_ds["time"].values)

        n_timesteps = len(time_values)
        # An empty time axis has no first/last element; report NaT instead of failing.
        first_time = time_values[0] if n_timesteps else pd.NaT
        last_time = time_values[-1] if n_timesteps else pd.NaT

        month_periods = pd.Series(time_values).dt.to_period("M")
        month_counts = month_periods.value_counts()
        min_obs_per_month = month_counts.min() if not month_counts.empty else 0
        months_covered = month_counts.size

        summary.append([
            filepath, n_locations, n_obs, n_timesteps,
            first_time, last_time,
            min_obs_per_month, months_covered
        ])

df_summary = pd.DataFrame(summary, columns=summary_columns)

print(df_summary.to_string(index=False))

In [None]:
# Open the collocated test tile; the bare `ds` on the last line renders it as the cell output.
collocated_file = "../data/ascat_h121_sentinel-1/Europe/test_E033N012T3.nc"
ds = xr.open_dataset(collocated_file)
ds

### Check Timeseries

In [None]:
# Use the first entry of `location_id` as the example location
location_ids = ds['location_id']
loc_id = location_ids.values[0]

# Keep only the entries belonging to that location; everything else is dropped
is_selected_location = location_ids == loc_id
subset = ds.where(is_selected_location, drop=True)
subset

In [None]:
# Plot window. The end bound is exclusive: comparing with `<= '2023-12-31'` would
# compare against midnight and silently drop every observation made during Dec 31.
plot_start = '2021-01-01'
plot_end = '2024-01-01'

time_index = pd.to_datetime(subset['time'].values)

mask = (time_index >= plot_start) & (time_index < plot_end)

filtered_time = time_index[mask]
filtered_s1_VV = subset['s1-VV'].values[mask]
filtered_backscatter40 = subset['backscatter40'].values[mask]

# Sentinel-1 VV and ASCAT backscatter of the selected location on a shared axis
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(filtered_time, filtered_s1_VV, label='Sentinel-1 backscatter VV')
ax.plot(filtered_time, filtered_backscatter40, label='ASCAT backscatter at 40')
ax.set_xlabel('Time')
ax.set_ylabel('backscatter')
ax.set_title(f's1-VV Time Series for location_id {loc_id}')
ax.legend()
ax.grid(True)
plt.show()

For loading two tiles simultaneously, consider using:

https://github.com/TUW-GEO/ascat/blob/master/src/ascat/cell.py#L341
(def _merge_contiguous(self, data):)


### Check alignment of ASCAT points on Sentinel-1 tiles

In [None]:
# Overlay the ASCAT point locations of `ds` on one Sentinel-1 VV scene of the same tile.
tile_name = "E033N012T3"
tile_crs = "EPSG:27704"

zarr_ds = xr.open_zarr(f'../data/Sentinel-1/Europe/{tile_name}.zarr')
# Tag the data with its CRS before reprojecting, as done for this store in the
# collocation workflow; rioxarray cannot reproject data without a CRS.
zarr_ds = zarr_ds.rio.write_crs(tile_crs)
vv = zarr_ds['VV'].isel(time=10)
vv_latlon = vv.rio.reproject("EPSG:4326")

lons = ds['lon'].values
lats = ds['lat'].values

plt.figure(figsize=(10, 8))
ax = plt.axes(projection=ccrs.PlateCarree())

# Map extent = raster bounds plus a margin (in degrees) on every side
buffer = 0.5
left, bottom, right, top = vv_latlon.rio.bounds()
ax.set_extent([
    left - buffer, right + buffer,
    bottom - buffer, top + buffer
], crs=ccrs.PlateCarree())

ax.add_feature(cfeature.LAND, facecolor='lightgray')
ax.add_feature(cfeature.COASTLINE)
ax.gridlines(draw_labels=True)

img = vv_latlon.plot.imshow(ax=ax, transform=ccrs.PlateCarree(), cmap='viridis', add_colorbar=False)
# The legend reports the number of points actually plotted instead of a fixed count
ax.scatter(lons, lats, color='red', s=10, label=f'{lons.size} Locations', transform=ccrs.PlateCarree())
plt.colorbar(img, ax=ax, label='VV (dB)')
plt.title(tile_name)
plt.legend()
plt.tight_layout()
plt.show()


## Testing the Workflow

In [None]:
# testing the function
#
# Runs the collocation for a single tile by calling process_sentinel_and_ascat
# instead of repeating its body here, so this check always exercises the same
# code as the batch run. The file written here is the one opened in the
# "Checking the Results" section.

sentinel_path = "../data/Sentinel-1/Europe/E033N012T3.zarr"
output_path = "../data/ascat_h121_sentinel-1/Europe/test_E033N012T3.nc"
sentinel_crs = "EPSG:27704"

# Make sure the target directory exists before the result is written.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

process_sentinel_and_ascat(
    sentinel_path=sentinel_path,
    output_path=output_path,
    sentinel_crs=sentinel_crs
)