In [18]:
import bottleneck as bn
import dask
import folium
import holoviews as hv
import hvplot.xarray
import matplotlib.pyplot as plt
import numpy as np
import pystac_client
import xarray as xr
import yaml
import zarr

from ascat.read_native.ragged_array_ts import CellFileCollection
from collections import defaultdict
from odc import stac as odc_stac
import odc.stac
from pathlib import Path
from pyproj import Transformer
from scipy.spatial import cKDTree
import warnings

dask.config.set(**{"array.slicing.split_large_chunks": True})
hv.extension("bokeh")

with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    # h121_ds = h121_ds.set_index(time="time")

### Loading Sentinel - 1 Sigma Naught Data from EODC STAC Catalogue

In [19]:
eodc_catalog = pystac_client.Client.open("https://stac.eodc.eu/api/v1")

collection_id = "SENTINEL1_SIG0_20M"

collection = eodc_catalog.get_collection(collection_id)

Setting time and area of interest.

In [20]:
# Area for testing the code (selected area so e.g. the ffill is visible)
#"""
time_range ="2022-01-06/2022-01-07" 

latmin, latmax = 44, 45         # South to North
lonmin, lonmax = 7, 8           # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

In [4]:
# Europe Africa Intersection
"""
time_range ="2022-01-06/2022-01-07"

latmin, latmax = 35, 37            # South to North
lonmin, lonmax = -6, -5           # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

'\ntime_range ="2022-01-06/2022-01-07"\n\nlatmin, latmax = 35, 37            # South to North\nlonmin, lonmax = -6, -5           # West to East\n\nbounding_box = [lonmin, latmin, lonmax, latmax]\n#'

In [5]:
# Whole Spatial Area but one image
"""
time_range ="2022-01-06T18:28:01.000000000"

latmin, latmax = 30.0, 45.0            # South to North
lonmin, lonmax = -10.0, 5.0            # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

'\ntime_range ="2022-01-06T18:28:01.000000000"\n\nlatmin, latmax = 30.0, 45.0            # South to North\nlonmin, lonmax = -10.0, 5.0            # West to East\n\nbounding_box = [lonmin, latmin, lonmax, latmax]\n#'

In [None]:
# Area and Time of Interest
# This will not finish on your local pc
# Try to run in on a large Cluster (e.g. Arsenal)
"""
time_range = "2021-01-01/2023-12-31"

latmin, latmax = 30.0, 45.0            # South to North
lonmin, lonmax = -10.0, 5.0            # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

'\ntime_range = "2021-01-01/2023-12-31"\n\nlatmin, latmax = 30.0, 45.0            # South to North\nlonmin, lonmax = -10.0, 5.0            # West to East\n\nbounding_box = [lonmin, latmin, lonmax, latmax]\n#'

Loading the metadata with STAC search engine.

In [21]:
search = eodc_catalog.search(
    collections=collection_id,
    bbox=bounding_box,
    datetime=time_range,
    #max_items=1  # number of max items to load
)
items_eodc = search.item_collection()
print(f"On EODC we found {len(items_eodc)} items for the given search query")

On EODC we found 12 items for the given search query


This data is stored in the Equi7 grid. But our area of interest is in both the european Equi7 grid and the african Equi7 grid. 

Azimuthal Equidistant
- Center latitude: 53        -> Europa
- Center longitude: 24

Azimuthal Equidistant
- Center latitude: 8.5       -> Afrika
- Center longitude: 21.5

We will create two zar files for the different grids that can be fused later.

In [22]:
items_crs_europe = []
items_crs_africa = []
wkt_europe = None
wkt_africa = None

for item in items_eodc:
    wkt = item.properties["proj:wkt2"]
    lat_center = float(wkt.split('PARAMETER["latitude_of_center",')[1].split(']')[0])

    if lat_center == 53.0:
        items_crs_europe.append(item)
        wkt_europe = wkt
    elif lat_center == 8.5:
        items_crs_africa.append(item)
        wkt_africa = wkt

print(f"Equi7 Europe (lat 53.0): {len(items_crs_europe)} items")
print(wkt_europe, "\n")
print(f"Equi7 Africa (lat 8.5): {len(items_crs_africa)} items")
print(wkt_africa)

Equi7 Europe (lat 53.0): 12 items
PROJCS["Azimuthal_Equidistant",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],PROJECTION["Azimuthal_Equidistant"],PARAMETER["latitude_of_center",53],PARAMETER["longitude_of_center",24],PARAMETER["false_easting",5837287.81977],PARAMETER["false_northing",2121415.69617],UNIT["metre",1,AUTHORITY["EPSG","9001"]]] 

Equi7 Africa (lat 8.5): 0 items
None


Let's plot thumbnail of the loaded items for this area and those dates.

In [23]:
# Showing thumbnail image of the data
#"""
map = folium.Map(
    location=[(latmin + latmax) / 2, (lonmin + lonmax) / 2],
    zoom_start=7,
    zoom_control=True,
    scrollWheelZoom=True,
    dragging=True,
)

folium.Rectangle(
    bounds=[[latmin, lonmin], [latmax, lonmax]],
    color="blue",
    fill=True,
    fill_opacity=0.1,
    weight=2,
    popup="Area of Interest",
).add_to(map)

for item in items_eodc:
    # url leading to display of an item, can also be used as hyperlink
    image_url = item.assets["thumbnail"].href
    bounds = item.bbox
    folium.raster_layers.ImageOverlay(
        image=image_url,
        bounds=[[bounds[1], bounds[0]], [bounds[3], bounds[2]]],
    ).add_to(map)

folium.LayerControl().add_to(map)

map
#"""

## Plan

#### Step 1: Create 6km Sentinel1 data
- ...
- resample from 20m to 6.25km in the Equi7 Grid



#### Step 2: Align with 12.5km ASCAT data
- ...
- do the projection to FibGrid (grid remapping)
- use nearest neighbour to go to 12.5km (align with the ASCAT data)
- ...





### Projection
Sentinel1 data is saved using the Equi7 grid. As our area spanns over Europe and Africa the Equi7_Europe and the Equi7_Africa are used.

?We project the data from the african Equi7 grip to the european one (EPSG:27704). We do it because the distoritons should be minimal because the area is africa is quite close to europe.?(Talk to BBM)

In [24]:
crs = "EPSG:27704"
res =  20

chunks = {"time": 1, "latitude": 100, "longitude": 100}
sig0_dc = odc_stac.load(
    items_eodc,
    crs=crs,                                       # reproject from equi7_Africa to europe should be no problem
    resolution=res,
    bbox=bounding_box,
    chunks=chunks,
    resampling="bilinear",                         # not good for dB data?! -> nearest would be better
)

nodata = items_eodc[0].assets["VV"].extra_fields["raster:bands"][0]["nodata"]
scale = items_eodc[0].assets["VV"].extra_fields["raster:bands"][0]["scale"]

sig0_dc = sig0_dc.where(sig0_dc != nodata) / scale

sig0_dc = sig0_dc.dropna(dim="time", how="all")        # this could be problematic for the runtime
sig0_dc


Unnamed: 0,Array,Chunk
Bytes,368.44 MiB,122.81 MiB
Shape,"(3, 6340, 5078)","(1, 6340, 5078)"
Dask graph,3 chunks in 8 graph layers,3 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 368.44 MiB 122.81 MiB Shape (3, 6340, 5078) (1, 6340, 5078) Dask graph 3 chunks in 8 graph layers Data type float32 numpy.ndarray",5078  6340  3,

Unnamed: 0,Array,Chunk
Bytes,368.44 MiB,122.81 MiB
Shape,"(3, 6340, 5078)","(1, 6340, 5078)"
Dask graph,3 chunks in 8 graph layers,3 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,368.44 MiB,122.81 MiB
Shape,"(3, 6340, 5078)","(1, 6340, 5078)"
Dask graph,3 chunks in 8 graph layers,3 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 368.44 MiB 122.81 MiB Shape (3, 6340, 5078) (1, 6340, 5078) Dask graph 3 chunks in 8 graph layers Data type float32 numpy.ndarray",5078  6340  3,

Unnamed: 0,Array,Chunk
Bytes,368.44 MiB,122.81 MiB
Shape,"(3, 6340, 5078)","(1, 6340, 5078)"
Dask graph,3 chunks in 8 graph layers,3 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [25]:
# Transform to linear scale to do calculations 
sig0_dc_linear = 10 ** (sig0_dc / 10)                                

# Merge in time direction 

sig0_dc_linear_hourly = sig0_dc_linear.resample(time="1h").mean(dim="time") # time between e.g. 05:00:00 and 05:59:59.999 is averaged into the 05:00 bin

sig0_dc_linear_hourly = sig0_dc_linear_hourly.dropna(dim="time", how="all") # remove all empty bins (this could be problematic for the runtime)

sig0_dc_linear_hourly

Unnamed: 0,Array,Chunk
Bytes,245.62 MiB,122.81 MiB
Shape,"(2, 6340, 5078)","(1, 6340, 5078)"
Dask graph,2 chunks in 31 graph layers,2 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 245.62 MiB 122.81 MiB Shape (2, 6340, 5078) (1, 6340, 5078) Dask graph 2 chunks in 31 graph layers Data type float32 numpy.ndarray",5078  6340  2,

Unnamed: 0,Array,Chunk
Bytes,245.62 MiB,122.81 MiB
Shape,"(2, 6340, 5078)","(1, 6340, 5078)"
Dask graph,2 chunks in 31 graph layers,2 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,245.62 MiB,122.81 MiB
Shape,"(2, 6340, 5078)","(1, 6340, 5078)"
Dask graph,2 chunks in 31 graph layers,2 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 245.62 MiB 122.81 MiB Shape (2, 6340, 5078) (1, 6340, 5078) Dask graph 2 chunks in 31 graph layers Data type float32 numpy.ndarray",5078  6340  2,

Unnamed: 0,Array,Chunk
Bytes,245.62 MiB,122.81 MiB
Shape,"(2, 6340, 5078)","(1, 6340, 5078)"
Dask graph,2 chunks in 31 graph layers,2 chunks in 31 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [26]:
# lower the resolution from 20m to 6km  (as 6.25km does not work because 312.5 is not an int())
sig0_dc_linear_6km = sig0_dc_linear_hourly.coarsen(x=300, y=300, boundary="pad").mean() # padded with NA

sig0_dc_linear_6km

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 40 graph layers,8 chunks in 40 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.92 kiB 1.31 kiB Shape (2, 22, 17) (1, 21, 16) Dask graph 8 chunks in 40 graph layers Data type float32 numpy.ndarray",17  22  2,

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 40 graph layers,8 chunks in 40 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 40 graph layers,8 chunks in 40 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.92 kiB 1.31 kiB Shape (2, 22, 17) (1, 21, 16) Dask graph 8 chunks in 40 graph layers Data type float32 numpy.ndarray",17  22  2,

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 40 graph layers,8 chunks in 40 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [27]:
# ffill values 
sig_dc_linear_6km_filled = sig0_dc_linear_6km.ffill(dim="time")

sig_dc_linear_6km_filled

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 42 graph layers,8 chunks in 42 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.92 kiB 1.31 kiB Shape (2, 22, 17) (1, 21, 16) Dask graph 8 chunks in 42 graph layers Data type float32 numpy.ndarray",17  22  2,

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 42 graph layers,8 chunks in 42 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 42 graph layers,8 chunks in 42 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.92 kiB 1.31 kiB Shape (2, 22, 17) (1, 21, 16) Dask graph 8 chunks in 42 graph layers Data type float32 numpy.ndarray",17  22  2,

Unnamed: 0,Array,Chunk
Bytes,2.92 kiB,1.31 kiB
Shape,"(2, 22, 17)","(1, 21, 16)"
Dask graph,8 chunks in 42 graph layers,8 chunks in 42 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [28]:
# Convert back to dB
sig_dc_6km = 10 * np.log10(sig_dc_linear_6km_filled)#.compute()

In [15]:
# Show steps visually:
""" only run if in testing mode

sig0_dc_linear.VV.plot.imshow(col="time", col_wrap=3, robust=True)
plt.show()

sig0_dc_linear_hourly.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()

sig0_dc_linear_6km.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()

sig_dc_linear_6km_filled.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()
#"""


' only run if in testing mode\n\nsig0_dc_linear.VV.plot.imshow(col="time", col_wrap=3, robust=True)\nplt.show()\n\nsig0_dc_linear_hourly.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n\nsig0_dc_linear_6km.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n\nsig_dc_linear_6km_filled.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n#'

In [29]:
sig_dc_6km.to_zarr("output_6km_filled.zarr", mode="w")     # scaling? encoding? or is this enough?

<xarray.backends.zarr.ZarrStore at 0x1c3801b5090>

### Tests

Different tests were performed to queck if the code does what I expect it to do. Some were done by visually looking at some data snippets:
<img src="../images/Data_Processing.png" alt="Data Processing" width="500"/>

In this image we can clearly see the hourly combination, the coarsing and the ffill.
<img src="../images/Equi7-EuropeAfrica_Visual-Inspection.png" alt="Projections" width="500"/>

Here we can see that at least visually and in low resolution the different projections do not cause problems.

Distortions further south?
