In [62]:
import bottleneck as bn
import dask
import folium
import holoviews as hv
import hvplot.xarray
import matplotlib.pyplot as plt
import numpy as np
import pystac_client
import xarray as xr
import yaml
import zarr

from ascat.read_native.ragged_array_ts import CellFileCollection
from collections import defaultdict
from odc import stac as odc_stac
import odc.stac
from pathlib import Path
from pyproj import Transformer
from scipy.spatial import cKDTree
import warnings
from collections import defaultdict

#dask.config.set(**{"array.slicing.split_large_chunks": True})
#hv.extension("bokeh")

#with warnings.catch_warnings():
    #warnings.simplefilter("ignore", UserWarning)
    # h121_ds = h121_ds.set_index(time="time")

### Loading Sentinel - 1 Sigma Naught Data from EODC STAC Catalogue

In [63]:
eodc_catalog = pystac_client.Client.open("https://stac.eodc.eu/api/v1")

collection_id = "SENTINEL1_SIG0_20M"

collection = eodc_catalog.get_collection(collection_id)

Setting time and area of interest.

In [None]:
# Area for testing the code (selected area so e.g. the ffill is visible)
"""
time_range ="2022-01-06/2022-01-07" 

latmin, latmax = 44, 45         # South to North
lonmin, lonmax = 7, 8           # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

In [4]:
# Europe Africa Intersection
"""
time_range ="2022-01-06/2022-01-07"

latmin, latmax = 35, 37            # South to North
lonmin, lonmax = -6, -5           # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

'\ntime_range ="2022-01-06/2022-01-07"\n\nlatmin, latmax = 35, 37            # South to North\nlonmin, lonmax = -6, -5           # West to East\n\nbounding_box = [lonmin, latmin, lonmax, latmax]\n#'

In [64]:
# Whole Spatial Area but...
#"""
#time_range ="2022-01-06T18:28:01.000000000" # one image
time_range = "2022-01-06/2022-01-06"  # one day

latmin, latmax = 30.0, 45.0            # South to North
lonmin, lonmax = -10.0, 5.0            # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]
#"""

In [6]:
# Area and Time of Interest
# This will not finish on your local pc
# Try to run in on a large Cluster (e.g. Arsenal)
"""
time_range = "2021-01-01/2023-12-31"

latmin, latmax = 30.0, 45.0            # South to North
lonmin, lonmax = -10.0, 5.0            # West to East

bounding_box = [lonmin, latmin, lonmax, latmax]

#append to zar monthly ffill?
#"""

'\ntime_range = "2021-01-01/2023-12-31"\n\nlatmin, latmax = 30.0, 45.0            # South to North\nlonmin, lonmax = -10.0, 5.0            # West to East\n\nbounding_box = [lonmin, latmin, lonmax, latmax]\n\n#append to zar monthly ffill?\n#'

Loading the metadata with STAC search engine.

In [65]:
search = eodc_catalog.search(
    collections=collection_id,
    bbox=bounding_box,
    datetime=time_range,
    #max_items=1  # number of max items to load
)
items_eodc = search.item_collection()
print(f"On EODC we found {len(items_eodc)} items for the given search query")

On EODC we found 57 items for the given search query


This data is stored in the Equi7 grid. But our area of interest is in both the european Equi7 grid and the african Equi7 grid. 

Azimuthal Equidistant
- Center latitude: 53        -> Europa
- Center longitude: 24

Azimuthal Equidistant
- Center latitude: 8.5       -> Afrika
- Center longitude: 21.5

We will create two zar files for the different grids that can be fused later.

In [66]:
#"""
items_crs_europe = []
items_crs_africa = []
wkt_europe = None
wkt_africa = None

for item in items_eodc:
    wkt = item.properties["proj:wkt2"]
    lat_center = float(wkt.split('PARAMETER["latitude_of_center",')[1].split(']')[0])

    if lat_center == 53.0:
        items_crs_europe.append(item)
        wkt_europe = wkt
    elif lat_center == 8.5:
        items_crs_africa.append(item)
        wkt_africa = wkt

print(f"Equi7 Europe (lat 53.0): {len(items_crs_europe)} items")
print(wkt_europe, "\n")
print(f"Equi7 Africa (lat 8.5): {len(items_crs_africa)} items")
print(wkt_africa)
#"""

Equi7 Europe (lat 53.0): 35 items
PROJCS["Azimuthal_Equidistant",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],PROJECTION["Azimuthal_Equidistant"],PARAMETER["latitude_of_center",53],PARAMETER["longitude_of_center",24],PARAMETER["false_easting",5837287.81977],PARAMETER["false_northing",2121415.69617],UNIT["metre",1,AUTHORITY["EPSG","9001"]]] 

Equi7 Africa (lat 8.5): 22 items
PROJCS["Azimuthal_Equidistant",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],PROJECTION["Azimuthal_Equidistant"],PARAMETER["false_easting",5621452.01998],PARAMETER["false_northing",5990638.42298],PARAMETER["longitude_of_center",21.5],PARAMETER["latitude_of_center",8.5],UNIT["metre",1,AUTHORITY["EPSG

In [69]:
# Europe and African tiles are grouped

def extract_tile_id_from_name(item_id):
    parts = item_id.split("_")
    tile_id = parts[3]
    if len(tile_id) == 10:
            return tile_id

european_tiles = defaultdict(list)
african_tiles = defaultdict(list)

for item in items_crs_europe:
    tile_id = extract_tile_id_from_name(item.id)
    if item not in european_tiles[tile_id]:
        european_tiles[tile_id].append(item)

for item in items_crs_africa:
    tile_id = extract_tile_id_from_name(item.id)
    if item not in african_tiles[tile_id]:
        african_tiles[tile_id].append(item)

european_tiles

defaultdict(list,
            {'E033N015T3': [<Item id=SIG0_20220106T182916_A147_E033N015T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182851_A147_E033N015T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182826_A147_E033N015T3_EU020M_V0M2R4_S1AIWGRDH>],
             'E030N015T3': [<Item id=SIG0_20220106T182916_A147_E030N015T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182851_A147_E030N015T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182826_A147_E030N015T3_EU020M_V0M2R4_S1AIWGRDH>],
             'E033N012T3': [<Item id=SIG0_20220106T182826_A147_E033N012T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182801_A147_E033N012T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182736_A147_E033N012T3_EU020M_V0M2R4_S1AIWGRDH>],
             'E030N012T3': [<Item id=SIG0_20220106T182826_A147_E030N012T3_EU020M_V0M2R4_S1AIWGRDH>,
              <Item id=SIG0_20220106T182801_A147_E030N012T3_EU0

### Information on Equi7 grid:
https://cartography.tuwien.ac.at/eurocarto/wp-content/uploads/2015/09/3_6_ppt.pdf

which tiles do we even want to run? Maybe decide on this later. Now try to run the compute on one tile

- Quickly test loading only one tile
- Then tink of how to combine them

Let's plot thumbnail of the loaded items for this area and those dates.

In [None]:
# Showing thumbnail image of the data
"""
map = folium.Map(
    location=[(latmin + latmax) / 2, (lonmin + lonmax) / 2],
    zoom_start=7,
    zoom_control=True,
    scrollWheelZoom=False,
    dragging=False,
)

folium.Rectangle(
    bounds=[[latmin, lonmin], [latmax, lonmax]],
    color="blue",
    fill=True,
    fill_opacity=0.1,
    weight=2,
    popup="Area of Interest",
).add_to(map)

for item in african_tiles['E030N090T3']:
    # url leading to display of an item, can also be used as hyperlink
    image_url = item.assets["thumbnail"].href
    bounds = item.bbox
    folium.raster_layers.ImageOverlay(
        image=image_url,
        bounds=[[bounds[1], bounds[0]], [bounds[3], bounds[2]]],
    ).add_to(map)

folium.LayerControl().add_to(map)

map
#"""

### Projection
Sentinel1 data is saved using the Equi7 grid. As our area spanns over Europe and Africa the Equi7_Europe and the Equi7_Africa are used.

?We project the data from the african Equi7 grip to the european one (EPSG:27704). We do it because the distoritons should be minimal because the area is africa is quite close to europe.?(Talk to BBM)

In [68]:
crs = "EPSG:27704" # or african for africa
res =  20

chunks = {"time": 1, "latitude": 100, "longitude": 100}
sig0_dc = odc_stac.load(
    european_tiles['E033N015T3'],
    crs=crs,                                       
    resolution=res,
    #bbox=bounding_box,
    chunks=chunks,
    resampling="bilinear",                         # not good for dB data?! -> nearest would be better
)

nodata = items_eodc[0].assets["VV"].extra_fields["raster:bands"][0]["nodata"]
scale = items_eodc[0].assets["VV"].extra_fields["raster:bands"][0]["scale"]

sig0_dc = sig0_dc.where(sig0_dc != nodata) / scale

#sig0_dc = sig0_dc.dropna(dim="time", how="all") this is too computationaly expensive
sig0_dc


Unnamed: 0,Array,Chunk
Bytes,2.51 GiB,858.31 MiB
Shape,"(3, 15000, 15000)","(1, 15000, 15000)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.51 GiB 858.31 MiB Shape (3, 15000, 15000) (1, 15000, 15000) Dask graph 3 chunks in 7 graph layers Data type float32 numpy.ndarray",15000  15000  3,

Unnamed: 0,Array,Chunk
Bytes,2.51 GiB,858.31 MiB
Shape,"(3, 15000, 15000)","(1, 15000, 15000)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.51 GiB,858.31 MiB
Shape,"(3, 15000, 15000)","(1, 15000, 15000)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.51 GiB 858.31 MiB Shape (3, 15000, 15000) (1, 15000, 15000) Dask graph 3 chunks in 7 graph layers Data type float32 numpy.ndarray",15000  15000  3,

Unnamed: 0,Array,Chunk
Bytes,2.51 GiB,858.31 MiB
Shape,"(3, 15000, 15000)","(1, 15000, 15000)"
Dask graph,3 chunks in 7 graph layers,3 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [52]:
# Transform to linear scale to do calculations 
sig0_dc_linear = 10 ** (sig0_dc / 10)                                

# Merge in time direction 

sig0_dc_linear_hourly = sig0_dc_linear.resample(time="1h").mean(dim="time").persist() # time between e.g. 05:00:00 and 05:59:59.999 is averaged into the 05:00 bin

sig0_dc_linear_hourly = sig0_dc_linear_hourly.dropna(dim="time", how="all")           # remove all empty bins (this could be problematic for the runtime)

sig0_dc_linear_hourly

KeyboardInterrupt: 

In [12]:
# lower the resolution from 20m to 6.24km  (as 6.25km does not work because 312.5 is not an int())
sig0_dc_linear_6km = sig0_dc_linear_hourly.coarsen(x=312, y=312, boundary="pad").mean() # padded with NA

sig0_dc_linear_6km

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 39 graph layers,8 chunks in 39 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.79 kiB 1.25 kiB Shape (2, 21, 17) (1, 20, 16) Dask graph 8 chunks in 39 graph layers Data type float32 numpy.ndarray",17  21  2,

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 39 graph layers,8 chunks in 39 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 39 graph layers,8 chunks in 39 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.79 kiB 1.25 kiB Shape (2, 21, 17) (1, 20, 16) Dask graph 8 chunks in 39 graph layers Data type float32 numpy.ndarray",17  21  2,

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 39 graph layers,8 chunks in 39 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# ffill values 
sig_dc_linear_6km_filled = sig0_dc_linear_6km.ffill(dim="time")

sig_dc_linear_6km_filled

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 41 graph layers,8 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.79 kiB 1.25 kiB Shape (2, 21, 17) (1, 20, 16) Dask graph 8 chunks in 41 graph layers Data type float32 numpy.ndarray",17  21  2,

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 41 graph layers,8 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 41 graph layers,8 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.79 kiB 1.25 kiB Shape (2, 21, 17) (1, 20, 16) Dask graph 8 chunks in 41 graph layers Data type float32 numpy.ndarray",17  21  2,

Unnamed: 0,Array,Chunk
Bytes,2.79 kiB,1.25 kiB
Shape,"(2, 21, 17)","(1, 20, 16)"
Dask graph,8 chunks in 41 graph layers,8 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [14]:
# Convert back to dB
sig_dc_6km = 10 * np.log10(sig_dc_linear_6km_filled)#.compute()

In [15]:
sig_dc_6km.to_zarr("output_6km_filled.zarr", mode="w")  

<xarray.backends.zarr.ZarrStore at 0x1d1030236d0>

In [16]:
# Show steps visually:
""" only run if in testing mode

sig0_dc_linear.VV.plot.imshow(col="time", col_wrap=3, robust=True)
plt.show()

sig0_dc_linear_hourly.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()

sig0_dc_linear_6km.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()

sig_dc_linear_6km_filled.VV.plot.imshow(col="time", col_wrap=2, robust=True)
plt.show()
#"""

' only run if in testing mode\n\nsig0_dc_linear.VV.plot.imshow(col="time", col_wrap=3, robust=True)\nplt.show()\n\nsig0_dc_linear_hourly.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n\nsig0_dc_linear_6km.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n\nsig_dc_linear_6km_filled.VV.plot.imshow(col="time", col_wrap=2, robust=True)\nplt.show()\n#'


Output (subset):

<img src="../images/Data_Processing.png" alt="Data Processing" width="500"/>

In this image we can clearly see the hourly combination, the coarsing and the ffill.


#### Equi7 grid test
To check if the two different Equi7 grids cause any problems we had a look at the intersection

<img src="../images/Equi7-EuropeAfrica_Visual-Inspection.png" alt="Projections" width="500"/>

Here we can see that at least visually and in low resolution the different projections do not cause problems.

Distortions further south?
