# Generate SMOS Level 2C Datacube

This notebook is intended to be executed using [Papermill](https://papermill.readthedocs.io/).
The following cell defines the input parameters with their default values.
The cell is therefore tagged with the label `parameters`.
When executed with Papermill, the default values are overridden by the parameters
passed to the `papermill` tool, which are injected as a new cell directly below the `parameters` cell.

**CAUTION:** The following cell must be identical to the corresponding one in `estimate_costs.ipynb`.

In [1]:
# Input parameters with their default values (cell tagged `parameters`).
# NOTE: this cell must stay identical to the corresponding cell in
# `estimate_costs.ipynb` - apply any change to both notebooks.
product_type: str = "SMOS-L2C-SM"   # The SMOS product type, must be 'SMOS-L2C-SM' or 'SMOS-L2C-OS'.
time_range: str = "2022-01-01/2022-01-03" # Date range given as closed interval '<first>/<last>' with first and last having format 'YYYY-MM-DD'.
agg_interval: str = "1D"  # The averaging interval such as '1D', '2D', '1W', or empty, which means no aggregation.
res_level: int = 0  # Spatial resolution level in the range 0 to 4. Actual resolution in degrees is 360/8192 * 2^res_level.

In [2]:
# Fixed directory on EDC that collects this notebook's output.
# Whatever is written there ends up in a unique location per job run.
EDC_OUTPUT_DIR: str = "/home/jovyan/result-data"

In [3]:
import json
import math
import os
import shutil

from xcube.core.store import new_data_store
from zappend.api import zappend

from helpers import generate_slices
from helpers import get_time_ranges

In [4]:
# Write to the fixed EDC output location when it exists; otherwise fall
# back to a local "./output" directory.
if os.path.exists(EDC_OUTPUT_DIR):
    output_dir = EDC_OUTPUT_DIR
else:
    output_dir = "./output"
    # exist_ok=True replaces the separate exists() check: it tolerates
    # re-runs and avoids the check-then-create race of os.mkdir().
    os.makedirs(output_dir, exist_ok=True)

In [5]:
# Derive the name of the target Zarr from the input parameters.
# The time range 'YYYY-MM-DD/YYYY-MM-DD' is compacted to 'YYYYMMDD-YYYYMMDD'.
compact_time_range = time_range.replace('-', '').replace('/', '-')
target_filename = (
    f"{product_type}-{compact_time_range}-{agg_interval}-res{res_level}.zarr"
)
target_dir = f'{output_dir}/{target_filename}'
target_dir

'./output/SMOS-L2C-SM-20220101-20220103-1D-res0.zarr'

In [6]:
# Delete an already existing target directory before writing.
target_already_exists = os.path.exists(target_dir)
if target_already_exists:
    shutil.rmtree(target_dir)

In [7]:
# Split the overall time range into the intervals to be processed.
# With the default parameters the result is a list of (first, last) date
# string pairs, one per interval (see the cell output).
time_ranges = get_time_ranges(time_range, agg_interval)
time_ranges

[('2022-01-01', '2022-01-01'),
 ('2022-01-02', '2022-01-02'),
 ('2022-01-03', '2022-01-03')]

In [8]:
# Load the S3 credentials: prefer a local JSON file and fall back to
# environment variables when that file does not exist.
try:
    with open("creodias-credentials.json", encoding="utf-8") as f:
        creodias_credentials = json.load(f)
except FileNotFoundError:
    # os.environ is a mapping, so it must be subscripted, not called.
    # A missing variable raises KeyError, which fails the run early
    # instead of continuing without credentials.
    creodias_credentials = {
        "key": os.environ["CREODIAS_S3_KEY"],
        "secret": os.environ["CREODIAS_S3_SECRET"],
    }

In [9]:
# Storage options for the source bucket; the loaded credentials are passed
# through as additional keyword arguments.
s3_storage_options = dict(
    endpoint_url="https://s3.cloudferro.com",
    anon=False,
    **creodias_credentials,
)

# Engine used by xarray to open the source files ("netcdf4" is the
# alternative that was left commented out).
netcdf_reader_options = {"engine": "h5netcdf"}

# Open the "smos" data store on the EODATA bucket with a local file cache.
smos_store = new_data_store(
    "smos",
    source_path="s3://EODATA",
    source_storage_options=s3_storage_options,
    cache_path="./nc_cache",
    xarray_kwargs=netcdf_reader_options,
)

In [10]:
# Every resolution level halves the grid size in both dimensions.
res_factor = 2 ** res_level

# Grid size in pixels at res_level 0.
max_width = 8192
max_height = 4032

# Grid size in pixels at the requested resolution level.
width = max_width // res_factor
height = max_height // res_factor

# Spatial chunk ("tile") size: a quarter of the res_level-0 grid, capped at
# the actual grid size. min() is required here; max() would never yield a
# tile smaller than the grid and, for res_level >= 3, would produce chunks
# larger than the dimension they chunk.
tile_width = min(max_width // 4, width)
tile_height = min(max_height // 4, height)

# Number of time steps per chunk for the time coordinate variables.
time_chunk_size = 128

In [11]:
def _chunked(*chunk_sizes):
    """Build a variable entry that only sets the chunk sizes of its encoding."""
    return {"encoding": {"chunks": list(chunk_sizes)}}


def _info_to_console():
    """Build a logger entry that sends INFO and above to the console handler."""
    return {"level": "INFO", "handlers": ["console"]}


# Chunking per variable. The "*" entry is presumably the default applied to
# all other variables (one time step per spatial tile) - confirm against
# the zappend configuration reference.
variable_settings = {
    "*": _chunked(1, tile_height, tile_width),
    "time": _chunked(time_chunk_size),
    "time_bnds": _chunked(time_chunk_size, 2),
    "lat": _chunked(height),
    "lon": _chunked(width),
}

# Log to the console.
# Note you could also configure the log output for dask here.
logging_settings = {
    "version": 1,
    "formatters": {
        "normal": {
            "format": "%(asctime)s %(levelname)s %(message)s",
            "style": "%",
        },
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "normal",
        },
    },
    "loggers": {
        "zappend": _info_to_console(),
        "notebook": _info_to_console(),
        # Uncomment to also log DEBUG messages of the "xcube-smos" logger:
        # "xcube-smos": {"level": "DEBUG", "handlers": ["console"]},
    },
}

# Configuration handed to zappend: target location, fixed spatial
# dimension sizes, the dimension to append along, chunking and logging.
zappend_config = {
    "target_dir": target_dir,
    "fixed_dims": {"lon": width, "lat": height},
    "append_dim": "time",
    "variables": variable_settings,
    "logging": logging_settings,
}
# zappend_config

In [None]:
# Create the slices for the requested product, time ranges, aggregation
# interval and resolution level, then append them to the target using the
# configuration defined above.
# NOTE(review): presumably `generate_slices` yields the slices lazily, one
# per time range - confirm in `helpers.py`.
slices = generate_slices(smos_store, product_type, time_ranges, agg_interval, res_level)
zappend(slices, config=zappend_config)

2024-01-19 18:06:23,952 INFO Writing slice 1 of 29 to ./temp-2022-01-01-2022-01-01/slice-0.nc
2024-01-19 18:06:25,651 INFO Writing slice 2 of 29 to ./temp-2022-01-01-2022-01-01/slice-1.nc
2024-01-19 18:06:27,298 INFO Writing slice 3 of 29 to ./temp-2022-01-01-2022-01-01/slice-2.nc
2024-01-19 18:06:28,952 INFO Writing slice 4 of 29 to ./temp-2022-01-01-2022-01-01/slice-3.nc
2024-01-19 18:06:30,598 INFO Writing slice 5 of 29 to ./temp-2022-01-01-2022-01-01/slice-4.nc
2024-01-19 18:06:32,232 INFO Writing slice 6 of 29 to ./temp-2022-01-01-2022-01-01/slice-5.nc
2024-01-19 18:06:33,897 INFO Writing slice 7 of 29 to ./temp-2022-01-01-2022-01-01/slice-6.nc
2024-01-19 18:06:35,538 INFO Writing slice 8 of 29 to ./temp-2022-01-01-2022-01-01/slice-7.nc
2024-01-19 18:06:37,166 INFO Writing slice 9 of 29 to ./temp-2022-01-01-2022-01-01/slice-8.nc
2024-01-19 18:06:38,800 INFO Writing slice 10 of 29 to ./temp-2022-01-01-2022-01-01/slice-9.nc
2024-01-19 18:06:40,435 INFO Writing slice 11 of 29 to ./te