# Generate SMOS Level 2C Datacube

This notebook is meant to be executed using [Papermill](https://papermill.readthedocs.io/).
The following cell defines the input parameters with their default values
and is therefore tagged with the label `parameters`.
When executed with Papermill, the default values are overridden by the parameters
supplied to the `papermill` tool, which are injected in a new cell directly below the parameters cell.

**CAUTION:** The following cell must be identical to the corresponding one in `estimate_costs.ipynb`.

In [1]:
product_type: str = "SMOS-L2C-SM"   # The product type. Either "SMOS-L2C-SM" or "SMOS-L2C-OS".
time_range: str = "2022-01-01/2022-01-03" # The time range using format "<start>/<end>".
agg_interval: str = "1d"  # Aggregation interval
res_level: int = 0  # Spatial resolution level in the range 0 to 4. Zero refers to max.

In [2]:
# On EDC, this is a fixed path that receives the output of this notebook.
# The data written here will end up in a unique location for every job run.
# If this path does not exist, the notebook writes to a local "./output"
# directory instead.
EDC_OUTPUT_DIR = "/home/jovyan/result-data"

In [3]:
import json
import os
import shutil

from xcube.core.store import new_data_store
from zappend.api import zappend

from helpers import generate_slices
from helpers import get_time_ranges

In [4]:
# Choose the directory that receives the generated datacube: the fixed EDC
# output path if it exists, otherwise a local "./output" directory.
if os.path.exists(EDC_OUTPUT_DIR):
    output_dir = EDC_OUTPUT_DIR
else:
    output_dir = "./output"
    # Create the fallback directory without a separate existence check, so
    # there is no window between checking and creating.
    os.makedirs(output_dir, exist_ok=True)

In [5]:
# Derive the target path from the time range and the aggregation interval.
# With the default parameters, "2022-01-01/2022-01-03" and "1d" yield
# "smos-20220101-20220103-1d.zarr".
date_stamp = "-".join(part.replace("-", "") for part in time_range.split("/"))
target_filename = f"smos-{date_stamp}-{agg_interval}.zarr"
target_dir = "/".join((output_dir, target_filename))
# Bare expression so that the notebook displays the resulting path.
target_dir

'./output/smos-20220101-20220103-1d.zarr'

In [6]:
# Remove a target left over from a previous run, presumably so that the
# datacube is built from scratch rather than extended.
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)

In [7]:
# Break the overall time range down into the (start, end) pairs to process,
# presumably one pair per aggregation interval; with the default parameters
# this gives one pair per day.
time_ranges = get_time_ranges(time_range, agg_interval)
# Bare expression so that the notebook displays the computed pairs.
time_ranges

[('2022-01-01', '2022-01-01'),
 ('2022-01-02', '2022-01-02'),
 ('2022-01-03', '2022-01-03')]

In [8]:
# Load the CREODIAS S3 credentials. A local "creodias-credentials.json" file
# takes precedence; if it does not exist, the credentials are read from the
# environment variables CREODIAS_S3_KEY and CREODIAS_S3_SECRET.
try:
    with open("creodias-credentials.json", encoding="utf-8") as f:
        creodias_credentials = json.load(f)
except FileNotFoundError:
    # os.environ is a mapping and must be subscripted; a missing variable
    # raises KeyError naming the variable that is not set.
    creodias_credentials = {
        "key": os.environ["CREODIAS_S3_KEY"],
        "secret": os.environ["CREODIAS_S3_SECRET"],
    }

In [9]:
# Open the "smos" data store on the EODATA S3 bucket, using the credentials
# loaded earlier for non-anonymous access.
smos_store = new_data_store(
    "smos",
    source_path="s3://EODATA",
    source_storage_options=dict(
        endpoint_url="https://s3.cloudferro.com",
        anon=False,
        **creodias_credentials,
    ),
    # Presumably a local cache for downloaded NetCDF files - TODO confirm.
    cache_path="./nc_cache",
    # "netcdf4" can be used as an alternative engine.
    xarray_kwargs={"engine": "h5netcdf"},
)

In [10]:
# Configuration handed to zappend(): where to write, the fixed grid size,
# the dimension to append along, per-variable chunking, and logging.
zappend_config = {
    "target_dir": target_dir,
    # NOTE(review): 8192 x 4032 presumably matches res_level 0 - confirm
    # before running with another resolution level.
    "fixed_dims": {"lon": 8192, "lat": 4032},
    "append_dim": "time",
    "variables": {
        # Default for all variables not listed explicitly below; the chunk
        # order is presumably (time, lat, lon), with a quarter of the grid
        # along each spatial axis.
        "*": {"encoding": {"chunks": [1, 4032 // 4, 8192 // 4]}},
        "time": {"encoding": {"chunks": [100]}},
        "time_bnds": {"encoding": {"chunks": [100, 2]}},
        # Each coordinate axis is stored as a single chunk.
        "lat": {"encoding": {"chunks": [4032]}},
        "lon": {"encoding": {"chunks": [8192]}},
    },
    # Log to the console.
    # Note you could also configure the log output for dask here.
    "logging": {
        "version": 1,
        "formatters": {
            "normal": {
                "format": "%(asctime)s %(levelname)s %(message)s",
                "style": "%",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "formatter": "normal",
            },
        },
        "loggers": {
            "zappend": {"level": "INFO", "handlers": ["console"]},
            "notebook": {"level": "INFO", "handlers": ["console"]},
            # An "xcube-smos" logger (e.g. level "DEBUG", handler "console")
            # can be added here for more detailed output.
        },
    },
}

In [None]:
# Generate the dataset slices for the selected product type and time ranges
# and let zappend write them to the configured target directory.
# NOTE(review): the `res_level` parameter is not passed to generate_slices()
# and is not used in any visible cell of this notebook - confirm whether it
# should be forwarded here.
slices = generate_slices(smos_store, product_type, time_ranges, agg_interval)
zappend(slices, config=zappend_config)