diff --git a/data/downloads/odp-ftp-sea-surface-salinity.py b/data/downloads/odp-ftp-sea-surface-salinity.py
new file mode 100755
index 000000000..339740a49
--- /dev/null
+++ b/data/downloads/odp-ftp-sea-surface-salinity.py
@@ -0,0 +1,30 @@
+from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
+from xcube.core.store import new_data_store
+
+cci_store = new_data_store('esa-cci')
+
+start_date = '2010-01-01'
+end_date = '2022-10-15'
+number_of_timestamps = 307  # one slice per 1st and 15th of each month
+
+cube = cci_store.open_data('esacci.SEASURFACESALINITY.15-days.L4.SSS.multi-sensor.multi-platform.GLOBAL-MERGED_OI_Monthly_CENTRED_15Day_0-25deg.4-41.r1',
+                           variable_names=['sss'],
+                           time_range=[start_date, end_date])
+
+# first timestamp of the series, centred at 12:00 UTC
+s = start_date + "T12:00:00.000Z"
+f = "%Y-%m-%dT%H:%M:%S.%fZ"
+date = datetime.strptime(s, f)
+
+for index in range(number_of_timestamps):
+
+    if index:
+        if index % 2:
+            # 15th of the month
+            date = date + timedelta(days=14)
+        else:
+            # first of the next month
+            date = date + timedelta(days=-14) + relativedelta(months=+1)
+
+    cube.sss.sel(time=date, method='nearest').to_netcdf(path='./' + date.strftime("%Y%m%d") + '.nc')
diff --git a/data/downloads/odp-ftp-sea-surface-salinity.sh b/data/downloads/odp-ftp-sea-surface-salinity.sh
deleted file mode 100755
index a1347bcce..000000000
--- a/data/downloads/odp-ftp-sea-surface-salinity.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-BASE_URL="https://dap.ceda.ac.uk/neodc/esacci/sea_surface_salinity/data/v03.21/30days"
-
-START_DATE=2010-02-01
-OUTPUT_FOLDER=./download/sss
-
-mkdir -p $OUTPUT_FOLDER
-
-for i in {0..127}
-do
-    NEXT_YEAR=$(date +%Y -d "$START_DATE + $i month")
-    NEXT_DATE=$(date +%Y%m%d -d "$START_DATE + $i month")
-    FILENAME=$OUTPUT_FOLDER/$(date +%Y%m%d -d "$START_DATE + $i month").nc
-    FTP_URL=$BASE_URL/$NEXT_YEAR/ESACCI-SEASURFACESALINITY-L4-SSS-MERGED_OI_Monthly_CENTRED_15Day_25km-$NEXT_DATE-fv3.21.nc
-    echo $FTP_URL
-
-    curl --silent $FTP_URL > $FILENAME
-
-    python ./data/drop-unused-vars.py --file $FILENAME --variable sss
-done
diff --git a/pipeline/dags/sea_surface_salinity_sss.py b/pipeline/dags/sea_surface_salinity_sss.py
new file mode 100644
index 000000000..053d7b5c5
--- /dev/null
+++ b/pipeline/dags/sea_surface_salinity_sss.py
@@ -0,0 +1,68 @@
+from datetime import datetime
+import task_factories
+from airflow import DAG
+from airflow.models.param import Param
+from helper import get_default_layer_version
+
+# layer
+LAYER_ID = 'sea_surface_salinity'
+LAYER_VARIABLE = 'sss'
+METADATA = {
+    "id": f'{LAYER_ID}.{LAYER_VARIABLE}',
+    "timestamps": [],  # will be injected
+    "min_value": 30,
+    "max_value": 40,
+    "type": "image",  # 'tiles' or 'image'
+    "zoom_levels": '0-3',
+    "units": "PSU",
+    "colorMap": "custom",
+    "basemap": "ocean",
+    "time_format": {
+        "year": "numeric",
+        "month": "long",
+        "day": "numeric"
+    }
+}
+
+# dev
+BUCKET_ORIGIN = 'esa-cfs-cate-data'
+BUCKET_TMP = 'esa-cfs-pipeline-tmp'
+WORKDIR = '/workdir/files'
+COLOR_FILE = f'/opt/airflow/plugins/colors/{LAYER_ID}.{LAYER_VARIABLE}.txt'
+DEBUG = False
+
+default_layer_version = get_default_layer_version()
+dag_params = {
+    "max_files": Param(2, type=["null", "integer"], minimum=0),
+    "output_bucket": Param("esa-cfs-pipeline-output", type=["string"], enum=['esa-cfs-pipeline-output', 'esa-cfs-tiles']),
+    "skip_downloads": Param(False, type="boolean"),
+    "layer_version": Param(default_layer_version, type="string")
+}
+
+with DAG(dag_id=METADATA["id"], start_date=datetime(2022, 1, 1), schedule=None, catchup=False, params=dag_params) as dag:
+
+    # create tasks
+    clean_workdir = task_factories.clean_dir_skippable(
+        task_id='clean_workdir', dir=WORKDIR)()
+    list_files = task_factories.gcs_list_files(
+        bucket_name=BUCKET_ORIGIN, layer_id=LAYER_ID, layer_variable=LAYER_VARIABLE)
+    download = task_factories.gcs_download_file(
+        bucket_name=BUCKET_ORIGIN, dir=WORKDIR, appendix='_downloaded')
+    legend_image = task_factories.legend_image(
+        workdir=WORKDIR, color_file=COLOR_FILE)
+    metadata = task_factories.metadata(workdir=WORKDIR, metadata=METADATA)
+    gdal_transforms = task_factories.gdal_transforms(
+        layer_variable=LAYER_VARIABLE, color_file=COLOR_FILE, layer_type=METADATA['type'], zoom_levels=METADATA['zoom_levels'])
+    upload = task_factories.upload(
+        WORKDIR, LAYER_ID, LAYER_VARIABLE, METADATA['type'])
+
+    # connect tasks
+    files = list_files()
+    clean_workdir >> files
+    downloads = download.expand(filename=files)
+    gdal_transforms(downloads) >> upload()
+    clean_workdir >> legend_image
+    metadata(files)
+
+    if DEBUG:
+        downloads >> task_factories.gdal_info()
diff --git a/pipeline/dags/task_factories.py b/pipeline/dags/task_factories.py
index b953ea018..ae87c97b2 100644
--- a/pipeline/dags/task_factories.py
+++ b/pipeline/dags/task_factories.py
@@ -48,7 +48,7 @@ def gcs_list_files(bucket_name: str, layer_id: str, layer_variable: str, task_id
     def fn(**context):
         max_files = context["params"]["max_files"]
         hook = GCSHook('google')
-        subdir_path = f'{context["params"]["input_bucket_subdir"]}/' if context["params"]["input_bucket_subdir"] else ''
+        subdir_path = f'{context["params"]["input_bucket_subdir"]}/' if context["params"].get("input_bucket_subdir") else ''
         filenames = hook.list(
             bucket_name, match_glob=f'{layer_id}.{layer_variable}/{subdir_path}*.nc')
 
diff --git a/data/gdal-colors/colors-sss.txt b/pipeline/plugins/colors/sea_surface_salinity.sss.txt
similarity index 100%
rename from data/gdal-colors/colors-sss.txt
rename to pipeline/plugins/colors/sea_surface_salinity.sss.txt
diff --git a/pipeline/plugins/layer-icons/sea_surface_salinity.sss.png b/pipeline/plugins/layer-icons/sea_surface_salinity.sss.png
new file mode 100644
index 000000000..e81a548de
Binary files /dev/null and b/pipeline/plugins/layer-icons/sea_surface_salinity.sss.png differ
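
Note (not part of the diff): the 1st/15th date stepping in
odp-ftp-sea-surface-salinity.py is the subtle part of this change. The sketch
below is a minimal, standalone reproduction of that loop, assuming (as the
script does) two timestamps per month, on the 1st and the 15th at 12:00 UTC;
it needs only the standard library and python-dateutil.

    from datetime import datetime, timedelta
    from dateutil.relativedelta import relativedelta

    # start at the series' first timestamp, 2010-01-01 12:00 UTC
    date = datetime(2010, 1, 1, 12)
    for index in range(5):
        if index:
            if index % 2:
                date += timedelta(days=14)        # 1st -> 15th of the same month
            else:
                date += timedelta(days=-14)       # 15th -> back to the 1st,
                date += relativedelta(months=+1)  # then forward one month
        print(date.date())
    # prints 2010-01-01, 2010-01-15, 2010-02-01, 2010-02-15, 2010-03-01

The back-and-forward step in the else branch is what keeps the sequence
anchored to calendar months regardless of month length.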