diff --git a/README.md b/README.md index e86697e..da7910b 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,29 @@ The `process.ipynb` notebook file is designed to work either as an independent n Unity OGC applications rely upon using [Papermill parameritzation](https://papermill.readthedocs.io/en/latest/usage-parameterize.html) of arguments. One of the cells is tagged with the `parameters` tag, indicating to Papermill which cell to inspect for insertion of values from the command line. See the [app-pack-generator](https://github.com/unity-sds/app-pack-generator) for more information on the formatting of parameters and the use of type hints. +### OGC Run + To run this without the stage-in or stage-out parameters, simply call the `process.cwl` contained in this repo: + +``` +# Run stage-in + +cwltool --outdir stage_in --copy-output stage_in.cwl test/ogc_app_package/stage_in.yml + +# For my run, the output ended up in a directory called z7ai3uj8 + +# For now, the current stage_in creates an invalid 'root' in catalog.json. You'll need to change the root from //catalog.json to catalog.json +vim z7ai3uj8/catalog.json + +# Pass that directory as the input to the process.cwl run +cwltool process.cwl --example_argument_empty '' --input z7ai3uj8/ --output_collection + +# The output files and catalog.json from this run were stored in 02kajdto + +# Run stage-out +cwltool stage_out.cwl --output_dir 02kajdto/ --staging_bucket + +``` + ### stage-in This notebook is connected to a Unity stage-in process through the `input_stac_collection_file` variable. This variable contains the location of a STAC feature collection file. That feature collection points to the input files used by the notebook. In our example notebook we use Unity-py to parse the file and obtain the full paths to the input files. 
@@ -67,4 +90,4 @@ See our [releases page](https://github.com/unity-sds/unity-example-application/r ## License -See our: [LICENSE](LICENSE.txt) \ No newline at end of file +See our: [LICENSE](LICENSE.txt) diff --git a/process.cwl b/process.cwl new file mode 100644 index 0000000..7c3b3d3 --- /dev/null +++ b/process.cwl @@ -0,0 +1,62 @@ +#!/usr/bin/env cwl-runner +arguments: +- -p +- input +- $(inputs.input.path) +- -p +- output +- $(runtime.outdir) +baseCommand: +- papermill +- /home/jovyan/process.ipynb +- --cwd +- /home/jovyan +- "process_out.ipynb" +- -f +- /tmp/inputs.json +- -k +- python3 +- --log-output +class: CommandLineTool +cwlVersion: v1.2 +inputs: + input: Directory + example_argument_bool: + default: true + type: boolean + example_argument_empty: + default: null + type: string + example_argument_float: + default: 1.0 + type: float + example_argument_int: + default: 1 + type: int + example_argument_string: + default: string + type: string + output_collection: + default: example-app-collection___1 + type: string + summary_table_filename: + default: summary_table.txt + type: string +outputs: + output: + outputBinding: + glob: $(runtime.outdir) + type: Directory +requirements: + DockerRequirement: + dockerPull: gangl/unity-ogc-example-application:174ee35b + InitialWorkDirRequirement: + listing: + - entry: $(inputs) + entryname: /tmp/inputs.json + InlineJavascriptRequirement: {} + InplaceUpdateRequirement: + inplaceUpdate: true + NetworkAccess: + networkAccess: true + ShellCommandRequirement: {} diff --git a/process.ipynb b/process.ipynb index d1c05a3..4938ed3 100644 --- a/process.ipynb +++ b/process.ipynb @@ -23,19 +23,36 @@ "from unity_sds_client.resources.data_file import DataFile" ] }, + { + "cell_type": "markdown", + "id": "5cb70c6e-a08b-49c4-bbef-13200a18bfda", + "metadata": {}, + "source": [ + "## Parameters Cell\n", + "\n", + "The below cell is tagged as a 'parameters' cell. This enables us to overwrite the below values at runtime. 
There are some special values in the below cell.\n", + "\n", + "* `input` is a special name, and it also has the `# type: stage-in` annotation. This should be a directory, and at run time it will be populated with a STAC catalog that contains files that have been staged for your algorithm to reference.\n", + "* `output` is a special name, and it also has the `# type: stage-out` annotation. This should be treated as a directory to which you write ALL of your output files along with a STAC catalog that references files you would like to persist outside of the algorithm run." + ] + }, { "cell_type": "code", "execution_count": 2, "id": "04ac7f2d", "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, "tags": [ "parameters" ] }, "outputs": [], "source": [ - "input_stac_collection_file = 'test/stage_in/stage_in_results.json' # type: stage-in\n", - "output_stac_catalog_dir = 'test/process_results/' # type: stage-out\n", + "input = 'test/stage_in/' # type: stage-in\n", + "output = 'test/process_results/' # type: stage-out\n", "\n", "# Filename written to the working directory\n", "summary_table_filename = \"summary_table.txt\"\n", @@ -52,6 +69,25 @@ "example_argument_empty = None # type: string Allow a null value or a string\n" ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "62471f5d-d898-46c1-89c1-b572851db551", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "reading test/stage_in/catalog.json\n" + ] + } + ], + "source": [ + "input_catalog = os.path.join(input, \"catalog.json\")\n", + "print(\"reading {}\".format(input_catalog))" + ] + }, { "cell_type": "markdown", "id": "7926d21b", @@ -64,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "id": "2eeaa5d4", "metadata": {}, "outputs": [ @@ -88,7 +124,7 @@ "'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
argument_name type value
example_argument_int <class 'int'> 1
example_argument_float <class 'float'> 1.0
example_argument_string<class 'str'> string
example_argument_bool <class 'bool'> True
example_argument_empty <class 'NoneType'>
'" ] }, - "execution_count": 3, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -123,24 +159,24 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "id": "3a09d57c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['/home/jovyan/unity-example-application/test/stage_in/./SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc',\n", - " '/home/jovyan/unity-example-application/test/stage_in/./SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc']" + "['/Users/gangl/dev/unity/unity-OGC-example-application/test/stage_in/./SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc',\n", + " '/Users/gangl/dev/unity/unity-OGC-example-application/test/stage_in/./SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc']" ] }, - "execution_count": 4, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inp_collection = Collection.from_stac(input_stac_collection_file)\n", + "inp_collection = Collection.from_stac(input_catalog)\n", "data_filenames = inp_collection.data_locations()\n", "\n", "data_filenames" @@ -158,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "id": "9fbac209", "metadata": {}, "outputs": [], @@ -178,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "id": "d22c8670", "metadata": {}, "outputs": [], @@ -195,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "id": "3344bd15", "metadata": {}, "outputs": [ @@ -216,7 +252,7 @@ "'\\n\\n\\n\\n\\n\\n\\n\\n
product_name product_name_type_id shortname product_version date_created time_coverage_start time_coverage_end geospatial_lat_mid geospatial_lon_mid
SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.ncL1_AQ SNDR13CHRP1v02.48.00 2021-04-25T05:59:08Z2016-08-22T00:05:22Z 2016-08-22T00:11:22Z -48.6062 12.4563
SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.ncL1_AQ SNDR13CHRP1v02.48.00 2021-04-25T05:59:19Z2016-08-22T00:11:22Z 2016-08-22T00:17:22Z -69.3979 -1.98753
'" ] }, - "execution_count": 7, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -228,14 +264,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "id": "014257f3", "metadata": {}, "outputs": [], "source": [ "# Write the table in text format\n", - "pathlib.Path(output_stac_catalog_dir).mkdir(parents=True, exist_ok=True)\n", - "output_filename = os.path.join(output_stac_catalog_dir, summary_table_filename)\n", + "pathlib.Path(output).mkdir(parents=True, exist_ok=True)\n", + "output_filename = os.path.join(output, summary_table_filename)\n", "with open(output_filename, \"w\") as summary_file:\n", " summary_file.write(tabulate(table_data, headers=column_names))" ] @@ -250,9 +286,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 18, "id": "b4aa5d3b", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "# Create a collection\n", @@ -271,14 +313,14 @@ "dataset.add_data_file(DataFile(\"csv\", summary_table_filename, [\"data\"]))\n", "\n", "#when we run \"to_stac\" below, this file will be generated. 
this needs to be added to the stac file itself for future reference.\n", - "dataset.add_data_file(DataFile(\"json\", output_stac_catalog_dir + \"/\" + summary_table_filename +'.json', [\"metadata\"] ))\n", + "dataset.add_data_file(DataFile(\"json\", output + \"/\" + summary_table_filename +'.json', [\"metadata\"] ))\n", "\n", "\n", "# Add the dataset to the collection\n", "#out_collection.add_dataset(dataset)\n", "out_collection._datasets.append(dataset)\n", "\n", - "Collection.to_stac(out_collection, output_stac_catalog_dir)" + "Collection.to_stac(out_collection, output)" ] }, { @@ -307,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.18" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 915a462..93e877e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ papermill -unity-sds-client==0.3.0 +unity-sds-client==0.6.1 netCDF4 tabulate diff --git a/stage_in.cwl b/stage_in.cwl new file mode 100644 index 0000000..072b1d4 --- /dev/null +++ b/stage_in.cwl @@ -0,0 +1,108 @@ +#!/usr/bin/env cwl-runner +baseCommand: +- DOWNLOAD +class: CommandLineTool +cwlVersion: v1.2 +inputs: + download_type: + type: string + downloading_keys: + default: data, metadata + type: string + downloading_roles: + default: data, metadata + type: string + edl_password: + default: /sps/processing/workflows/edl_password + type: string + edl_password_type: + default: PARAM_STORE + type: string + edl_username: + default: /sps/processing/workflows/edl_username + type: string + stac_json: + type: + - string + - File + unity_client_id: + type: string + unity_cognito: + default: https://cognito-idp.us-west-2.amazonaws.com + type: string + unity_password: + default: /sps/processing/workflows/unity_password + type: string + unity_ssl: + default: 'TRUE' + type: string + unity_stac_auth: + default: NONE + type: string + unity_type: + default: PARAM_STORE + type: string + unity_username: + 
default: /sps/processing/workflows/unity_username + type: string +outputs: + stage_in_collection_file: + outputBinding: + glob: stage-in-results.json + type: File + stage_in_download_dir: + outputBinding: + glob: . + type: Directory +requirements: + DockerRequirement: + dockerPull: ghcr.io/unity-sds/unity-data-services:9.0.0 + NetworkAccess: + networkAccess: true + EnvVarRequirement: + envDef: + - envName: CLIENT_ID + envValue: $(inputs.unity_client_id) + - envName: COGNITO_URL + envValue: $(inputs.unity_cognito) + - envName: DOWNLOADING_KEYS + envValue: $(inputs.downloading_keys) + - envName: DOWNLOADING_ROLES + envValue: $(inputs.downloading_roles) + - envName: DOWNLOAD_DIR + envValue: $(runtime.outdir) + - envName: DOWNLOAD_RETRY_TIMES + envValue: '5' + - envName: DOWNLOAD_RETRY_WAIT_TIME + envValue: '30' + - envName: EDL_BASE_URL + envValue: https://urs.earthdata.nasa.gov/ + - envName: EDL_PASSWORD + envValue: $(inputs.edl_password) + - envName: EDL_PASSWORD_TYPE + envValue: $(inputs.edl_password_type) + - envName: EDL_USERNAME + envValue: $(inputs.edl_username) + - envName: GRANULES_DOWNLOAD_TYPE + envValue: $(inputs.download_type) + - envName: LOG_LEVEL + envValue: '20' + - envName: OUTPUT_FILE + envValue: $(runtime.outdir)/stage-in-results.json + - envName: PARALLEL_COUNT + envValue: '-1' + - envName: PASSWORD + envValue: $(inputs.unity_password) + - envName: PASSWORD_TYPE + envValue: $(inputs.unity_type) + - envName: STAC_AUTH_TYPE + envValue: $(inputs.unity_stac_auth) + - envName: USERNAME + envValue: $(inputs.unity_username) + - envName: VERIFY_SSL + envValue: $(inputs.unity_ssl) + - envName: STAC_JSON + envValue: "${\n console.log(typeof inputs.stac_json);\n if (typeof inputs.stac_json\ + \ === 'object'){\n return inputs.stac_json.path;\n }\n else{\n return\ + \ inputs.stac_json;\n }\n}\n" + InlineJavascriptRequirement: {} diff --git a/stage_out.cwl b/stage_out.cwl new file mode 100644 index 0000000..973c05c --- /dev/null +++ b/stage_out.cwl @@ -0,0 
+1,64 @@ +#!/usr/bin/env cwl-runner +baseCommand: +- UPLOAD +class: CommandLineTool +cwlVersion: v1.2 +inputs: + aws_access_key_id: + default: '' + type: string + aws_region: + default: us-west-2 + type: string + aws_secret_access_key: + default: '' + type: string + aws_session_token: + default: '' + type: string + collection_id: + default: '' + type: string + output_dir: + type: Directory + result_path_prefix: + default: '' + type: string + staging_bucket: + default: '' + type: string +outputs: + failed_features: + outputBinding: + glob: $(runtime.outdir)/failed_features.json + type: File + stage_out_results: + outputBinding: + glob: $(runtime.outdir)/stage-out-results.json + type: File + successful_features: + outputBinding: + glob: $(runtime.outdir)/successful_features.json + type: File +requirements: + DockerRequirement: + dockerPull: ghcr.io/unity-sds/unity-data-services:7.12.2 + EnvVarRequirement: + envDef: + AWS_ACCESS_KEY_ID: $(inputs.aws_access_key_id) + AWS_REGION: $(inputs.aws_region) + AWS_SECRET_ACCESS_KEY: $(inputs.aws_secret_access_key) + AWS_SESSION_TOKEN: $(inputs.aws_session_token) + CATALOG_FILE: $(inputs.output_dir.path)/catalog.json + COLLECTION_ID: $(inputs.collection_id) + LOG_LEVEL: '20' + OUTPUT_DIRECTORY: $(runtime.outdir) + OUTPUT_FILE: $(runtime.outdir)/stage-out-results.json + PARALLEL_COUNT: '-1' + RESULT_PATH_PREFIX: $(inputs.result_path_prefix) + STAGING_BUCKET: $(inputs.staging_bucket) + InitialWorkDirRequirement: + listing: + - entry: $(inputs.output_dir) + entryname: /tmp/outputs +stdout: stage-out-results.json diff --git a/test/ogc_app_package/stage_in.yml b/test/ogc_app_package/stage_in.yml new file mode 100644 index 0000000..0fbd99e --- /dev/null +++ b/test/ogc_app_package/stage_in.yml @@ -0,0 +1,11 @@ +{ + stac_json: 'https://raw.githubusercontent.com/unity-sds/unity-tutorial-application/main/test/stage_in/stage_in_results.json', + downloading_roles: '', + downloading_keys: 'data', #key under "assets" in the 
stage_in_results.json + download_type: 'HTTP', + edl_username: None, + edl_password_type: '', + edl_password: '', + unity_client_id: '', + unity_stac_auth: 'NONE' +} diff --git a/test/stage_in/stage_in_results.json b/test/stage_in/catalog.json similarity index 100% rename from test/stage_in/stage_in_results.json rename to test/stage_in/catalog.json