## Stage In Example

Steps
- Install Data Service (DS) Client Library
- Set Log Level
- Set Environment variables for Stage-In
- Try Stage-In
- Check the results

In [3]:
# NOTE(review): `file` is not referenced anywhere else in the visible notebook, so this
# looks like an accidental IDE auto-import - confirm and remove. As written it runs
# before the install below and fails if `anyio` is not importable in the kernel.
from anyio.streams import file
# Install the Data Service (DS) client library with the %pip magic.
%pip install mdps-ds-lib

Note: you may need to restart the kernel to use updated packages.


### Setting the Log Level
- Log level mappings:
  - 10 = debug
  - 20 = info
  - 30 = warning
  - 40 = error

In [4]:
import logging

# logging.WARNING is the numeric level 30; swap in logging.DEBUG / INFO / ERROR
# (10 / 20 / 40) to change verbosity.
log_level = logging.WARNING
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] [%(name)s::%(lineno)d] %(message)s",
    level=log_level,
)

#### Stage-in Only Environment Variables

Follow this link for more information: https://app.gitbook.com/o/xZRqGQeQXJ0RP4VMj7Lq/s/UMIRhLdbRQTvMWop8Il9/developer-docs/data/docs/users-guide/stage-in



In [43]:
import os

# AWS credentials: needed to "stage-in" from AWS S3, and also when the "EDL" and
# "STAC_AUTH_TYPE" settings are read from parameter store.
# They may not be needed if the Jupyter environment already takes care of the access.
os.environ.update({
    'AWS_ACCESS_KEY_ID': 'xxx',
    'AWS_SECRET_ACCESS_KEY': 'xxx',
    'AWS_SESSION_TOKEN': 'xxx',
})

# Earthdata Login (EDL) settings: ONLY needed if "stage-in" requires Earthdata Login.
os.environ.update({
    'EDL_USERNAME': '/unity/uds/user/wphyo/edl_username',  # Parameter Store key holding the Earthdata Login username
    'EDL_PASSWORD': '/unity/uds/user/wphyo/edl_dwssap',  # Parameter Store key holding the Earthdata Login password
    'EDL_PASSWORD_TYPE': 'PARAM_STORE',  # Can be hardcoded to PARAM_STORE when parameter store is used
    'EDL_BASE_URL': 'urs.earthdata.nasa.gov',  # Earthdata Login URL used to get the token for downloading files
})

# Settings that are ONLY needed if "stage-in" runs with STAC_AUTH_TYPE set to "UNITY".
os.environ.update({
    'PASSWORD_TYPE': 'PARAM_STORE',  # See the linked user guide for all available options
    'USERNAME': '/unity/uds/user/wphyo/username',
    'PASSWORD': '/unity/uds/user/wphyo/dwssap',
    'CLIENT_ID': '71g0c73jl77gsqhtlfg2ht388c',
    'COGNITO_URL': 'https://cmr.earthdata.nasa.gov:443'.replace('https://cmr.earthdata.nasa.gov:443', 'https://cognito-idp.us-west-2.amazonaws.com'),
})

# Other environment variables.
os.environ.update({
    # Download type used to choose the download class: AMALGAMATION, S3, DAAC, HTTP, and so on.
    'GRANULES_DOWNLOAD_TYPE': 'AMALGAMATION',
    # Which asset keys to download.
    'DOWNLOADING_KEYS': 'data,metadata',
    # URL that directs which granules + assets to download.
    # Alternatively, store the file locally and point to it as a path:
    #   os.path.join(os.getcwd(), 'stage_in.json')
    'STAC_JSON': 'https://raw.githubusercontent.com/GodwinShen/emit-ghg/main/test/catalog.json',
    # Base directory where files will be downloaded.
    'DOWNLOAD_DIR': os.path.join(os.getcwd(), 'downloaded_files'),
    # File path where the result is written locally for review.
    'OUTPUT_FILE': os.path.join(os.getcwd(), 'stage_in_result.json'),
})


- This is an example STAC_JSON file (or the contents returned by the STAC_JSON URL) that directs which granules and assets stage-in downloads.
- It is a STAC "FeatureCollection": a wrapper around an array of [STAC Items](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md).

In [7]:
{
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "stac_version": "1.0.0",
            "id": "G2721220118-LPCLOUD",
            "properties": {
                "datetime": "2023-06-20T08:44:26Z",
                "start_datetime": "2023-06-20T08:44:26.000Z",
                "end_datetime": "2023-06-20T08:44:38.000Z",
                "eo:cloud_cover": 41
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [
                            54.235199,
                            39.2283897
                        ],
                        [
                            53.7996902,
                            38.5576591
                        ],
                        [
                            54.6095352,
                            38.031826
                        ],
                        [
                            55.0450439,
                            38.7025566
                        ],
                        [
                            54.235199,
                            39.2283897
                        ]
                    ]
                ]
            },
            "links": [
                {
                    "rel": "self",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.stac"
                },
                {
                    "rel": "parent",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/C2408009906-LPCLOUD.stac"
                },
                {
                    "rel": "collection",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/C2408009906-LPCLOUD.stac"
                },
                {
                    "rel": "root",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/"
                },
                {
                    "rel": "via",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.json"
                },
                {
                    "rel": "via",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.umm_json"
                }
            ],
            "assets": {
                "metadata": {
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.xml",
                    "type": "application/xml"
                },
                "browse": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.png",
                    "type": "image/png",
                    "title": "Download EMIT_L1B_RAD_001_20230620T084426_2317106_011.png"
                },
                "opendap": {
                    "href": "https://opendap.earthdata.nasa.gov/collections/C2408009906-LPCLOUD/granules/EMIT_L1B_RAD_001_20230620T084426_2317106_011",
                    "title": "OPeNDAP request URL"
                },
                "data": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc",
                    "title": "Download EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc"
                },
                "data1": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_OBS_001_20230620T084426_2317106_011.nc",
                    "title": "Download EMIT_L1B_OBS_001_20230620T084426_2317106_011.nc"
                }
            },
            "bbox": [
                53.7996902,
                38.031826,
                55.0450439,
                39.2283897
            ],
            "stac_extensions": [
                "https://stac-extensions.github.io/eo/v1.0.0/schema.json"
            ],
            "collection": "C2408009906-LPCLOUD"
        },
        {
            "type": "Feature",
            "stac_version": "1.0.0",
            "id": "G2721699381-LPCLOUD",
            "properties": {
                "datetime": "2023-06-20T08:44:26Z",
                "start_datetime": "2023-06-20T08:44:26.000Z",
                "end_datetime": "2023-06-20T08:44:38.000Z",
                "eo:cloud_cover": 41
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [
                            54.235199,
                            39.2283897
                        ],
                        [
                            53.7996902,
                            38.5576591
                        ],
                        [
                            54.6095352,
                            38.031826
                        ],
                        [
                            55.0450439,
                            38.7025566
                        ],
                        [
                            54.235199,
                            39.2283897
                        ]
                    ]
                ]
            },
            "links": [
                {
                    "rel": "self",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721699381-LPCLOUD.stac"
                },
                {
                    "rel": "parent",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/C2408750690-LPCLOUD.stac"
                },
                {
                    "rel": "collection",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/C2408750690-LPCLOUD.stac"
                },
                {
                    "rel": "root",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/"
                },
                {
                    "rel": "via",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721699381-LPCLOUD.json"
                },
                {
                    "rel": "via",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721699381-LPCLOUD.umm_json"
                }
            ],
            "assets": {
                "metadata": {
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721699381-LPCLOUD.xml",
                    "type": "application/xml"
                },
                "browse": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFL_001_20230620T084426_2317106_011.png",
                    "type": "image/png",
                    "title": "Download EMIT_L2A_RFL_001_20230620T084426_2317106_011.png"
                },
                "opendap": {
                    "href": "https://opendap.earthdata.nasa.gov/collections/C2408750690-LPCLOUD/granules/EMIT_L2A_RFL_001_20230620T084426_2317106_011",
                    "title": "OPeNDAP request URL"
                },
                "data": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc",
                    "title": "Download EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc"
                },
                "data1": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFLUNCERT_001_20230620T084426_2317106_011.nc",
                    "title": "Download EMIT_L2A_RFLUNCERT_001_20230620T084426_2317106_011.nc"
                },
                "data2": {
                    "href": "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_MASK_001_20230620T084426_2317106_011.nc",
                    "title": "Download EMIT_L2A_MASK_001_20230620T084426_2317106_011.nc"
                }
            },
            "bbox": [
                53.7996902,
                38.031826,
                55.0450439,
                39.2283897
            ],
            "stac_extensions": [
                "https://stac-extensions.github.io/eo/v1.0.0/schema.json"
            ],
            "collection": "C2408750690-LPCLOUD"
        }
    ]
}

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'stac_version': '1.0.0',
   'id': 'G2721220118-LPCLOUD',
   'properties': {'datetime': '2023-06-20T08:44:26Z',
    'start_datetime': '2023-06-20T08:44:26.000Z',
    'end_datetime': '2023-06-20T08:44:38.000Z',
    'eo:cloud_cover': 41},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[54.235199, 39.2283897],
      [53.7996902, 38.5576591],
      [54.6095352, 38.031826],
      [55.0450439, 38.7025566],
      [54.235199, 39.2283897]]]},
   'links': [{'rel': 'self',
     'href': 'https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.stac'},
    {'rel': 'parent',
     'href': 'https://cmr.earthdata.nasa.gov:443/search/concepts/C2408009906-LPCLOUD.stac'},
    {'rel': 'collection',
     'href': 'https://cmr.earthdata.nasa.gov:443/search/concepts/C2408009906-LPCLOUD.stac'},
    {'rel': 'root', 'href': 'https://cmr.earthdata.nasa.gov:443/search/'},
    {'rel': 'via',
     'href': 'https://cmr.earthdata.n

#### How to create a STAC Item and a Feature Collection

In [44]:
import json

from pystac import ItemCollection, Item, Asset

from mdps_ds_lib.lib.utils.time_utils import TimeUtils


def build_granule_item(granule_id, data_urls, metadata_file_name):
    """Build one STAC Item describing a granule and the files that belong to it.

    Args:
        granule_id: Granule identifier. Used as the Item id and to build the CMR
            `.stac` URL (the Item href) and `.xml` URL (the metadata asset).
        data_urls: URLs of the granule's data files. Each one becomes an asset with
            role `data`, keyed and titled by the file name at the end of the URL.
        metadata_file_name: Key and title of the asset with role `metadata`.

    Returns:
        The populated pystac `Item`.
    """
    cmr_concept_url = f'https://cmr.earthdata.nasa.gov:443/search/concepts/{granule_id}'

    # Point to all relevant files including the metadata file itself.
    assets = {}
    for data_url in data_urls:
        file_name = data_url.rsplit('/', 1)[-1]
        assets[file_name] = Asset(data_url, title=file_name, roles=['data'])
    assets[metadata_file_name] = Asset(f'{cmr_concept_url}.xml', title=metadata_file_name, roles=['metadata'])

    return Item(
        id=granule_id,
        geometry={  # Set them if the algorithm knows what type of geometry is needed
            "type": "Point",
            "coordinates": [0.0, 0.0]
        },
        bbox=[53.7996902,
              38.031826,
              55.0450439,
              39.2283897],  # Set them if the algorithm knows what type of geometry is needed
        # Current metadata file creation datetime
        datetime=TimeUtils().parse_from_str('2023-06-20T08:44:26Z').get_datetime_obj(),
        properties={  # These 4 fields are mandatory
            "start_datetime": "2016-01-31T18:00:00.009057Z",
            "end_datetime": "2016-01-31T19:59:59.991043Z",
            "created": "2016-02-01T02:45:59.639000Z",
            "updated": "2022-03-23T15:48:21.578000Z",
        },
        href=f'{cmr_concept_url}.stac',
        collection='NA',  # No need to find out what collection it belongs to; DS will take care of that.
        assets=assets,
    )


# One Item per granule. Each Item needs its own id: the original cell repeated the
# first granule verbatim, which produced two features with the same id.
test1 = ItemCollection(items=[
    build_granule_item(
        'G2721220118-LPCLOUD',
        data_urls=[
            'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc',
            'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_OBS_001_20230620T084426_2317106_011.nc',
        ],
        metadata_file_name='EMIT_L1B_RAD_001_20230620T084426_2317106_011.xml',
    ),
    build_granule_item(
        'G2721699381-LPCLOUD',
        data_urls=[
            'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc',
            'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFLUNCERT_001_20230620T084426_2317106_011.nc',
            'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_MASK_001_20230620T084426_2317106_011.nc',
        ],
        metadata_file_name='EMIT_L2A_RFL_001_20230620T084426_2317106_011.xml',
    ),
])

print(json.dumps(test1.to_dict(False), indent=4))


{
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "stac_version": "1.0.0",
            "id": "G2721220118-LPCLOUD",
            "properties": {
                "start_datetime": "2016-01-31T18:00:00.009057Z",
                "end_datetime": "2016-01-31T19:59:59.991043Z",
                "created": "2016-02-01T02:45:59.639000Z",
                "updated": "2022-03-23T15:48:21.578000Z",
                "datetime": "2023-06-20T08:44:26Z"
            },
            "geometry": {
                "type": "Point",
                "coordinates": [
                    0.0,
                    0.0
                ]
            },
            "links": [
                {
                    "rel": "self",
                    "href": "https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.stac",
                    "type": "application/json"
                }
            ],
            "assets": {
                "EMIT_L1B_RAD

#### Performing Stage-in
- Creating Stage-in Directory
- Calling Stage-in method to perform stage-in.
- Checking the result.

In [11]:
from mdps_ds_lib.stage_in_out.download_granules_factory import DownloadGranulesFactory
from mdps_ds_lib.stage_in_out.stage_in_out_utils import StageInOutUtils
from mdps_ds_lib.lib.utils.file_utils import FileUtils
from glob import glob

# Create the download base directory if it does not exist yet.
# It can also be created manually instead of calling this.
FileUtils.mk_dir_p(os.environ['DOWNLOAD_DIR'])

# Hardcoded call sequence: the downloader class is chosen from GRANULES_DOWNLOAD_TYPE,
# and every other parameter was set through the environment previously.
download_type = os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')
downloader = DownloadGranulesFactory().get_class(download_type)
result_str = downloader.download()
StageInOutUtils.write_output_to_file(result_str)
print('done')

# Check whether files were downloaded by listing the top level of the download directory.
downloaded_paths = glob(os.path.join(os.environ['DOWNLOAD_DIR'], '*'))
print(downloaded_paths)

2025-02-10 09:48:19,170 [ERROR] [mdps_ds_lib.stage_in_out.download_granules_amalgamation::32] downloading: https://cmr.earthdata.nasa.gov:443/search/concepts/G2721699381-LPCLOUD.xml
2025-02-10 09:48:19,170 [ERROR] [mdps_ds_lib.stage_in_out.download_granules_amalgamation::32] downloading: https://cmr.earthdata.nasa.gov:443/search/concepts/G2721220118-LPCLOUD.xml
2025-02-10 09:48:19,497 [ERROR] [mdps_ds_lib.stage_in_out.download_granules_amalgamation::32] downloading: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc
2025-02-10 09:48:19,511 [ERROR] [mdps_ds_lib.stage_in_out.download_granules_amalgamation::32] downloading: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL1BRAD.001/EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc


done
['/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/error.log', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/G2721699381-LPCLOUD.xml', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/G2721220118-LPCLOUD.xml', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/G2721220118-LPCLOUD.stac.json', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/G2721699381-LPCLOUD.stac.json', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/catalog.json', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc', '/Users/wphyo/Projects/unity/uds_lib/examples/downloaded_files/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc']
