## Performing Stage-In programmatically via a Python notebook
### Steps
- Download and install the UDS library (`mdps-ds-lib`) from https://pypi.org/project/mdps-ds-lib/
- Set Log Level
- Create environment variables
- Call the stage-in class and verify that the files were downloaded.

In [1]:
%pip install mdps-ds-lib

Collecting mdps-ds-lib
  Using cached mdps_ds_lib-0.1.0-py3-none-any.whl.metadata (1.3 kB)
Collecting elasticsearch==7.13.4 (from mdps-ds-lib)
  Using cached elasticsearch-7.13.4-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting fastjsonschema<3.0.0,>=2.19.1 (from mdps-ds-lib)
  Using cached fastjsonschema-2.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting jsonschema<5.0.0,>=4.23.0 (from mdps-ds-lib)
  Using cached jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB)
Collecting requests-aws4auth==1.2.3 (from mdps-ds-lib)
  Using cached requests_aws4auth-1.2.3-py2.py3-none-any.whl.metadata (18 kB)
Collecting xmltodict==0.13.0 (from mdps-ds-lib)
  Using cached xmltodict-0.13.0-py2.py3-none-any.whl.metadata (7.7 kB)
Using cached mdps_ds_lib-0.1.0-py3-none-any.whl (73 kB)
Using cached elasticsearch-7.13.4-py2.py3-none-any.whl (356 kB)
Using cached requests_aws4auth-1.2.3-py2.py3-none-any.whl (24 kB)
Using cached xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Using cached fastjsonschema-

In [3]:
import logging

# Show WARNING and above only. Switch to logging.INFO or logging.DEBUG when troubleshooting.
# Note: basicConfig() does nothing if the root logger already has handlers, so after changing
# the level either restart the kernel or pass force=True (Python 3.8+).
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s [%(levelname)s] [%(name)s::%(lineno)d] %(message)s",
)


In [4]:
import os

# AWS session credentials.
# NOTE(review): 'ddd' looks like a placeholder — substitute real temporary credentials locally
# and never commit them with the notebook.
os.environ.update({
    'AWS_ACCESS_KEY_ID': 'ddd',
    'AWS_SECRET_ACCESS_KEY': 'ddd',
    'AWS_SESSION_TOKEN': 'ddd',
})

# Earthdata Login (EDL) settings and the choice of what to download.
os.environ.update({
    'EDL_USERNAME': '/unity/uds/user/abcd/edl_username',  # Parameter Store key holding the EDL username
    'EDL_PASSWORD': '/unity/uds/user/abcd/edl_pass',  # Parameter Store key holding the EDL password
    'EDL_PASSWORD_TYPE': 'PARAM_STORE',  # Keep as PARAM_STORE when the two values above are Parameter Store keys
    'EDL_BASE_URL': 'urs.earthdata.nasa.gov',  # EDL host used to obtain the token for downloading files
    'GRANULES_DOWNLOAD_TYPE': 'DAAC',  # Selects the download class: DAAC, HTTP, and so on
    'DOWNLOADING_KEYS': 'data,metadata',  # Comma-separated asset keys to download
})

# Input catalog and local output locations, all relative to the current working directory.
notebook_dir = os.getcwd()
os.environ.update({
    'STAC_JSON': 'https://raw.githubusercontent.com/GodwinShen/emit-ghg/main/test/catalog.json',  # URL listing the granules + assets to download
    # Alternatively, store the file locally and point to its path instead:
    # 'STAC_JSON': os.path.join(notebook_dir, 'stage_in.json'),
    'DOWNLOAD_DIR': os.path.join(notebook_dir, 'downloaded_files'),  # Base directory the files are downloaded into
    'OUTPUT_FILE': os.path.join(notebook_dir, 'stage_in_result.json'),  # Local file the result is written to for review
})

In [6]:
# FileUtils is imported here and reused later by the stage-out section of this notebook.
from mdps_ds_lib.lib.utils.file_utils import FileUtils
# Make sure the download base directory exists before stage-in runs.
# The directory can also be created manually instead of calling this helper.
FileUtils.mk_dir_p(os.environ['DOWNLOAD_DIR'])  # presumably `mkdir -p` semantics (no error if it already exists) — TODO confirm


In [7]:
from mdps_ds_lib.stage_in_out.download_granules_factory import DownloadGranulesFactory
from mdps_ds_lib.stage_in_out.stage_in_out_utils import StageInOutUtils

# Nothing is passed explicitly: every parameter comes from the environment variables set earlier.
# The fallback string only makes a missing GRANULES_DOWNLOAD_TYPE obvious to whoever reads the failure.
download_type = os.getenv('GRANULES_DOWNLOAD_TYPE', 'MISSING_GRANULES_DOWNLOAD_TYPE')
downloader = DownloadGranulesFactory().get_class(download_type)
result_str = downloader.download()

# Persist the result for later review (presumably to the OUTPUT_FILE path — confirm in the library).
StageInOutUtils.write_output_to_file(result_str)
print('done')

done


In [8]:
from glob import glob

# List every entry directly under the download directory to confirm stage-in produced files.
download_pattern = os.path.join(os.environ['DOWNLOAD_DIR'], '*')
downloaded_paths = glob(download_pattern)
print(downloaded_paths)

['/home/jovyan/downloaded_files/G2721220118-LPCLOUD.xml', '/home/jovyan/downloaded_files/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc', '/home/jovyan/downloaded_files/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc', '/home/jovyan/downloaded_files/downloaded_feature_collection.json', '/home/jovyan/downloaded_files/G2721699381-LPCLOUD.xml']


## Performing Stage-out programmatically as auxiliary files
#### Steps
- Create environment variables
- Call the stage-out class and verify that the files were uploaded.

In [9]:
# Upload behaviour.
os.environ.update({
    'GRANULES_UPLOAD_TYPE': 'UPLOAD_AUXILIARY_FILE_AS_GRANULE',  # Upload the files as auxiliary files
    'STAGING_BUCKET': 'uds-test-cumulus-internal',  # S3 bucket where the files will reside
    # NOTE(review): presumably disables TLS certificate verification — confirm this is intended outside test setups.
    'VERIFY_SSL': 'FALSE',  # Optional param.
    'RESULT_PATH_PREFIX': 'stage_out',  # Optional. Prefix for storing the result in S3, which triggers auto catalog
    'PARALLEL_COUNT': '1',  # Number of threads used for the upload
})

# Local paths, all relative to the current working directory.
stage_out_root = os.getcwd()
os.environ.update({
    'OUTPUT_DIRECTORY': os.path.join(stage_out_root, 'output_result_dir'),  # Success / failure results are stored here for review
    'BASE_DIRECTORY': os.path.join(stage_out_root, 'downloaded_files'),  # Base folder to upload
    # This replaces the OUTPUT_FILE value that was set for the stage-in step.
    'OUTPUT_FILE': os.path.join(stage_out_root, 'stage_out_result.json'),  # Local file the result overview is written to for review
})

In [10]:
# Assemble the collection ID as URN:NASA:UNITY:<tenant>:<venue>:<collection>___<version>.
tenant, tenant_venue = 'UDS_DEMO', 'TEST'
collection_name = 'UDS_UNIT_COLLECTION'
collection_version = ''.join('24.09.10.11.00'.split('.'))  # dots removed -> '2409101100'
temp_collection_id = f'URN:NASA:UNITY:{tenant}:{tenant_venue}:{collection_name}___{collection_version}'

# Exported through the environment; the S3 check later in this notebook also uses it as the key prefix.
os.environ['COLLECTION_ID'] = temp_collection_id


In [11]:
# NOTE: 'upoad' is the module spelling the recorded run of this notebook imported successfully;
# do not "correct" it without checking the installed package.
from mdps_ds_lib.stage_in_out.upoad_granules_factory import UploadGranulesFactory
from mdps_ds_lib.stage_in_out.stage_in_out_utils import StageInOutUtils

# Create the local results directory up front (FileUtils was imported in the stage-in section).
FileUtils.mk_dir_p(os.environ['OUTPUT_DIRECTORY'])

# Nothing is passed explicitly: every parameter comes from the environment variables set earlier.
upload_type = os.getenv('GRANULES_UPLOAD_TYPE', UploadGranulesFactory.UPLOAD_S3_BY_STAC_CATALOG)
uploader = UploadGranulesFactory().get_class(upload_type)
upload_result_str = uploader.upload()

# Persist the result for later review (presumably to the OUTPUT_FILE path — confirm in the library).
StageInOutUtils.write_output_to_file(upload_result_str)
print('done')

done


In [12]:
from mdps_ds_lib.lib.aws.aws_s3 import AwsS3

# Confirm the upload by listing what sits in the staging bucket under the collection ID prefix.
s3 = AwsS3()
uploaded_entries = s3.get_child_s3_files(os.environ['STAGING_BUCKET'], os.environ['COLLECTION_ID'])
s3_keys = list(map(str, uploaded_entries))  # each entry is stringified as-is, one per output line
print('\n'.join(s3_keys))

('URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100/URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc', 1852557979)
('URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100/URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:EMIT_L1B_RAD_001_20230620T084426_2317106_011/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc.stac.json', 1724)
('URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100/URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100:EMIT_L2A_RFL_001_20230620T084426_2317106_011/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc', 1851092294)
('URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTION___2409101100/URN:NASA:UNITY:UDS_DEMO:TEST:UDS_UNIT_COLLECTIO