This notebook demonstrates fast access to the SMOS archive using **boto3**.

In [1]:
import boto3
import json

# Read the S3 access credentials from a local JSON file; the parsed object is
# kept in `credentials` for the cells that follow.
with open("creodias-credentials.json") as credentials_file:
    credentials = json.loads(credentials_file.read())

In [2]:
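# Alternative (kept for reference, not executed): build the client through an
# explicit boto3 Session object instead of calling boto3.client() directly.
# session = boto3.session.Session()
# s3_client = session.client(
#    service_name='s3',
#    aws_access_key_id=credentials["key"],
#    aws_secret_access_key=credentials["secret"],
#    endpoint_url='https://s3.cloudferro.com',
# )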

In [3]:
# S3 client bound to the CloudFerro endpoint, authenticated with the
# "key" / "secret" entries of the loaded credentials.
s3_client = boto3.client(
    service_name="s3",
    endpoint_url="https://s3.cloudferro.com",
    aws_access_key_id=credentials["key"],
    aws_secret_access_key=credentials["secret"],
)

In [4]:
# Paginator for the ListObjectsV2 operation; the listing helpers iterate over
# the result pages it produces. Reference:
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/paginator/ListObjectsV2.html
s3_paginator = s3_client.get_paginator(operation_name="list_objects_v2")


def yield_keys(bucket_name: str, prefix: str = "", paginator=None):
    """Recursively yield every object key stored below ``prefix``.

    The bucket is walked one "directory" level at a time (``Delimiter="/"``).
    For each result page, every reported common prefix is descended into
    first; the objects sitting directly on the current level are yielded
    afterwards.

    Args:
        bucket_name: Name of the bucket to list.
        prefix: Key prefix to start from. A trailing ``/`` is appended when it
            is missing; the empty string is passed through unchanged.
        paginator: Object exposing ``paginate(Bucket=..., Prefix=...,
            Delimiter=...)`` that returns an iterable of ``list_objects_v2``
            style page dicts. Defaults to the module-level ``s3_paginator``,
            so existing two-argument calls behave as before.

    Yields:
        Object keys as strings. Keys that end with ``/`` are skipped.
    """
    if paginator is None:
        # Resolved at call time so the notebook-level paginator stays the default.
        paginator = s3_paginator
    if prefix and not prefix.endswith("/"):
        prefix += "/"
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter="/")
    for page in pages:
        # Pages may omit "CommonPrefixes" / "Contents"; treat that as empty.
        for common_prefix in page.get("CommonPrefixes", ()):
            yield from yield_keys(
                bucket_name,
                prefix=common_prefix["Prefix"],
                paginator=paginator,
            )
        for content in page.get("Contents", ()):
            key = content["Key"]
            if not key.endswith("/"):
                yield key


def yield_prefixes(bucket_name: str, prefix: str = "", paginator=None):
    """Yield the common prefixes found directly below ``prefix``.

    Only one level is listed (``Delimiter="/"``); nothing is descended into.

    Args:
        bucket_name: Name of the bucket to list.
        prefix: Key prefix to list below. A trailing ``/`` is appended when it
            is missing; the empty string is passed through unchanged.
        paginator: Object exposing ``paginate(Bucket=..., Prefix=...,
            Delimiter=...)`` that returns an iterable of ``list_objects_v2``
            style page dicts. Defaults to the module-level ``s3_paginator``,
            so existing two-argument calls behave as before.

    Yields:
        The ``"Prefix"`` string of every common prefix entry, in the order the
        pages report them.
    """
    if paginator is None:
        # Resolved at call time so the notebook-level paginator stays the default.
        paginator = s3_paginator
    if prefix and not prefix.endswith("/"):
        prefix += "/"
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter="/")
    for page in pages:
        # Pages without sub-prefixes carry no "CommonPrefixes" entry.
        for common_prefix in page.get("CommonPrefixes", ()):
            yield common_prefix["Prefix"]

In [5]:
# List the first-level prefixes below the product prefix.
prefixes = yield_prefixes("EODATA", prefix="SMOS/L2OS/MIR_OSUDP2")
# Pairing with range(102) caps the listing: entries with index 0..101 are
# printed and nothing further is pulled from the generator.
for index, prefix in zip(range(102), prefixes):
    print(prefix)

SMOS/L2OS/MIR_OSUDP2/2010/
SMOS/L2OS/MIR_OSUDP2/2011/
SMOS/L2OS/MIR_OSUDP2/2012/
SMOS/L2OS/MIR_OSUDP2/2013/
SMOS/L2OS/MIR_OSUDP2/2014/
SMOS/L2OS/MIR_OSUDP2/2015/
SMOS/L2OS/MIR_OSUDP2/2016/
SMOS/L2OS/MIR_OSUDP2/2017/
SMOS/L2OS/MIR_OSUDP2/2018/
SMOS/L2OS/MIR_OSUDP2/2019/
SMOS/L2OS/MIR_OSUDP2/2020/
SMOS/L2OS/MIR_OSUDP2/2021/
SMOS/L2OS/MIR_OSUDP2/2022/
SMOS/L2OS/MIR_OSUDP2/2023/


In [6]:
# Walk the product prefix recursively and show the first object keys found.
keys = yield_keys("EODATA", prefix="SMOS/L2OS/MIR_OSUDP2")
# Pairing with range(102) caps the listing: entries with index 0..101 are
# printed and nothing further is pulled from the generator.
for index, key in zip(range(102), keys):
    print(key)

SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T014135_20100601T020457_700_100_1/SM_REPR_MIR_OSUDP2_20100601T014135_20100601T020457_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T123158_20100601T132517_700_100_1/SM_REPR_MIR_OSUDP2_20100601T123158_20100601T132517_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T105153_20100601T114513_700_100_1/SM_REPR_MIR_OSUDP2_20100601T105153_20100601T114513_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T000131_20100601T001849_700_100_1/SM_REPR_MIR_OSUDP2_20100601T000131_20100601T001849_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T100157_20100601T105511_700_100_1/SM_REPR_MIR_OSUDP2_20100601T100157_20100601T105511_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_20100601T200225_20100601T205537_700_100_1/SM_REPR_MIR_OSUDP2_20100601T200225_20100601T205537_700_100_1.nc
SMOS/L2OS/MIR_OSUDP2/2010/06/01/SM_REPR_MIR_OSUDP2_2010060