This notebook demonstrates how to access the SMOS NetCDF archive.

In [1]:
import json
import xarray as xr
import fsspec
import timeit
import os
import os.path
import shutil

In [2]:
# Data variables kept from files whose remote path contains "/SMOS/L2OS/"
# (selected in open_one). include_vars additionally always keeps
# "Grid_Point_ID", so it does not need to be listed here.
OS_VAR_NAMES = {
    "Mean_acq_time",
    "SSS_corr",
    "Sigma_SSS_corr",
    "SSS_anom",
    "Sigma_SSS_anom",
    "Dg_chi2_corr",
    "Dg_quality_SSS_corr",
    "Dg_quality_SSS_anom",
    "Coast_distance",
    "Dg_RFI_X",
    "Dg_RFI_Y",
    "X_swath",
}

# Data variables kept from files whose remote path contains "/SMOS/L2SM/"
# (selected in open_one). "Grid_Point_ID" is kept implicitly by include_vars.
SM_VAR_NAMES = {
    "Mean_acq_time",
    "Soil_Moisture",
    "Soil_Moisture_DQX",
    "Chi_2",
    "Chi_2_P",
    "N_RFI_X",
    "N_RFI_Y",
    "RFI_Prob",
    "X_swath",
}

In [3]:
# Local directory under which open_one stores its reduced NetCDF copies,
# mirroring the remote path; clean_cache removes this directory entirely.
NC_CACHE_DIR = "./nc_cache"

In [4]:
def clean_cache():
    """Delete the local NetCDF cache directory and everything below it.

    Does nothing when NC_CACHE_DIR does not exist or is not a directory.
    """
    if not os.path.isdir(NC_CACHE_DIR):
        return
    shutil.rmtree(NC_CACHE_DIR)

In [5]:
def include_vars(ds, var_names):
    """Return ``ds`` with all non-selected data variables dropped.

    A data variable is kept only when ``var_names`` is not None and the
    variable is either contained in ``var_names`` or is named
    "Grid_Point_ID". Every other data variable is passed to
    ``ds.drop_vars``.

    :param ds: Object providing ``data_vars`` and ``drop_vars``.
    :param var_names: Container of variable names to keep, or None.
    :return: The result of ``ds.drop_vars(...)``.
    """
    # NOTE(review): with var_names=None *every* data variable is dropped,
    # including "Grid_Point_ID". Confirm this is intended rather than
    # "None means keep everything".
    def is_selected(name):
        return var_names is not None and (
            name in var_names or name == "Grid_Point_ID"
        )

    unwanted = [name for name in ds.data_vars if not is_selected(name)]
    return ds.drop_vars(unwanted)

In [6]:
# Credentials are read from a local JSON file rather than hard-coded here.
# NOTE(review): presumably a JSON object holding the S3 access key/secret
# under the keyword names the filesystem expects - confirm against the file.
with open("creodias-credentials.json") as f:
    credentials = json.load(f)
    
# Keyword arguments later expanded into fsspec.filesystem("s3", ...):
# the fixed endpoint URL plus every entry of the credentials mapping.
storage_options = dict(
    endpoint_url='https://s3.cloudferro.com',
    **credentials
)

In [7]:
# Filesystem object for the "s3" protocol, configured with storage_options;
# it is passed to get_remote_files, open_one and open_all below.
fs = fsspec.filesystem("s3", **storage_options)

In [8]:
def get_remote_files(fs, prefix):
    """List all files found below ``prefix`` on filesystem ``fs``.

    :param fs: Object providing ``walk(prefix)`` that yields
        ``(root, dirs, files)`` triples.
    :param prefix: Path at which the walk starts.
    :return: List of ``"{root}/{name}"`` strings, in walk order.
        Empty file names are skipped.
    """
    return [
        f"{folder}/{name}"
        for folder, _, names in fs.walk(prefix)
        for name in names
        if name
    ]

In [9]:
# Collect the paths of all files found below the 2023-06-01 folder of the
# L2OS MIR_OSUDP2 archive, then display the list.
remote_files = get_remote_files(fs, "EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01")
remote_files

['EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T012104_20230601T021417_700_001_1/SM_OPER_MIR_OSUDP2_20230601T012104_20230601T021417_700_001_1.nc',
 'EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T030110_20230601T035422_700_001_1/SM_OPER_MIR_OSUDP2_20230601T030110_20230601T035422_700_001_1.nc',
 'EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T044112_20230601T053426_700_001_1/SM_OPER_MIR_OSUDP2_20230601T044112_20230601T053426_700_001_1.nc',
 'EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T053109_20230601T062428_700_001_1/SM_OPER_MIR_OSUDP2_20230601T053109_20230601T062428_700_001_1.nc',
 'EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T080121_20230601T085434_700_001_1/SM_OPER_MIR_OSUDP2_20230601T080121_20230601T085434_700_001_1.nc',
 'EODATA/SMOS/L2OS/MIR_OSUDP2/2023/06/01/SM_OPER_MIR_OSUDP2_20230601T094126_20230601T103439_700_001_1/SM_OPER_MIR_OSUDP2_20230601T094126_20230601T103439_700_001

In [10]:
def open_all(fs, remote_files):
    """Open every path in ``remote_files`` with ``open_one``.

    :param fs: Filesystem object forwarded to ``open_one``.
    :param remote_files: Iterable of remote file paths.
    :return: List of the objects returned by ``open_one``, in input order.
    """
    datasets = []
    for remote_file in remote_files:
        datasets.append(open_one(fs, remote_file))
    return datasets


def open_one(fs, remote_file):
    """Open a remote NetCDF file through a reduced local cache copy.

    On a cache miss the remote file is downloaded, reduced to the
    variables selected for its product type (see ``include_vars``), its
    global attributes are restricted to those starting with
    "VH:SPH:MI:TI:" (with that prefix stripped), and the result is stored
    under ``NC_CACHE_DIR`` using the remote path as relative path.

    The reduced file is first written to a ``.part`` file and only then
    moved to its final name. An interrupted or failed run therefore never
    leaves a truncated file that later calls would treat as a valid cache
    entry, and the intermediate files are always removed.

    :param fs: Filesystem object providing ``get(remote, local)``.
    :param remote_file: Path of the file on ``fs``.
    :return: Dataset opened from the local cache file with
        ``decode_cf=False`` and ``chunks={}``.
    """
    local_file = f"{NC_CACHE_DIR}/{remote_file}"
    if not os.path.isfile(local_file):
        key_prefix = "VH:SPH:MI:TI:"
        if "/SMOS/L2OS/" in remote_file:
            var_names = OS_VAR_NAMES
        elif "/SMOS/L2SM/" in remote_file:
            var_names = SM_VAR_NAMES
        else:
            var_names = None
        os.makedirs(os.path.dirname(local_file), exist_ok=True)
        # Raw download target.
        temp_file = local_file + ".temp"
        # Reduced dataset is written here before being published.
        part_file = local_file + ".part"
        try:
            fs.get(remote_file, temp_file)
            with xr.open_dataset(temp_file, decode_cf=False, chunks={}) as ds:
                dataset = include_vars(ds, var_names)
                dataset.attrs = {k[len(key_prefix):]: v
                                 for k, v in dataset.attrs.items()
                                 if k.startswith(key_prefix)}
                # Must happen while ``ds`` is still open: the data is lazy.
                dataset.to_netcdf(part_file)
            # Publish the finished file in one step so that the isfile()
            # check above can never see a half-written cache entry.
            os.replace(part_file, local_file)
        finally:
            # Also runs on KeyboardInterrupt; never leave leftovers behind.
            for leftover in (temp_file, part_file):
                if os.path.exists(leftover):
                    os.remove(leftover)
    return xr.open_dataset(local_file, decode_cf=False, chunks={})

In [11]:
# Remove any existing local cache so the following open_one call has to
# download and process the file.
clean_cache()

In [12]:
# Open the first listed file and display the resulting dataset.
open_one(fs, remote_files[0])

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 229.09 kiB 229.09 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint16 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 229.09 kiB 229.09 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint16 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 229.09 kiB 229.09 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint16 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 229.09 kiB 229.09 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint16 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 229.09 kiB 229.09 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint16 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,229.09 kiB,229.09 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,114.54 kiB,114.54 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 114.54 kiB 114.54 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint8 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,114.54 kiB,114.54 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint32 numpy.ndarray,uint32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type uint32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,uint32 numpy.ndarray,uint32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 458.18 kiB 458.18 kiB Shape (117294,) (117294,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",117294  1,

Unnamed: 0,Array,Chunk
Bytes,458.18 kiB,458.18 kiB
Shape,"(117294,)","(117294,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# Opens every listed file; each one not yet in the local cache is downloaded
# and rewritten first, so this cell can take a while on a cold cache.
datasets = open_all(fs, remote_files)

KeyboardInterrupt: 

In [None]:
# Display the first opened dataset; requires the open_all cell above to have
# run to completion so that `datasets` is defined.
datasets[0]