In [66]:
import xarray as xr
import os
from datetime import datetime
import json

In [78]:
# Export every non-ancillary variable of every file found under "nc" as JSON,
# one file per N_PROF group, written to
# out/<variable>/<lat>_<lon>_<time>_<variable>.json.
# NOTE: this cell calls collect_ancilary_vars() and dtjson(), which are defined
# in cells further down this notebook; those cells must be run first.
for root, dirs, files in os.walk("nc"):
    for file in files:
        fin = os.path.join(root, file)
        nc = xr.load_dataset(fin)
        ancillary_variables = collect_ancilary_vars(nc)
        for var in nc:
            # Variables that are listed in another variable's
            # "ancillary_variables" attribute are not exported on their own.
            if var in ancillary_variables:
                continue
            # The target directory depends only on the variable name, so it is
            # created once per variable rather than once per profile.
            out_dir = os.path.join("out", var)
            os.makedirs(out_dir, exist_ok=True)
            for _, group in nc[var].groupby("N_PROF", restore_coord_dims=False):
                lat = float(group.latitude)
                lon = float(group.longitude)
                # Reduce the timestamp to minute resolution, then convert it to
                # a Python datetime so the format spec in the filename applies.
                time = group.time.values.astype("datetime64[m]").astype(datetime)
                fname = f"{lat}_{lon}_{time:%Y%m%dT%H%M}_{var}.json"
                path = os.path.join(out_dir, fname)
                with open(path, "w") as f:
                    # dtjson handles the datetime objects json cannot encode natively.
                    json.dump(group.to_dict(), f, default=dtjson)

In [16]:
def collect_ancilary_vars(ds: xr.Dataset):
    """Return the set of names listed in ``ancillary_variables`` attributes.

    The dataset is first narrowed with ``filter_by_attrs`` to the variables
    whose ``ancillary_variables`` attribute is not ``None``. Each such
    attribute is split on whitespace and the resulting names are merged.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset whose variables are inspected.

    Returns
    -------
    set
        The unique names found; empty when no variable has the attribute.
    """
    with_ancillary = ds.filter_by_attrs(
        ancillary_variables=lambda value: value is not None
    )
    return {
        name
        for var in with_ancillary
        for name in ds[var].attrs["ancillary_variables"].split()
    }

In [4]:
# Open a single file from the "nc" directory as the dataset `nc`; the
# single-file export and the "oxygen" preview cells below read this variable.
nc = xr.open_dataset("nc/06AQ19870704_hy1.csv.nc")

In [69]:
def dtjson(o):
    """``default`` hook for ``json.dump``/``json.dumps`` that encodes datetimes.

    Parameters
    ----------
    o : object
        A value the JSON encoder could not serialise by itself.

    Returns
    -------
    str
        ``o.isoformat()`` when ``o`` is a ``datetime``.

    Raises
    ------
    TypeError
        For any other type. Falling off the end of the function would return
        ``None``, which the encoder writes as ``null`` and so silently loses
        the value; raising follows the contract of the ``default`` hook.
    """
    if isinstance(o, datetime):
        return o.isoformat()
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

In [73]:
# Export the already opened dataset `nc`: each non-ancillary variable is
# written as JSON, one file per N_PROF group, directly into "out" as
# <lat>_<lon>_<time>_<variable>.json.
ancillary_variables = collect_ancilary_vars(nc)
os.makedirs("out", exist_ok=True)
for var in nc:
    # Only variables that no other variable lists as ancillary are exported.
    if var not in ancillary_variables:
        for _, group in nc[var].groupby("N_PROF"):
            lat = float(group.latitude)
            lon = float(group.longitude)
            # Minute resolution first, then a Python datetime for the
            # format spec used in the filename.
            time = group.time.values.astype("datetime64[m]")
            time = time.astype(datetime)
            fname = f"{lat}_{lon}_{time:%Y%m%dT%H%M}_{var}.json"
            path = os.path.join("out", fname)
            with open(path, "w") as f:
                json.dump(group.to_dict(), f, default=dtjson)

In [36]:
# Print the to_dict() representation of the "oxygen" variable, one N_PROF
# group at a time.
for label,ds in nc[["oxygen"]].groupby("N_PROF"):
    # Remove the "comments" attribute before printing (presumably to keep the
    # output short). pop() with a default is used instead of `del` so that a
    # group whose attrs do not contain the key -- a file without it, or an
    # attrs dict already stripped on an earlier iteration if the groups share
    # it (TODO confirm) -- does not raise KeyError.
    ds.attrs.pop("comments", None)
    print(ds.to_dict())

{'coords': {'time': {'dims': (), 'attrs': {'standard_name': 'time', 'axis': 'T', 'whp_name': ['DATE', 'TIME'], 'resolution': 0.000694}, 'data': datetime.datetime(1987, 7, 7, 17, 30)}, 'sample': {'dims': ('N_LEVELS',), 'attrs': {'whp_name': 'SAMPNO'}, 'data': ['24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '13', '14', '11', '5', '4', '10', '9', '8', '7', '12', '6', '3', '2', '1']}, 'latitude': {'dims': (), 'attrs': {'whp_name': 'LATITUDE', 'standard_name': 'latitude', 'units': 'degree_north', 'axis': 'Y'}, 'data': 81.2717}, 'longitude': {'dims': (), 'attrs': {'whp_name': 'LONGITUDE', 'standard_name': 'longitude', 'units': 'degree_east', 'axis': 'X'}, 'data': 31.4033}, 'station': {'dims': (), 'attrs': {'whp_name': 'STNNBR'}, 'data': '269'}, 'expocode': {'dims': (), 'attrs': {'whp_name': 'EXPOCODE'}, 'data': '06AQ19870704'}, 'pressure': {'dims': ('N_LEVELS',), 'attrs': {'whp_name': 'CTDPRS', 'whp_unit': 'DBAR', 'standard_name': 'sea_water_pressure', 'units': 'dbar', 'axis': '