In [1]:
import atexit
import os
import sys
import tarfile
import time
import types
import warnings
from multiprocessing import Pipe, Process
from multiprocessing.connection import Connection
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import urlsplit

import numpy as np
from more_itertools import always_iterable

from yt.data_objects.static_output import Dataset
from yt.funcs import levenshtein_distance
from yt.sample_data.api import lookup_on_disk_data
from yt.utilities.decompose import decompose_array, get_psize
from yt.utilities.exceptions import (
    MountError,
    YTAmbiguousDataType,
    YTIllDefinedAMR,
    YTSimulationNotIdentified,
    YTUnidentifiedDataType,
)
from yt.utilities.hierarchy_inspection import find_lowest_subclasses
from yt.utilities.lib.misc_utilities import get_box_grids_level
from yt.utilities.logger import ytLogger as mylog
from yt.utilities.object_registries import (
    output_type_registry,
    simulation_time_series_registry,
)
from yt.utilities.on_demand_imports import _pooch as pooch, _ratarmount as ratarmount

AttributeError: 'dict' object has no attribute 'attrs'

In [6]:
# --- Loader for yt sample datasets ---
def load_sample(fn: Optional[str] = None, *, progressbar: bool = True, timeout=None, **kwargs):
    """
    Load a yt sample dataset by name, downloading it first if it is not on disk.

    Parameters
    ----------
    fn : str, optional
        Name of the sample dataset, optionally followed by a path to a specific
        file inside of it (``"<dataset>/<path/to/file>"``). The value is passed
        through ``str`` and forward slashes are converted to the platform
        separator. When omitted, a hint on how to list the available datasets
        is printed to stderr and None is returned.
    progressbar : bool
        Forwarded to the download helper.
    timeout : optional
        Forwarded to the download helper. Ignored (with a log message) when the
        dataset is already available locally.
    **kwargs
        Extra keyword arguments forwarded to ``load``; they take precedence
        over the ``load_kwargs`` stored in the registry for this dataset.

    Returns
    -------
    Whatever ``load`` returns for the resolved path, or None when ``fn`` is None.

    Raises
    ------
    ValueError
        If the dataset name is not in the registry (close matches are
        suggested in the message), or if its registry entry does not hold a
        ``load_kwargs`` dict.
    tarfile.TarError
        If a downloaded tarball contains a member that would be written
        outside of the sample data directory.
    """
    if fn is None:
        print(
            "One can see which sample datasets are available at: https://yt-project.org/data\n"
            "or alternatively by running: yt.sample_data.api.get_data_registry_table()",
            file=sys.stderr,
        )
        return None

    from yt.sample_data.api import (
        _download_sample_data_file,
        _get_test_data_dir_path,
        get_data_registry_table,
    )

    # normalize the separator, and accept anything str() can convert
    fn = str(fn).replace("/", os.path.sep)

    # first path component is the dataset name, the remainder (possibly empty)
    # designates a specific file within that dataset
    topdir, _, specific_file = fn.partition(os.path.sep)

    registry_table = get_data_registry_table()

    known_names: List[str] = registry_table.dropna()["filename"].to_list()
    if topdir not in known_names:
        msg = f"'{topdir}' is not an available dataset."
        lexical_distances: List[Tuple[str, int]] = [
            (name, levenshtein_distance(name, topdir)) for name in known_names
        ]
        suggestions: List[str] = [name for name, dist in lexical_distances if dist < 4]
        if len(suggestions) == 1:
            msg += f" Did you mean '{suggestions[0]}' ?"
        elif suggestions:
            msg += " Did you mean to type any of the following ?\n\n    "
            msg += "\n    ".join(f"'{_}'" for _ in suggestions)
        raise ValueError(msg)

    # select the registry row with a boolean mask rather than by interpolating
    # the name into a query string, so no quoting of `topdir` is involved
    specs = registry_table.loc[registry_table["filename"] == topdir].iloc[0]

    load_name = specific_file or specs["load_name"] or ""

    if not isinstance(specs["load_kwargs"], dict):
        raise ValueError(
            "The requested dataset seems to be improperly registered.\n"
            "Tip: the entry in yt/sample_data_registry.json may be inconsistent with "
            "https://github.com/yt-project/website/blob/master/data/datafiles.json\n"
            "Please report this to https://github.com/yt-project/yt/issues/new"
        )

    # user-supplied keyword arguments override the registered defaults
    kwargs = {**specs["load_kwargs"], **kwargs}

    save_dir = _get_test_data_dir_path()

    data_path = save_dir.joinpath(fn)
    if save_dir.joinpath(topdir).exists():
        # if the data is already available locally, `load_sample`
        # only acts as a thin wrapper around `load`
        if load_name and os.sep not in fn:
            data_path = data_path.joinpath(load_name)
        mylog.info("Sample dataset found in '%s'", data_path)
        if timeout is not None:
            mylog.info("Ignoring the `timeout` keyword argument received.")
        return load(data_path, **kwargs)

    mylog.info("'%s' is not available locally. Looking up online.", fn)

    # effectively silence the pooch's logger and create our own log instead
    # (the logger is looked up here because only the download path needs it)
    pooch.utils.get_logger().setLevel(100)
    mylog.info("Downloading from %s", specs["url"])

    # downloading via a pooch.Pooch instance behind the scenes
    filename = urlsplit(specs["url"]).path.split("/")[-1]

    tmp_file = _download_sample_data_file(
        filename, progressbar=progressbar, timeout=timeout
    )

    # pooch has functionalities to unpack downloaded archive files,
    # but it needs to be told in advance that we are downloading a tarball.
    # Since that information is not necessarily trivial to guess from the filename,
    # we rely on the standard library to perform a conditional unpacking instead.
    if tarfile.is_tarfile(tmp_file):
        mylog.info("Untaring downloaded file to '%s'", save_dir)
        with tarfile.open(tmp_file) as fh:
            _extract_tarball_safely(fh, save_dir)
        os.remove(tmp_file)
    else:
        # not an archive: move the file *into* the data directory under the
        # dataset's registered name. The destination must be a file path;
        # pointing os.replace at the directory itself cannot work.
        os.replace(tmp_file, save_dir.joinpath(topdir))

    loadable_path = save_dir.joinpath(fn)
    if load_name not in str(loadable_path):
        loadable_path = loadable_path.joinpath(load_name, specific_file)

    return load(loadable_path, **kwargs)


def _extract_tarball_safely(archive, destination) -> None:
    """Extract ``archive`` (an open TarFile) into ``destination``, refusing path traversal.

    Every member name is resolved against ``destination`` before anything is
    written; ``tarfile.TarError`` is raised if one of them would land outside.
    NOTE(review): only member *names* are validated here, link targets are not.
    """
    root = os.path.realpath(os.fspath(destination))
    for member in archive.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        try:
            is_inside = os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath raises when the two paths cannot share a prefix
            # (e.g. different drives); treat that as "outside"
            is_inside = False
        if not is_inside:
            raise tarfile.TarError(
                f"Refusing to extract '{member.name}': "
                f"it would be written outside of '{root}'"
            )
    archive.extractall(destination)

In [8]:
def load(
    fn: Union[str, "os.PathLike[str]"], *args, hint: Optional[str] = None, **kwargs
):
    """
    Open ``fn`` with the matching class from ``output_type_registry``.

    Parameters
    ----------
    fn : str or os.PathLike
        Path (or ``http``-prefixed location) of the data to load. If it contains
        any of the characters ``[ ] ? ! *`` it is handed to ``DatasetSeries``
        instead of being opened as a single dataset.
    *args, **kwargs
        Forwarded both to each candidate class' ``_is_valid`` check and to the
        constructor of the class that is eventually selected.
    hint : str, optional
        Forwarded to ``find_lowest_subclasses`` (or to ``DatasetSeries``).

    Returns
    -------
    A ``DatasetSeries`` for wildcard patterns, otherwise an instance of the
    single registered class that accepted ``fn``.

    Raises
    ------
    YTAmbiguousDataType
        If more than one class remains after narrowing down the candidates.
    YTUnidentifiedDataType
        If no registered class accepts ``fn``.
    """
    fn = os.fspath(fn)

    looks_like_pattern = any(char in fn for char in "[]?!*")
    if looks_like_pattern:
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, hint=hint, **kwargs)

    is_remote = fn.startswith("http")
    if not is_remote:
        # This will raise FileNotFoundError if the path isn't matched
        # either in the current dir or yt.config.ytcfg['data_dir_directory']
        fn = str(lookup_on_disk_data(fn))

    accepting = [
        frontend
        for frontend in output_type_registry.values()
        if frontend._is_valid(fn, *args, **kwargs)
    ]

    # Keep only the lowest subclasses, i.e. the most specialised front ends
    accepting = find_lowest_subclasses(accepting, hint=hint)
    n_matches = len(accepting)

    if n_matches == 1:
        (frontend,) = accepting
        return frontend(fn, *args, **kwargs)

    if n_matches > 1:
        raise YTAmbiguousDataType(fn, accepting)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)

In [9]:
# Load the "IsolatedGalaxy" sample dataset; as the last expression of the cell,
# the returned object's repr is what gets displayed in the output below.
load_sample("IsolatedGalaxy")

yt : [INFO     ] 2022-03-09 11:50:51,643 Sample dataset found in '/Users/yilinxia/Desktop/DXL/yt/IsolatedGalaxy/galaxy0030/galaxy0030'
yt : [INFO     ] 2022-03-09 11:50:51,771 Parameters: current_time              = 0.0060000200028298
yt : [INFO     ] 2022-03-09 11:50:51,773 Parameters: domain_dimensions         = [32 32 32]
yt : [INFO     ] 2022-03-09 11:50:51,775 Parameters: domain_left_edge          = [0. 0. 0.]
yt : [INFO     ] 2022-03-09 11:50:51,777 Parameters: domain_right_edge         = [1. 1. 1.]
yt : [INFO     ] 2022-03-09 11:50:51,777 Parameters: cosmological_simulation   = 0


EnzoDataset: /Users/yilinxia/Desktop/DXL/yt/IsolatedGalaxy/galaxy0030/galaxy0030