Skip to content

Commit

Permalink
Merge aaa073e into b827394
Browse files Browse the repository at this point in the history
  • Loading branch information
pointe77 committed Mar 30, 2021
2 parents b827394 + aaa073e commit 076e6e1
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 4 deletions.
30 changes: 27 additions & 3 deletions intake_dal/dal_catalog.py
Expand Up @@ -20,13 +20,16 @@ class DalCatalog(NestedYAMLFileCatalog):
name = "dal_cat"
version = pkg_resources.get_distribution("intake-dal").version

def __init__(self, path, storage_mode=None, autoreload=True, **kwargs):
def __init__(self, path=None, catalog_data=None, storage_mode=None, autoreload=True, **kwargs):
"""
Parameters
----------
path: str
Location of the file to parse (can be remote)
reload : bool
catalog_data: dict
If the catalog data is already in memory, pass it via `catalog_data` to populate the intake catalog.
If the catalog lives in a local file or at a specific URL, use the `path` argument instead.
autoreload: bool
Whether to watch the source file for changes; make False if you want
an editable Catalog
storage_mode: str
Expand All @@ -49,7 +52,18 @@ def __init__(self, path, storage_mode=None, autoreload=True, **kwargs):
>>> df = cat.user_events.read()
"""
self.storage_mode = storage_mode
super(DalCatalog, self).__init__(path, autoreload, **kwargs)

self.is_path = False
if catalog_data and not path:
# The caller supplied in-memory catalog data and no path.
self.path_or_catalog = catalog_data
else:
# The caller supplied a path or URL (or neither argument).
# Any catalog_data passed alongside a path is ignored.
self.path_or_catalog = path
self.is_path = True

super(DalCatalog, self).__init__(self.path_or_catalog, autoreload, **kwargs)

def __getitem__(self, key):
# TODO(Taleb Zeghmi): Remove once https://github.com/zillow/intake-nested-yaml-catalog/issues/6 is resolved
Expand All @@ -59,6 +73,16 @@ def __getitem__(self, key):
ret = super().__getitem__(key)
return ret

def _load(self, reload=False):
    """Load the catalog from its path/URL or from in-memory catalog data.

    Parameters
    ----------
    reload: bool
        Forwarded to the parent loader when the catalog is backed by a
        path or URL. It is not used for in-memory catalog data.
    """
    if self.is_path:
        # File path or URL: the parent class loads and parses it.
        # Forward `reload` so an explicit reload request is not silently
        # dropped on the way to the parent loader.
        # NOTE(review): assumes the parent `_load` accepts `reload`, as this
        # override's own signature suggests -- confirm against the base class.
        super()._load(reload=reload)
        return

    # In-memory catalog data needs no directory/URL information, so set
    # `_dir` to an empty value and hand the data to `parse` as YAML text.
    self._dir = ""
    self.parse(yaml.dump(self.path_or_catalog))

def parse(self, text):
data = yaml_load(text)

Expand Down
5 changes: 5 additions & 0 deletions intake_dal/tests/conftest.py
Expand Up @@ -10,6 +10,11 @@ def catalog_path():
return str(Path(__file__).resolve().parent.joinpath(Path("catalog.yaml")))


@pytest.fixture
def remote_catalog_path():
    """Return the path of `remote_storage_catalog.yaml` next to this file, as a string."""
    tests_dir = Path(__file__).resolve().parent
    return str(tests_dir / "remote_storage_catalog.yaml")


@pytest.fixture
def serving_cat(catalog_path: str):
    """Build a `DalCatalog` from `catalog_path` with storage mode "serving"."""
    catalog = DalCatalog(catalog_path, storage_mode="serving")
    return catalog
Expand Down
27 changes: 27 additions & 0 deletions intake_dal/tests/remote_storage_catalog.yaml
@@ -0,0 +1,27 @@
name: My Sample Catalog
metadata:
hierarchical_catalog: true
entity:
property:
user_event:
args:
default: batch
storage:
batch: parquet://https://my_storage.com/user_event/date={{date}}/*.parquet
golden: parquet://https://my_storage.com/user_event/golden/date={{date}}/*.parquet
description: This is user_event description
driver: dal
metadata:
owner_team: my-team
public: false
user_dataset:
args:
default: batch
storage:
batch: parquet://https://my_storage.com/user_dataset/date={{date}}/*.parquet
serving: dal-online://https://featurestore.url.net#userid
description: This is user_dataset description
driver: dal
metadata:
owner_team: my-team
public: false
16 changes: 15 additions & 1 deletion intake_dal/tests/test_dal_catalog.py
@@ -1,5 +1,5 @@
import yaml
import pandas as pd

from intake_dal.dal_catalog import DalCatalog


Expand Down Expand Up @@ -69,3 +69,17 @@ def validate_dataset(ds):
validate_dataset(cat["entity.user.user_events"])
validate_dataset(cat.entity["user.user_events"])
validate_dataset(cat.entity.user["user_events"])


def test_dal_catalog_passing_dict(remote_catalog_path):
    """Check a catalog built from an in-memory dict with storage mode "golden"."""
    with open(remote_catalog_path, 'r') as f:
        # `yaml.load` without an explicit Loader is deprecated (and an error
        # on newer PyYAML releases); `safe_load` parses plain YAML data.
        data = yaml.safe_load(f)

    # Pass the catalog data read from the file instead of a path.
    cat = DalCatalog(catalog_data=data, storage_mode="golden")

    assert cat.entity.property.user_event.default == "golden"
    assert cat.entity.property.user_dataset.default == "golden"

    assert len(cat.entity.property.user_event.storage) == 2
    assert len(cat.entity.property.user_dataset.storage) == 2

0 comments on commit 076e6e1

Please sign in to comment.