Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update xgi_data.py #254

Merged
merged 10 commits into from
Dec 20, 2022
3 changes: 2 additions & 1 deletion docs/source/api/readwrite/xgi.readwrite.xgi_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ xgi.readwrite.xgi_data

.. rubric:: Functions

.. autofunction:: load_xgi_data
.. autofunction:: download_xgi_data
84 changes: 73 additions & 11 deletions xgi/readwrite/xgi_data.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
import requests

import json
import os
from warnings import warn
from .. import convert
from ..exception import XGIError

# Public API of this module: the loader and the local-copy downloader.
__all__ = ["load_xgi_data", "download_xgi_data"]

def load_xgi_data(
    dataset, path="", read=True, nodetype=None, edgetype=None, max_order=None
):
    """Load a data set from the xgi-data repository or a local file.

    Parameters
    ----------
    dataset : str
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository.
    path : str, optional
        Directory that is searched for a local copy named
        ``<dataset>.json``. By default "", which resolves relative to the
        current working directory.
    read : bool, optional
        If True (default), search for a local copy of the data set. Use the
        local copy if it exists, otherwise warn and use the xgi-data
        repository. If False, always use the xgi-data repository.
    nodetype : type, optional
        Type to cast the node ID to
    edgetype : type, optional
        Forwarded unchanged to `convert.dict_to_hypergraph`
        (presumably the type to cast the edge ID to -- confirm).
    max_order : optional
        Forwarded unchanged to `convert.dict_to_hypergraph`.

    Returns
    -------
    The object built by `convert.dict_to_hypergraph` from the loaded data
    (presumably a hypergraph -- confirm against `convert`).

    Raises
    ------
    XGIError
        The specified dataset does not exist.
    """
    if read:
        cfp = os.path.join(path, dataset + ".json")
        if os.path.exists(cfp):
            # Context manager guarantees the handle is closed even when the
            # file does not contain valid JSON.
            with open(cfp, "r") as file:
                data = json.load(file)
        else:
            warn(
                f"No local copy was found at {cfp}. The data is requested "
                "from the xgi-data repository instead. To download a local "
                "copy, use `download_xgi_data`."
            )
            data = _request_from_xgi_data(dataset)
    else:
        data = _request_from_xgi_data(dataset)

    return convert.dict_to_hypergraph(
        data, nodetype=nodetype, edgetype=edgetype, max_order=max_order
    )


def download_xgi_data(dataset, path=""):
    """Make a local copy of a dataset in the xgi-data repository.

    The data is written as JSON to ``<dataset>.json`` inside `path`.

    Parameters
    ----------
    dataset : str
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository.
    path : str, optional
        Path to where the local copy should be saved. If none is given, save
        file to local directory.
    """
    # Fetch first so that a failed request does not leave an empty file behind.
    jsondata = _request_from_xgi_data(dataset)

    # The context manager closes the file even if serialization fails.
    with open(os.path.join(path, dataset + ".json"), "w") as jsonfile:
        json.dump(jsondata, jsonfile)


def _request_from_xgi_data(dataset):
    """Request a dataset from xgi-data.

    The repository's index.json is fetched first; the entry for the
    lower-cased dataset name supplies the URL of the actual data file.

    Parameters
    ----------
    dataset : str
        Dataset name. Valid options are the top-level tags of the
        index.json file in the xgi-data repository. The name is lower-cased
        before the lookup.

    Returns
    -------
    Data
        The requested data loaded from a json file.

    Raises
    ------
    XGIError
        If the lower-cased dataset name is not a key of the index. The valid
        names are printed before the error is raised.

    See Also
    --------
    load_xgi_data
    """
    index_url = "https://gitlab.com/complexgroupinteractions/xgi-data/-/raw/main/index.json?inline=false"
    index = requests.get(index_url).json()

    key = dataset.lower()
    if key not in index:
        print("Valid dataset names:")
        print(*index, sep="\n")
        raise XGIError("Must choose a valid dataset name!")

    # Single request for the data file itself, decoded from JSON.
    data = requests.get(index[key]["url"]).json()

    return data