
Merge pull request #1631 from Aakash3101/correct_docs
Updated the docs
henrykironde committed Nov 30, 2021
2 parents ea5150d + fde17b2 commit e5ba505
Showing 4 changed files with 20 additions and 179 deletions.
101 changes: 11 additions & 90 deletions docs/rdatasets_api.rst
@@ -140,7 +140,7 @@ $ ``retriever install postgres rdataset-aer-usmoney``
Progress: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 136/136 [00:00<00:00, 2225.09rows/s]
Done!

-The script created for the Socrata dataset is stored in the ``rdataset-scripts`` directory in the ``~/.retriever`` directory.
+The script created for the Rdataset is stored in the ``rdataset-scripts`` directory in the ``~/.retriever`` directory.


Python Interface in Data Retriever
@@ -213,95 +213,6 @@ If no package is specified, it prints all the rdatasets, and if ``all`` is passed ...
causaldata dplyr forecast ggplot2movies hwde lmec mstate plm reshape2 stat2data vcd
Updating the Contents of Rdataset Script
----------------------------------------

The function ``update_rdataset_contents`` updates the contents of the Rdataset script created by ``create_rdataset``.

The input arguments are:
- data_obj: The dict which contains the following keys: ``csv``, ``doc`` and ``title``.
- package: The R package in which the dataset exists
- dataset_name: The dataset name
- json_file: The content of the script created

The function returns ``True, json_file`` if the ``data_obj`` dict is valid;
otherwise, it returns ``False, None``.

.. code-block:: python

   >>> import json
   >>> import retriever as rt
   >>> from retriever.lib.defaults import RDATASET_SCRIPT_WRITE_PATH
   >>> data_obj = {
   ...     'csv': 'https://vincentarelbundock.github.io/Rdatasets/csv/drc/metals.csv',  # csv file url
   ...     'doc': 'https://vincentarelbundock.github.io/Rdatasets/doc/drc/metals.html',  # documentation url
   ...     'title': 'Data from heavy metal mixture experiments',
   ... }
   >>> package = 'drc'
   >>> dataset_name = 'metals'
   >>> script_path = RDATASET_SCRIPT_WRITE_PATH
   >>> script_filename = f"rdataset_{package}_{dataset_name}" + '.json'
   >>> with open(f"{script_path}/{script_filename}", "r") as f:
   ...     json_file = json.load(f)
   >>> result, json_file = rt.update_rdataset_contents(data_obj, package, dataset_name, json_file)

Updating and Renaming the Rdataset Script
-----------------------------------------

The function ``update_rdataset_script(data_obj, dataset_name, package, script_path)`` renames the script,
calls ``update_rdataset_contents``, and then writes the updated content returned by
``update_rdataset_contents`` to the renamed script.
.. code-block:: python

   >>> import retriever as rt
   >>> from retriever.lib.defaults import RDATASET_SCRIPT_WRITE_PATH
   >>> data_obj = {
   ...     'csv': 'https://vincentarelbundock.github.io/Rdatasets/csv/drc/metals.csv',
   ...     'doc': 'https://vincentarelbundock.github.io/Rdatasets/doc/drc/metals.html',
   ...     'title': 'Data from heavy metal mixture experiments',
   ... }
   >>> script_path = RDATASET_SCRIPT_WRITE_PATH
   >>> package = 'drc'
   >>> dataset_name = 'metals'
   >>> rt.update_rdataset_script(data_obj, dataset_name, package, script_path)

Creating a Rdataset Script
--------------------------

The function ``create_rdataset(engine, package, dataset_name, script_path=None)`` creates Rdataset scripts
for retriever. It downloads the raw data, creates the script, updates it, and finally installs
the dataset into the given engine using that script.

.. note::

   If the engine is ``download``, the function only downloads the raw data files.
   With any other engine (e.g. ``postgres``), it also creates the script
   and installs the dataset into the engine provided.

.. code-block:: python

   >>> import retriever as rt
   >>> from retriever.engines import choose_engine
   >>> from retriever.lib.defaults import RDATASET_SCRIPT_WRITE_PATH
   >>>
   >>> # engine = choose_engine({'command': 'install', 'engine': 'postgres'})
   >>> # Every engine other than 'download' downloads the data, creates the script
   >>> # if it does not exist, and then installs the dataset into that engine.
   >>> # Or:
   >>> engine = choose_engine({'command': 'download'})
   >>> # The 'download' engine will just download the raw data files
   >>> script_path = RDATASET_SCRIPT_WRITE_PATH
   >>> package = 'drc'
   >>> dataset_name = 'metals'
   >>> rt.create_rdataset(engine, package, dataset_name, script_path)
   Downloading metals.csv: 3.00B [00:00, 7.24B/s]
   >>>

Downloading a Rdataset
----------------------

@@ -319,3 +230,13 @@ Installing a Rdataset
>>> import retriever as rt
>>> rt.install_postgres('rdataset-mass-galaxies')
.. note::

   For downloading or installing an Rdataset, the script name should follow the syntax
   ``rdataset-<package name>-<dataset name>``, where both the package name and the
   dataset name must be valid (see the sketch after this note).

   Example:

   - Correct: ``rdataset-drc-earthworms``
   - Incorrect: ``rdataset-drcearthworms``, ``rdatasetdrcearthworms``
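
A minimal sketch of building a valid script name from a package and dataset name and passing it
to one of the install functions shown above (the ``drc``/``earthworms`` pair is only illustrative):

.. code-block:: python

   >>> import retriever as rt
   >>> package = 'drc'
   >>> dataset_name = 'earthworms'
   >>> # Compose the required rdataset-<package name>-<dataset name> form
   >>> script_name = f"rdataset-{package}-{dataset_name}"
   >>> script_name
   'rdataset-drc-earthworms'
   >>> rt.install_postgres(script_name)
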
79 changes: 0 additions & 79 deletions docs/socrata_api.rst
@@ -218,85 +218,6 @@ The function returns a dict which contains metadata about the dataset.
'Service Providers'}
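
For reference, a minimal sketch of fetching such a metadata dict by its Socrata id with
``find_socrata_dataset_by_id`` (used throughout the examples below); the exact keys depend on
the dataset, but ``id`` and ``domain`` are the ones the later examples rely on:

.. code-block:: python

   >>> import retriever as rt
   >>> resource = rt.find_socrata_dataset_by_id('35s3-nmpm')
   >>> # The metadata dict carries, among other fields, the dataset id and
   >>> # the hosting domain, which later examples combine into a download URL
   >>> resource["id"]
   '35s3-nmpm'
   >>> resource["domain"]
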
Updating the Contents of Socrata Dataset Script
-----------------------------------------------
The function ``update_socrata_contents`` updates the contents of the socrata script created by ``create_socrata_dataset``.
The input arguments are:
- json_file: The content of the script created
- script_name: The name of the script
- url: The url through which the dataset is downloaded
- resource: The object returned by ``find_socrata_dataset_by_id``

The function returns ``True, json_file`` if the ``resource`` dict is valid;
otherwise, it returns ``False, None``.
.. code-block:: python

   import retriever as rt
   import json
   from retriever.lib.defaults import SOCRATA_SCRIPT_WRITE_PATH

   resource = rt.find_socrata_dataset_by_id('35s3-nmpm')
   filename = resource["id"] + '.csv'
   url = 'https://' + resource["domain"] + '/resource/' + filename
   script_name = 'socrata-35s3-nmpm'
   script_path = SOCRATA_SCRIPT_WRITE_PATH
   script_filename = script_name.replace("-", "_") + ".json"
   with open(f"{script_path}/{script_filename}", "r") as f:
       json_file = json.load(f)
   result, json_file = rt.update_socrata_contents(json_file, script_name, url, resource)

Updating and Renaming the Socrata Dataset Script
------------------------------------------------
The function ``update_socrata_script(script_name, filename, url, resource, script_path)`` renames the script,
calls ``update_socrata_contents``, and then writes the updated content returned by ``update_socrata_contents`` to the renamed script.
.. code-block:: python

   import retriever as rt
   from retriever.lib.defaults import SOCRATA_SCRIPT_WRITE_PATH

   script_path = SOCRATA_SCRIPT_WRITE_PATH
   resource = rt.find_socrata_dataset_by_id('35s3-nmpm')
   filename = resource["id"] + '.csv'
   url = 'https://' + resource["domain"] + '/resource/' + filename
   script_name = 'socrata-35s3-nmpm'
   rt.update_socrata_script(script_name, filename, url, resource, script_path)

Creating a Socrata Dataset Script
---------------------------------
The function ``create_socrata_dataset(engine, name, resource, script_path=None)``
creates Socrata dataset scripts for retriever. It downloads the raw data, creates the script,
updates it, and finally installs the dataset into the given engine using that script.

.. note::

   If the engine is ``download``, the function only downloads the raw data files.
   With any other engine (e.g. ``postgres``), it also creates the script
   and installs the dataset into the engine provided.
.. code-block:: python

   import retriever as rt
   from retriever.engines import choose_engine
   from retriever.lib.defaults import SOCRATA_SCRIPT_WRITE_PATH

   # engine = choose_engine({'command': 'install', 'engine': 'postgres'})
   # OR
   engine = choose_engine({'command': 'download'})
   script_path = SOCRATA_SCRIPT_WRITE_PATH
   resource = rt.find_socrata_dataset_by_id('35s3-nmpm')
   name = 'socrata-35s3-nmpm'
   rt.create_socrata_dataset(engine, name, resource, script_path)

Downloading a Socrata Dataset
-----------------------------
12 changes: 4 additions & 8 deletions retriever/lib/__init__.py
@@ -13,9 +13,8 @@
from .install import install_xml
from .install import install_hdf5
from .provenance import commit, commit_log
-from .rdatasets import (update_rdataset_catalog, create_rdataset,
-                        update_rdataset_contents, update_rdataset_script,
-                        display_all_rdataset_names, get_rdataset_names)
+from .rdatasets import (update_rdataset_catalog, display_all_rdataset_names,
+                        get_rdataset_names)
from .repository import check_for_updates
from .engine_tools import reset_retriever
from .fetch import fetch
@@ -26,8 +25,7 @@
from .scripts import get_script_citation
from .._version import __version__
from .socrata import (socrata_autocomplete_search, socrata_dataset_info,
-                      find_socrata_dataset_by_id, create_socrata_dataset,
-                      update_socrata_contents, update_socrata_script)
+                      find_socrata_dataset_by_id)

__all__ = [
'check_for_updates', 'commit', 'commit_log', 'create_package', 'datasets',
@@ -36,8 +34,6 @@
'install_json', 'install_xml', 'install_hdf5', 'fetch', 'get_script_upstream',
'get_dataset_names_upstream', 'get_retriever_citation', 'get_script_citation',
"__version__", 'socrata_autocomplete_search', 'socrata_dataset_info',
-    'find_socrata_dataset_by_id', 'create_socrata_dataset', 'update_socrata_contents',
-    'update_socrata_script', 'update_rdataset_catalog', 'create_rdataset',
-    'update_rdataset_contents', 'update_rdataset_script', 'display_all_rdataset_names',
+    'find_socrata_dataset_by_id', 'update_rdataset_catalog', 'display_all_rdataset_names',
'get_rdataset_names'
]
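
With this change, the update/create helpers are no longer re-exported from the top-level
``retriever`` package; code that still needs them imports them from their defining modules, as the
updated tests below do. A minimal sketch:

.. code-block:: python

   # After this commit these helpers are not available as retriever.<name>;
   # import them from the library modules directly instead.
   from retriever.lib.rdatasets import update_rdataset_contents
   from retriever.lib.socrata import update_socrata_contents
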
7 changes: 5 additions & 2 deletions test/test_retriever.py
@@ -15,6 +15,8 @@
from retriever.lib.engine_tools import xml2csv_test
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
+from retriever.lib.socrata import update_socrata_contents
+from retriever.lib.rdatasets import update_rdataset_contents

try:
    from retriever.lib.engine_tools import geojson2csv
@@ -456,15 +458,16 @@ def test_update_rdataset_contents(test_name, package, dataset_name, json_file, expected):
        data_obj = {'xyz': 'abc'}
    else:
        data_obj = rdatasets[package][dataset_name]
-    result, updated_json = rt.update_rdataset_contents(data_obj, package, dataset_name, json_file)
+    result, updated_json = update_rdataset_contents(data_obj, package, dataset_name,
+                                                    json_file)
    assert (result == expected[0]) and (updated_json == expected[1])


@pytest.mark.parametrize("test_name, id, json_file, script_name, url, expected", update_socrata_datasets)
def test_update_socrata_contents(test_name, id, json_file, script_name, url, expected):
"""Checks if the update socrata script updates the default script contents"""
resource = rt.find_socrata_dataset_by_id(id)
result, updated_json = rt.update_socrata_contents(json_file, script_name, url, resource)
result, updated_json = update_socrata_contents(json_file, script_name, url, resource)
assert (result == expected[0]) and (updated_json == expected[1])


