Skip to content

Commit

Permalink
Add/R requirements parser (#18)
Browse files Browse the repository at this point in the history
* bug fixes for badge generation
* adding support for parsing an R requirements file (in DESCRIPTION)
* we do not need to test saving
* test error

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
Co-authored-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch and vsoch committed Apr 3, 2022
1 parent 061d28f commit 8adfd57
Show file tree
Hide file tree
Showing 13 changed files with 416 additions and 53 deletions.
3 changes: 2 additions & 1 deletion citelang/main/packages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from .github import GitHubManager
from .spack import SpackManager
from .pypi_requirements import RequirementsManager
from .cran_description import RPackageManager

# Registered endpoints (populated on init)
managers = {}
manager_names = []

for manager in [GitHubManager, SpackManager, RequirementsManager]:
for manager in [GitHubManager, SpackManager, RequirementsManager, RPackageManager]:
manager_names.append(manager.name)
managers[manager.name] = manager

Expand Down
62 changes: 62 additions & 0 deletions citelang/main/packages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
from citelang.logger import logger
import citelang.main.cache as cache
import citelang.utils as utils
import requests


Expand Down Expand Up @@ -57,3 +58,64 @@ def get_or_fail(self, url, headers=None, return_text=False):

def package(self, name, **kwargs):
raise NotImplementedError


class PackagesFromFile(PackageManager):
    """
    A package manager whose package metadata comes from file content.

    Subclasses must implement parse(), which receives the content and is
    expected to fill self.data (package() reads the "package" key from it).
    """

    def __init__(self, package_name=None, content=None):
        """
        Record the (optionally versioned) package name and, when content
        is provided, parse it right away.
        """
        self.version = None
        self.package_name = package_name
        super().__init__()
        if package_name:
            self.set_name(package_name)
        self.data = {}
        if content:
            self.parse(content)

    def set_name(self, name):
        """
        Store a cleaned package name.

        Everything after the first "@" is kept as the version, a bracketed
        suffix starting at the first "[" is dropped, and ";" characters
        are removed.
        """
        base, marker, pinned = name.partition("@")
        if marker:
            self.version = pinned
        if "[" in base and "]" in base:
            base = base[: base.index("[")]

        # invalid characters found!
        self.package_name = base.replace(";", "")

    def get_repo(self):
        """
        Return the start of the repo metadata (name, a single version entry
        and the default version). Intended to be used in self.parse().
        """
        # The "repo" is the package name, we can't be sure about versions,
        # so the number falls back to "latest" when no version was given
        number = self.version or "latest"
        return {
            "name": self.package_name,
            "versions": [
                {
                    "published_at": utils.get_time_now(),
                    "number": number,
                }
            ],
            "default_version": number,
        }

    def package(self, name, **kwargs):
        """
        Return the parsed package data; the name and kwargs are ignored.
        The result is None if nothing was stored under "package".
        """
        return self.data.get("package")

    def parse(self, content):
        """
        Parse the provided content. Must be implemented by subclasses.
        """
        raise NotImplementedError
107 changes: 107 additions & 0 deletions citelang/main/packages/cran_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
__author__ = "Vanessa Sochat"
__copyright__ = "Copyright 2022, Vanessa Sochat"
__license__ = "MPL 2.0"

# Parse an R DESCRIPTION file to generate a "package"
import citelang.main.endpoints as endpoints

import re

from .base import PackagesFromFile


class RPackageManager(PackagesFromFile):
    """
    Packages from an R DESCRIPTION file, meaning an R package.

    Dependencies are read from the "Imports" field of the DESCRIPTION
    content and each one is looked up with the underlying (cran) manager.
    """

    name = "R-Package"
    underlying_manager = "cran"
    default_language = "R"
    project_count = None
    homepage = "https://cran.r-project.org/"
    color = "#006dad"
    default_versions = None

    # Comparison operators allowed in a version requirement. Two-character
    # operators are listed first so ">=" is never matched as just ">".
    _operators = re.compile(r"(>=|<=|==|!=|>|<)")

    def _parse_imports(self, content):
        """
        Return the raw entries of the Imports field, e.g.
        ["tibble", "cli (>= 3.2.0)"], or an empty list if there is no
        such field.
        """
        chunks = []
        parsing = False
        for line in content.split("\n"):
            if parsing:
                # A field continues only on lines that start with whitespace
                # (space or tab); anything else starts the next field
                if not line[:1].isspace():
                    break
                chunks.append(line)
            elif line.startswith("Imports:"):
                parsing = True
                # Entries may already begin on the same line as the field name
                chunks.append(line[len("Imports:") :])

        # Entries are comma separated and may share a line or span several
        entries = (entry.strip() for entry in " ".join(chunks).split(","))
        return [entry for entry in entries if entry]

    def _parse_requirement(self, entry):
        """
        Split one Imports entry into a (package_name, version) tuple.

        The version is only returned for "==", "<=" and ">=" requirements.
        For the other operators (">", "<", "!=") the stated version is
        itself excluded, so None is returned and the caller falls back to
        the latest release.
        """
        entry = entry.replace("(", " ").replace(")", " ")

        # With a capturing group and maxsplit=1 the result is either [name]
        # or [name, operator, version]
        parts = self._operators.split(entry, maxsplit=1)
        package_name = parts[0].strip()
        version = None
        if len(parts) == 3 and parts[1] in ("==", "<=", ">="):
            version = parts[2].strip() or None
        return package_name, version

    def parse(self, content):
        """
        Parse the content of a DESCRIPTION file (a string).

        Populates self.data with "package" and "dependencies" and returns
        the package (repo) metadata including its dependencies.
        """
        repo = self.get_repo()

        deps = []
        for entry in self._parse_imports(content):
            package_name, version = self._parse_requirement(entry)

            # We cannot parse a dep without a name
            if not package_name:
                continue

            # Try to get from cache - either versioned or not
            if version:
                cache_name = f"package/cran/{package_name}/{version}"
            else:
                cache_name = f"package/cran/{package_name}"

            pkg = None
            result = self.cache.get(cache_name)
            if result:
                pkg = endpoints.get_endpoint("package", data=result)

            # Not cached, so ask the underlying (cran) manager
            if pkg is None:
                pkg = endpoints.get_endpoint(
                    "package",
                    package_name=package_name,
                    manager=self.underlying_manager,
                )

            # Ensure we have version, fallback to latest
            if not version:
                version = pkg.data["latest_release_number"]

            # Require saving to cache here - many expensive calls
            cache_name = f"package/cran/{package_name}/{version}"
            self.cache.set(cache_name, pkg)

            # use latest release version. This will be wrong for an old
            # dependency, but it's not worth it to make a ton of extra API calls
            dep = {
                "name": package_name,
                "project_name": package_name,
                "number": version,
                "published_at": pkg.data["latest_stable_release_published_at"],
                "researched_at": None,
                "spdx_expression": "NOASSERTION",
                "original_license": pkg.data["licenses"],
                "repository_sources": ["Cran"],
            }
            deps.append(dep)

        repo["dependencies"] = deps
        self.data["package"] = repo
        self.data["dependencies"] = deps
        return repo
52 changes: 4 additions & 48 deletions citelang/main/packages/pypi_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,68 +4,30 @@

# Parse a requirements.txt file to generate a "package"
import citelang.main.endpoints as endpoints
import citelang.utils as utils

import re

from .base import PackageManager
from .base import PackagesFromFile


class RequirementsManager(PackageManager):
class RequirementsManager(PackagesFromFile):
"""
Packages from GitHub, either release, or branch.
Packages parsed from a requirements.txt file (so from pypi)
"""

name = "requirements.txt"
underlying_manager = "pypi"
default_language = None
project_count = None
homepage = "pypi.org/"
color = "#006dad"
default_language = "Python"
default_versions = None

def __init__(self, package_name=None, content=None):
"""
A requirements manager, unlike other custom packages, does parsing
of dependencies that are provided in content (a list read from
requirements.txt) and ignores the name passed to package or dependencies.
"""
self.version = None
self.package_name = package_name
super().__init__()
if package_name:
self.set_name(package_name)
self.data = {}
if content:
self.parse(content)

def set_name(self, name):
if "@" in name:
name, version = name.split("@", 1)
self.version = version
if "[" in name and "]" in name:
name = name.split("[", 1)[0]

# invalid characters found!
name = name.replace(";", "")
self.package_name = name

def parse(self, content):
"""
Parse the self.content (the requirements.txt file)
"""
# The "repo" is the package name, we can't be sure about versions
versions = [
{
"published_at": utils.get_time_now(),
"number": self.version or "latest",
}
]
repo = {"name": self.package_name, "versions": versions}

# Try to provide a default version
repo["default_version"] = self.version or "latest"
repo = self.get_repo()

# Dependencies we parse as pypi packages
# This should also update the cache and make it easier to retrieve later
Expand Down Expand Up @@ -132,9 +94,3 @@ def parse(self, content):
self.data["package"] = repo
self.data["dependencies"] = deps
return repo

def package(self, name, **kwargs):
"""
The package endpoint ignores the name and just returns parsed data
"""
return self.data.get("package")
18 changes: 14 additions & 4 deletions citelang/main/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,19 +255,29 @@ def gen(self, name, filename=None, *args, **kwargs):
# Do we have a known dependency file?
basename = os.path.basename(filename)
pkg = None
if basename == "requirements.txt":
if basename in ["requirements.txt", "DESCRIPTION"]:
manager_kwargs = {"content": self.content, "package_name": name}

# Custom set the name of the manager
manager = (
"requirements.txt" if basename == "requirements.txt" else "R-Package"
)

pkg = package.get_package(
manager="requirements.txt",
manager=manager,
name=name,
manager_kwargs=manager_kwargs,
)
# Populate dependencies and package
pkg.info()

uid = "requirements.txt:%s" % filename
self.roots[uid] = self._graph(manager="pypi", name=name, pkg=pkg, **kwargs)
uid = "%s:%s" % (manager, filename)
self.roots[uid] = self._graph(
manager=pkg.underlying_manager.underlying_manager,
name=name,
pkg=pkg,
**kwargs,
)

if not pkg:
logger.exit(f"Dependency file type {basename} not known, or none found.")
Expand Down
50 changes: 50 additions & 0 deletions citelang/tests/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,56 @@
import citelang.main.client as client
import citelang.main.schemas as schemas

here = os.path.dirname(os.path.abspath(__file__))


@pytest.mark.parametrize(
    "name,filename,deps",
    [
        (
            "python-lib",
            "requirements.txt",
            [
                "pypi",
                "pytest",
                "types",
                "requests",
                "sphinx",
                "babel",
                "docutils",
                "Pygments",
                "pypi",
                "requirements.txt",
            ],
        ),
        (
            "r-lib",
            "DESCRIPTION",
            [
                "cran",
                "rmarkdown",
                "knitr",
                "callr",
                "methods",
                "R",
                "shiny",
                "R-Package",
            ],
        ),
    ],
)
def test_package_files(name, filename, deps):
    """
    Test loading custom package files.

    Each case generates the credit for a dependency file in testdata and
    checks that the rendered output mentions the package name and every
    expected string in deps.
    """
    cli = client.get_parser(filename=os.path.join(here, "testdata", filename))
    result = cli.gen(name=name)

    content = result.render()

    # Print before asserting so the rendered output is part of the captured
    # stdout that is available when an assertion fails
    print(content)
    for string in ["Software Credit", name] + deps:
        assert string in content, f"{string!r} not found in rendered output"


@pytest.mark.parametrize(
"manager,name",
Expand Down
28 changes: 28 additions & 0 deletions citelang/tests/testdata/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Package: eia
Title: API Wrapper for 'US Energy Information Administration' Open Data
Version: 0.3.7
Authors@R: person("Matthew", "Leonawicz", email = "mfleonawicz@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9452-2771"))
Description: Provides API access to data from the 'US Energy Information Administration' ('EIA') <https://www.eia.gov/>.
Use of the API requires a free API key obtainable at <https://www.eia.gov/opendata/register.php>.
The package includes functions for searching 'EIA' data categories and importing time series and geoset time series datasets.
Datasets returned by these functions are provided in a tidy format or alternatively in more raw form.
It also offers helper functions for working with 'EIA' date strings and time formats and for inspecting different summaries of series metadata.
The package also provides control over API key storage and caching of API request results.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
URL: https://docs.ropensci.org/eia/ (website) https://github.com/ropensci/eia
BugReports: https://github.com/ropensci/eia/issues
Imports:
tibble,
cli (>= 3.2.0)
Suggests:
testthat,
knitr,
rmarkdown,
covr,
tidyr,
ggplot2
VignetteBuilder: knitr
Language: en-US
RoxygenNote: 7.1.1

0 comments on commit 8adfd57

Please sign in to comment.