Skip to content

Commit

Permalink
master
Browse files Browse the repository at this point in the history
  • Loading branch information
mfshao committed Jun 10, 2022
2 parents be511cd + 026ba23 commit ac38754
Show file tree
Hide file tree
Showing 24 changed files with 1,329 additions and 1,432 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
- uses: actions/checkout@v2
with:
ref: ${{ github.head_ref }}
- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: 3.9
- uses: actions/cache@preview
with:
path: ~/.cache/pypoetry/virtualenvs
Expand All @@ -30,14 +30,14 @@ jobs:
poetry install -vv
# install sphinx from PyPI (as of 03/16/21 python3-sphinx is broken)
# sudo apt-get install python3-sphinx
pip install 'sphinx<4.0.0'
pip install sphinx
pip uninstall -y asyncio
cd
- name: Build docs
run: |
sphinx-build --version
source $HOME/.poetry/env
export PYTHONPATH="${PYTHONPATH}:${{ env.pythonLocation }}/lib/python3.8/site-packages"
export PYTHONPATH="${PYTHONPATH}:${{ env.pythonLocation }}/lib/python3.9/site-packages"
cd docs
poetry run make html
cd ..
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repos:
- id: no-commit-to-branch
args: [--branch, develop, --branch, master, --pattern, release/.*]
- repo: https://github.com/psf/black
rev: 20.8b1
rev: 22.3.0
hooks:
- id: black
additional_dependencies: ['click==8.0.4']
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ build:
image: latest

python:
version: 3.8
version: 3.9
setup_py_install: true

formats: []
18 changes: 9 additions & 9 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"files": "(docs\\/_build|poetry.lock)|^.secrets.baseline$",
"lines": null
},
"generated_at": "2022-02-22T16:55:11Z",
"generated_at": "2022-06-07T20:32:38Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -350,49 +350,49 @@
{
"hashed_secret": "96c9184fb19c9c1618ccf44d141f8029a739891c",
"is_verified": false,
"line_number": 159,
"line_number": 426,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "e1da93616713812cb50e0ac845b1e9e305d949f1",
"is_verified": false,
"line_number": 355,
"line_number": 562,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "47f42f4c34fddab383b817e689dc0fb75af81266",
"is_verified": false,
"line_number": 379,
"line_number": 586,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "300d95dd5d30ab6928ffda6c08c6a129a23e5b39",
"is_verified": false,
"line_number": 403,
"line_number": 610,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "f9e664db75c7f23a299b0b055c10e08d47073e93",
"is_verified": false,
"line_number": 471,
"line_number": 678,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "7c35c215b326b9463b669b657c1ff9873ff53d9a",
"is_verified": false,
"line_number": 612,
"line_number": 819,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "0d515eaf06062d52e8c80abb4d3b713a65396d30",
"is_verified": false,
"line_number": 616,
"line_number": 823,
"type": "Hex High Entropy String"
},
{
"hashed_secret": "b4cff7c2af45cdfe66195ec574a7b8832f8621ea",
"is_verified": false,
"line_number": 621,
"line_number": 828,
"type": "Hex High Entropy String"
}
],
Expand Down
8 changes: 5 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ python:
- 3.10
jobs:
allow_failures:
- python: 3.9
- python: 3.6
- python: 3.7
- python: 3.8
- python: 3.10
before_install:
- pip install --upgrade pip
Expand All @@ -31,7 +33,7 @@ deploy:
skip_cleanup: true
script: poetry publish -n -vv -r testpypi || true
on:
python: 3.8
python: 3.9
repo: uc-cdis/gen3sdk-python
tags: false
all_branches: true
Expand All @@ -41,7 +43,7 @@ deploy:
skip_cleanup: true
script: poetry publish -n -vv
on:
python: 3.8
python: 3.9
repo: uc-cdis/gen3sdk-python
tags: true
env:
Expand Down
4 changes: 2 additions & 2 deletions docs/howto/diirmIndexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ occurred. md5, size, url, and authz values can be validated.

`is_valid_manifest_format` can validate md5, size, url and authz values by
making use of the `MD5Validator`, `SizeValidator`, `URLValidator`, and
`AuthzValidator` classes defined in `gen3.tools.indexing.manifest_columns`,
`AuthzValidator` classes defined in `gen3.tools.utils`,
respectively. See documentation in these `Validator` subclasses for more details
on how specific values are validated.

Expand Down Expand Up @@ -308,7 +308,7 @@ import sys
import logging

from gen3.tools.indexing import is_valid_manifest_format
from gen3.tools.indexing.manifest_columns import Columns
from gen3.tools.utils import Columns

logging.basicConfig(filename="output.log", level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
Expand Down
14 changes: 13 additions & 1 deletion gen3/cli/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,19 @@ def manifest():
type=int,
show_default=True,
)
@click.option(
"--input-manifest",
"input_manifest",
help="Input file. Read available object data only for records referenced in this file. "
"Currently requires at a minimum an `md5` column with checksum.",
default=None,
type=click.Path(writable=True),
show_default=True,
)
@click.pass_context
def objects_manifest_read(ctx, output_file, num_processes, max_concurrent_requests):
def objects_manifest_read(
ctx, output_file, num_processes, max_concurrent_requests, input_manifest
):
auth = ctx.obj["auth_factory"].get()
loop = get_or_create_event_loop_for_thread()
click.echo(f"Getting minimal object metadata from {auth.endpoint}")
Expand All @@ -66,6 +77,7 @@ def objects_manifest_read(ctx, output_file, num_processes, max_concurrent_reques
output_filename=output_file,
num_processes=num_processes,
max_concurrent_requests=max_concurrent_requests,
input_manifest=input_manifest,
)
)
click.echo(output_file)
Expand Down
28 changes: 28 additions & 0 deletions gen3/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,34 @@ async def async_get_records_on_page(self, limit=None, page=None, _ssl=None):

return response.get("records")

@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_get_records_from_checksum(
    self, checksum, checksum_type="md5", _ssl=None
):
    """
    Asynchronous function to request records from indexd matching checksum.

    Args:
        checksum (str): indexd checksum to request
        checksum_type (str): type of checksum, defaults to md5
        _ssl: forwarded unchanged as the ``ssl`` argument of the aiohttp
            GET request

    Returns:
        List[dict]: List of indexd records, i.e. the value stored under the
            "records" key of the JSON response (None if that key is absent)
    """
    # The hash filter is sent as a single `hash=<type>:<value>` query
    # parameter; urlencode takes care of escaping the ":" separator.
    query = urllib.parse.urlencode({"hash": f"{checksum_type}:{checksum}"})
    url = f"{self.client.url}/index" + "?" + query

    async with aiohttp.ClientSession() as session:
        async with session.get(url, ssl=_ssl) as response:
            # Decode into a separate name rather than rebinding `response`
            # while its context manager is still open.
            payload = await response.json()

    return payload.get("records")

@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
def get(self, guid, dist_resolution=True):
"""
Expand Down
26 changes: 6 additions & 20 deletions gen3/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
_verify_schema,
)
from gen3.auth import Gen3Auth
from gen3.tools.indexing.manifest_columns import (
from gen3.tools.utils import (
RECORD_TYPE_STANDARD_KEY,
GUID_COLUMN_NAMES,
FILENAME_COLUMN_NAMES,
Expand Down Expand Up @@ -511,7 +511,6 @@ def _extract_non_indexd_metadata(metadata):
indexd_doc.file_name,
indexd_doc.size,
indexd_doc.hashes,
indexd_doc.authz,
indexd_doc.urls,
package_contents,
)
Expand All @@ -528,19 +527,18 @@ def _extract_non_indexd_metadata(metadata):
return to_submit

def _get_package_metadata(
self, submitted_metadata, file_name, file_size, hashes, authz, urls, contents
self, submitted_metadata, file_name, file_size, hashes, urls, contents
):
"""
The MDS /objects API currently expects files that have not been
The MDS Objects API currently expects files that have not been
uploaded yet. For files we only need to index, not upload, create
object records manually by generating the expected object fields.
TODO: update the MDS objects API to not create upload URLs if the
relevant data is provided.
"""

def _get_buckets_and_filename_from_urls(submitted_metadata, urls):
def _get_filename_from_urls(submitted_metadata, urls):
file_name = ""
bucket_urls = []
if not urls:
logging.warning(f"No URLs provided for: {submitted_metadata}")
for url in urls:
Expand All @@ -552,19 +550,12 @@ def _get_buckets_and_filename_from_urls(submitted_metadata, urls):
logging.warning(
f"Received multiple URLs with different file names; will use the first URL (file name '{file_name}'): {submitted_metadata}"
)
parsed = urlparse(url)
_bucket_url = f"{parsed.scheme}://{parsed.netloc}"
bucket_urls.append(_bucket_url)
return file_name, bucket_urls
return file_name

file_name_from_url, bucket_urls = _get_buckets_and_filename_from_urls(
submitted_metadata, urls
)
file_name_from_url = _get_filename_from_urls(submitted_metadata, urls)
if not file_name:
file_name = file_name_from_url

_, file_ext = os.path.splitext(file_name)
uploader = self._auth_provider._token_info.get("sub")
now = str(datetime.utcnow())
metadata = {
"type": "package",
Expand All @@ -577,11 +568,6 @@ def _get_buckets_and_filename_from_urls(submitted_metadata, urls):
"hashes": hashes,
"contents": contents or None,
},
"_resource_paths": authz,
"_uploader_id": uploader,
"_buckets": bucket_urls,
"_filename": file_name,
"_file_extension": file_ext,
"_upload_status": "uploaded",
}
return metadata
2 changes: 1 addition & 1 deletion gen3/submission.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import itertools
import json
import requests
import pandas as pd
import os
from cdislogging import get_logger
import pandas as pd

from gen3.utils import raise_for_status

Expand Down
2 changes: 1 addition & 1 deletion gen3/tools/bundle/ingest_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from drsclient.client import DrsClient
from gen3.auth import Gen3Auth
from gen3.utils import UUID_FORMAT, SIZE_FORMAT, _verify_format, _standardize_str
from gen3.tools.indexing.manifest_columns import (
from gen3.tools.utils import (
GUID_COLUMN_NAMES,
SIZE_COLUMN_NAMES,
BUNDLENAME_COLUMN_NAME,
Expand Down
1 change: 1 addition & 0 deletions gen3/tools/download/drs_resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ def resolve_drs_using_commons_mds(
"commons_mds": resolve_drs_using_commons_mds,
"dataguids_dist": resolve_compact_drs_using_indexd_dist,
"dataguids": resolve_compact_drs_using_official_resolver,
# TODO "identifiers.org"
}


Expand Down
Loading

0 comments on commit ac38754

Please sign in to comment.