diff --git a/.github/main.workflow b/.github/main.workflow
new file mode 100644
index 000000000..fe47736a4
--- /dev/null
+++ b/.github/main.workflow
@@ -0,0 +1,9 @@
+workflow "Run python formatter" {
+  on = "pull_request"
+  resolves = ["Run wool"]
+}
+
+action "Run wool" {
+  uses = "uc-cdis/wool@master"
+  secrets = ["GITHUB_TOKEN"]
+}
diff --git a/.travis.yml b/.travis.yml
index 18e2447b0..9d89a1441 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,6 @@ dist: xenial
 language: python
 python:
-  - "2.7"
   - "3.6"
 sudo: false
diff --git a/Dockerfile b/Dockerfile
index 9aaca7060..0003b230e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,26 +1,39 @@
 # To run: docker run -v /path/to/wsgi.py:/var/www/indexd/wsgi.py --name=indexd -p 81:80 indexd
 # To check running container: docker exec -it indexd /bin/bash
-FROM quay.io/cdis/py27base:pybase2-1.0.2
+FROM quay.io/cdis/python-nginx:pybase3-1.0.0

-MAINTAINER CDIS
-RUN mkdir /var/www/indexd \
-    && chown www-data /var/www/indexd
+ENV appname=indexd

-COPY . /indexd
-COPY ./deployment/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini
+RUN apk update \
+    && apk add postgresql-libs postgresql-dev libffi-dev libressl-dev \
+    && apk add linux-headers musl-dev gcc \
+    && apk add curl bash git vim

-WORKDIR /indexd
+COPY . /$appname
+COPY ./deployment/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini
+COPY ./deployment/uwsgi/wsgi.py /$appname/wsgi.py
+WORKDIR /$appname

-RUN python -m pip install -r requirements.txt
-RUN COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >indexd/index/version_data.py
-RUN VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>indexd/index/version_data.py
-RUN python setup.py install
+RUN python -m pip install --upgrade pip \
+    && python -m pip install --upgrade setuptools \
+    && pip install -r requirements.txt
+RUN mkdir -p /var/www/$appname \
+    && mkdir -p /var/www/.cache/Python-Eggs/ \
+    && mkdir /run/nginx/ \
+    && ln -sf /dev/stdout /var/log/nginx/access.log \
+    && ln -sf /dev/stderr /var/log/nginx/error.log \
+    && chown nginx -R /var/www/.cache/Python-Eggs/ \
+    && chown nginx /var/www/$appname

 EXPOSE 80

-WORKDIR /var/www/indexd
+RUN COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >$appname/version_data.py \
+    && VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>$appname/version_data.py \
+    && python setup.py install
+
+WORKDIR /var/www/$appname

 CMD /dockerrun.sh
diff --git a/bin/index_admin.py b/bin/index_admin.py
index 2d1f32fc8..4471c5158 100644
--- a/bin/index_admin.py
+++ b/bin/index_admin.py
@@ -2,7 +2,8 @@
 import sys

 from cdislogging import get_logger

-logger = get_logger('index_admin')
+logger = get_logger("index_admin")
+

 def main(path, action=None, username=None, password=None):
     sys.path.append(path)
@@ -11,45 +12,45 @@ def main(path, action=None, username=None, password=None):
     except ImportError:
         logger.info("Can't import local_settings, import from default")
         from indexd.default_settings import settings
-    driver = settings['auth']
-    index_driver = settings['config']['INDEX']['driver']
-    alias_driver = settings['config']['ALIAS']['driver']
-    if action == 'create':
+    driver = settings["auth"]
+    index_driver = settings["config"]["INDEX"]["driver"]
+    alias_driver = settings["config"]["ALIAS"]["driver"]
+    if action == "create":
         try:
             driver.add(username, password)
-            logger.info('User {} created'.format(username))
+            logger.info("User {} created".format(username))
         except Exception as e:
-            logger.error(e.message)
+            logger.error(e)

-    elif action == 'delete':
+    elif action == "delete":
         try:
            driver.delete(username)
-            logger.info('User {} deleted'.format(username))
+            logger.info("User {} deleted".format(username))
         except Exception as e:
-            logger.error(e.message)
+            logger.error(e)

-    elif action == 'migrate_database':
+    elif action == "migrate_database":
         try:
-            logger.info('Start database migration')
+            logger.info("Start database migration")
             alias_driver.migrate_alias_database()
             index_driver.migrate_index_database()
         except Exception as e:
-            logger.error(e.message)
-if __name__ == '__main__':
+            logger.error(e)
+
+
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--path',
-        default='/var/www/indexd/',
-        help='path to find local_settings.py',
+        "--path", default="/var/www/indexd/", help="path to find local_settings.py"
     )
-    subparsers = parser.add_subparsers(title='action', dest='action')
-    create = subparsers.add_parser('create')
-    delete = subparsers.add_parser('delete')
-    migrate = subparsers.add_parser('migrate_database')
-    create.add_argument('--username', required=True)
-    create.add_argument('--password', required=True)
-    delete.add_argument('--username', required=True)
+    subparsers = parser.add_subparsers(title="action", dest="action")
+    create = subparsers.add_parser("create")
+    delete = subparsers.add_parser("delete")
+    migrate = subparsers.add_parser("migrate_database")
+    create.add_argument("--username", required=True)
+    create.add_argument("--password", required=True)
+    delete.add_argument("--username", required=True)
     args = parser.parse_args()
     main(**args.__dict__)
diff --git a/deployment/uwsgi/uwsgi.ini b/deployment/uwsgi/uwsgi.ini
index f44001532..662beb767 100644
--- a/deployment/uwsgi/uwsgi.ini
+++ b/deployment/uwsgi/uwsgi.ini
@@ -2,9 +2,11 @@ protocol = uwsgi
 socket = /var/run/gen3/uwsgi.sock
 buffer-size = 32768
+uid = nginx
+gid = nginx
+chown-socket = nginx:nginx
 chmod-socket = 666
 master = true
-processes = 2
 harakiri-verbose = true
 # No global HARAKIRI, using only user HARAKIRI, because export overwrites it
 # Cannot overwrite global HARAKIRI with user's: https://git.io/fjYuD
@@ -16,13 +18,9 @@ reload-mercy = 45
 mule-reload-mercy = 45
 disable-logging = true
 wsgi-file=/indexd/wsgi.py
-plugins = python
+plugins = python3
 vacuum = true
-uid = www-data
-gid = www-data
-pythonpath = /var/www/indexd/
 pythonpath = /indexd/
-pythonpath = /usr/local/lib/python2.7/dist-packages/

 # Initialize application in worker processes, not master. This prevents the
 # workers from all trying to open the same database connections at startup.
 lazy = true
diff --git a/wsgi.py b/deployment/uwsgi/wsgi.py
similarity index 53%
rename from wsgi.py
rename to deployment/uwsgi/wsgi.py
index c057d7ef4..1871e9130 100755
--- a/wsgi.py
+++ b/deployment/uwsgi/wsgi.py
@@ -1,4 +1,5 @@
 from indexd import get_app
 import os
-os.environ['INDEXD_SETTINGS'] = '/var/www/indexd/'
+
+os.environ["INDEXD_SETTINGS"] = "/var/www/indexd/"
 application = get_app()
diff --git a/indexd/__init__.py b/indexd/__init__.py
index 81dda98eb..26fa5c825 100644
--- a/indexd/__init__.py
+++ b/indexd/__init__.py
@@ -1 +1 @@
-from .app import get_app
+from .app import get_app
diff --git a/indexd/alias/blueprint.py b/indexd/alias/blueprint.py
index d67ae6a3f..77bcdb886 100644
--- a/indexd/alias/blueprint.py
+++ b/indexd/alias/blueprint.py
@@ -14,80 +14,82 @@
 from .errors import RevisionMismatch

-blueprint = flask.Blueprint('alias', __name__)
+blueprint = flask.Blueprint("alias", __name__)

 blueprint.config = {}
 blueprint.alias_driver = None

 ACCEPTABLE_HASHES = {
-    'md5': re.compile(r'^[0-9a-f]{32}$').match,
-    'sha1': re.compile(r'^[0-9a-f]{40}$').match,
-    'sha256': re.compile(r'^[0-9a-f]{64}$').match,
-    'sha512': re.compile(r'^[0-9a-f]{128}$').match,
+    "md5": re.compile(r"^[0-9a-f]{32}$").match,
+    "sha1": re.compile(r"^[0-9a-f]{40}$").match,
+    "sha256": re.compile(r"^[0-9a-f]{64}$").match,
+    "sha512": re.compile(r"^[0-9a-f]{128}$").match,
 }

+
 def validate_hashes(**hashes):
-    '''
+    """
     Validate hashes against known and valid hashing algorithms.
-    '''
+    """
     if not all(h in ACCEPTABLE_HASHES for h in hashes):
-        raise UserError('invalid hash types specified')
+        raise UserError("invalid hash types specified")
     if not all(ACCEPTABLE_HASHES[h](v) for h, v in hashes.items()):
-        raise UserError('invalid hash values specified')
+        raise UserError("invalid hash values specified")
+

-@blueprint.route('/alias/', methods=['GET'])
+@blueprint.route("/alias/", methods=["GET"])
 def get_alias():
-    '''
+    """
     Returns a list of records.
-    '''
-    limit = flask.request.args.get('limit')
-    try: limit = 100 if limit is None else int(limit)
+    """
+    limit = flask.request.args.get("limit")
+    try:
+        limit = 100 if limit is None else int(limit)
     except ValueError as err:
-        raise UserError('limit must be an integer')
+        raise UserError("limit must be an integer")

     if limit <= 0 or limit > 1024:
-        raise UserError('limit must be between 1 and 1024')
+        raise UserError("limit must be between 1 and 1024")

-    size = flask.request.args.get('size')
-    try: size = size if size is None else int(size)
+    size = flask.request.args.get("size")
+    try:
+        size = size if size is None else int(size)
     except ValueError as err:
-        raise UserError('size must be an integer')
+        raise UserError("size must be an integer")

     if size is not None and size < 0:
-        raise UserError('size must be > 0')
+        raise UserError("size must be >= 0")

-    start = flask.request.args.get('start')
+    start = flask.request.args.get("start")

-    hashes = flask.request.args.getlist('hash')
-    hashes = {h:v for h,v in map(lambda x: x.split(':', 1), hashes)}
+    hashes = flask.request.args.getlist("hash")
+    hashes = {h: v for h, v in [x.split(":", 1) for x in hashes]}
     # TODO FIXME this needs reworking
     validate_hashes(**hashes)
     hashes = hashes if hashes else None

     if limit < 0 or limit > 1024:
-        raise UserError('limit must be between 0 and 1024')
+        raise UserError("limit must be between 0 and 1024")

     aliases = blueprint.alias_driver.aliases(
-        start=start,
-        limit=limit,
-        size=size,
-        hashes=hashes,
+        start=start, limit=limit, size=size, hashes=hashes
     )

     base = {
-        'aliases': aliases,
-        'limit': limit,
-        'start': start,
-        'size': size,
-        'hashes': hashes,
+        "aliases": aliases,
+        "limit": limit,
+        "start": start,
+        "size": size,
+        "hashes": hashes,
     }

     return flask.jsonify(base), 200

-#@blueprint.route('/alias/<path:record>', methods=['GET'])
-#def get_alias_record(record):
+
+# @blueprint.route('/alias/<path:record>', methods=['GET'])
+# def get_alias_record(record):
 #    '''
 #    Returns a record.
 #    '''
@@ -95,26 +97,30 @@ def get_alias():
 #
 #    return flask.jsonify(ret), 200

-@blueprint.route('/alias/<path:record>', methods=['PUT'])
+
+@blueprint.route("/alias/<path:record>", methods=["PUT"])
 @authorize
 def put_alias_record(record):
-    '''
+    """
     Create or replace an existing record.
-    '''
-    try: jsonschema.validate(flask.request.json, PUT_RECORD_SCHEMA)
+    """
+    try:
+        jsonschema.validate(flask.request.json, PUT_RECORD_SCHEMA)
     except jsonschema.ValidationError as err:
         raise UserError(err)

-    rev = flask.request.args.get('rev')
+    rev = flask.request.args.get("rev")

-    size = flask.request.json.get('size')
-    hashes = flask.request.json.get('hashes')
-    release = flask.request.json.get('release')
-    metastring = flask.request.json.get('metadata')
-    host_authorities = flask.request.json.get('host_authorities')
-    keeper_authority = flask.request.json.get('keeper_authority')
+    size = flask.request.json.get("size")
+    hashes = flask.request.json.get("hashes")
+    release = flask.request.json.get("release")
+    metastring = flask.request.json.get("metadata")
+    host_authorities = flask.request.json.get("host_authorities")
+    keeper_authority = flask.request.json.get("keeper_authority")

-    record, rev = blueprint.alias_driver.upsert(record, rev,
+    record, rev = blueprint.alias_driver.upsert(
+        record,
+        rev,
         size=size,
         hashes=hashes,
         release=release,
@@ -123,46 +129,50 @@ def put_alias_record(record):
         keeper_authority=keeper_authority,
     )

-    ret = {
-        'name': record,
-        'rev': rev,
-    }
+    ret = {"name": record, "rev": rev}

     return flask.jsonify(ret), 200

-@blueprint.route('/alias/<path:record>', methods=['DELETE'])
+
+@blueprint.route("/alias/<path:record>", methods=["DELETE"])
 @authorize
 def delete_alias_record(record):
-    '''
+    """
     Delete an alias.
-    '''
-    rev = flask.request.args.get('rev')
+    """
+    rev = flask.request.args.get("rev")

     blueprint.alias_driver.delete(record, rev)

-    return '', 200
+    return "", 200
+

 @blueprint.errorhandler(NoRecordFound)
 def handle_no_record_error(err):
     return flask.jsonify(error=str(err)), 404

+
 @blueprint.errorhandler(MultipleRecordsFound)
 def handle_multiple_records_error(err):
     return flask.jsonify(error=str(err)), 409

+
 @blueprint.errorhandler(UserError)
 def handle_user_error(err):
     return flask.jsonify(error=str(err)), 400

+
 @blueprint.errorhandler(AuthError)
 def handle_auth_error(err):
     return flask.jsonify(error=str(err)), 403

+
 @blueprint.errorhandler(RevisionMismatch)
 def handle_revision_mismatch(err):
     return flask.jsonify(error=str(err)), 409

+
 @blueprint.record
 def get_config(setup_state):
-    config = setup_state.app.config['ALIAS']
-    blueprint.alias_driver = config['driver']
+    config = setup_state.app.config["ALIAS"]
+    blueprint.alias_driver = config["driver"]
diff --git a/indexd/alias/driver.py b/indexd/alias/driver.py
index fa41e7472..3c1663491 100644
--- a/indexd/alias/driver.py
+++ b/indexd/alias/driver.py
@@ -2,64 +2,72 @@
 from ..driver_base import SQLAlchemyDriverBase

-class AliasDriverABC(SQLAlchemyDriverBase):
-    '''
+class AliasDriverABC(SQLAlchemyDriverBase, metaclass=abc.ABCMeta):
+    """
     Alias Driver Abstract Base Class

     Driver interface for interacting with alias backends.
-    '''
+    """
+
     def __init__(self, conn, **config):
         super(AliasDriverABC, self).__init__(conn, **config)

-    __metaclass__ = abc.ABCMeta
-
     @abc.abstractmethod
-    def aliases(self, limit=100, start='', size=None, urls=None, hashes=None):
-        '''
+    def aliases(self, limit=100, start="", size=None, urls=None, hashes=None):
+        """
         Returns a list of aliases.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
-    def upsert(self, name, rev=None, size=None, hashes={}, release=None,
-            metadata=None, host_authorities=[], keeper_authority=None, **kwargs):
-        '''
+    def upsert(
+        self,
+        name,
+        rev=None,
+        size=None,
+        hashes=None,
+        release=None,
+        metastring=None,
+        host_authorities=None,
+        keeper_authority=None,
+    ):
+        """
         Update or insert alias record.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def get(self, did):
-        '''
+        """
         Gets a record given the record id.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def delete(self, did, rev):
-        '''
+        """
         Deletes record.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def __contains__(self, did):
-        '''
+        """
         Returns True if record is stored by backend.
         Returns False otherwise.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def __iter__(self):
-        '''
+        """
         Returns an iterator over unique records stored by backend.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def __len__(self):
-        '''
+        """
         Returns the number of unique records stored by backend.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")
diff --git a/indexd/alias/drivers/alchemy.py b/indexd/alias/drivers/alchemy.py
index 2c6545f75..7182055fc 100644
--- a/indexd/alias/drivers/alchemy.py
+++ b/indexd/alias/drivers/alchemy.py
@@ -30,93 +30,103 @@

 class AliasSchemaVersion(Base):
-    '''
+    """
     Table to track current database's schema version
-    '''
-    __tablename__ = 'alias_schema_version'
+    """
+
+    __tablename__ = "alias_schema_version"
     version = Column(Integer, primary_key=True)

+
 class AliasRecord(Base):
-    '''
+    """
     Base alias record representation.
-    '''
-    __tablename__ = 'alias_record'
+    """
+
+    __tablename__ = "alias_record"

     name = Column(String, primary_key=True)
     rev = Column(String)
     size = Column(BigInteger)
-    hashes = relationship('AliasRecordHash',
-        backref='alias_record',
-        cascade='all, delete-orphan',
+    hashes = relationship(
+        "AliasRecordHash", backref="alias_record", cascade="all, delete-orphan"
     )
     release = Column(String)
     metastring = Column(String)
-    host_authorities = relationship('AliasRecordHostAuthority',
-        backref='alias_record',
-        cascade='all, delete-orphan',
+    host_authorities = relationship(
+        "AliasRecordHostAuthority", backref="alias_record", cascade="all, delete-orphan"
     )
     keeper_authority = Column(String)

+
 class AliasRecordHash(Base):
-    '''
+    """
     Base alias record hash representation.
-    '''
-    __tablename__ = 'alias_record_hash'
+    """
+
+    __tablename__ = "alias_record_hash"

-    name = Column(String, ForeignKey('alias_record.name'), primary_key=True)
+    name = Column(String, ForeignKey("alias_record.name"), primary_key=True)
     hash_type = Column(String, primary_key=True)
     hash_value = Column(String)

+
 class AliasRecordHostAuthority(Base):
-    '''
+    """
     Base alias record host authority representation.
-    '''
-    __tablename__ = 'alias_record_host_authority'
+    """
+
+    __tablename__ = "alias_record_host_authority"

-    name = Column(String, ForeignKey('alias_record.name'), primary_key=True)
+    name = Column(String, ForeignKey("alias_record.name"), primary_key=True)
     host = Column(String, primary_key=True)

+
 class SQLAlchemyAliasDriver(AliasDriverABC):
-    '''
+    """
     SQLAlchemy implementation of alias driver.
-    '''
+    """

     def __init__(self, conn, logger=None, auto_migrate=True, **config):
-        '''
+        """
         Initialize the SQLAlchemy database driver.
-        '''
+        """
         super(SQLAlchemyAliasDriver, self).__init__(conn, **config)
-        self.logger = logger or get_logger('SQLAlchemyAliasDriver')
+        self.logger = logger or get_logger("SQLAlchemyAliasDriver")
         Base.metadata.bind = self.engine
         self.Session = sessionmaker(bind=self.engine)

         is_empty_db = is_empty_database(driver=self)
         Base.metadata.create_all()
         if is_empty_db:
-            init_schema_version(driver=self, model=AliasSchemaVersion, version=CURRENT_SCHEMA_VERSION)
+            init_schema_version(
+                driver=self, model=AliasSchemaVersion, version=CURRENT_SCHEMA_VERSION
+            )

         if auto_migrate:
             self.migrate_alias_database()

     def migrate_alias_database(self):
-        '''
+        """
         migrate alias database to match CURRENT_SCHEMA_VERSION
-        '''
+        """
         migrate_database(
-            driver=self, migrate_functions=SCHEMA_MIGRATION_FUNCTIONS,
+            driver=self,
+            migrate_functions=SCHEMA_MIGRATION_FUNCTIONS,
             current_schema_version=CURRENT_SCHEMA_VERSION,
-            model=AliasSchemaVersion)
+            model=AliasSchemaVersion,
+        )

     @property
     @contextmanager
     def session(self):
-        '''
+        """
         Provide a transactional scope around a series of operations.
-        '''
+        """
         session = self.Session()

         try:
@@ -129,9 +139,9 @@ def session(self):
             session.close()

     def aliases(self, limit=100, start=None, size=None, hashes=None):
-        '''
+        """
         Returns list of records stored by the backend.
-        '''
+        """
         with self.session as session:
             query = session.query(AliasRecord)

@@ -142,12 +152,12 @@ def aliases(self, limit=100, start=None, size=None, hashes=None):
                 query = query.filter(AliasRecord.size == size)

             if hashes is not None:
-                for h,v in hashes.items():
-                    subq = (
-                        session.query(AliasRecordHash.name)
-                        .filter(and_(
-                            AliasRecordHash.hash_type == h,
-                            AliasRecordHash.hash_value == v))
+                for h, v in hashes.items():
+                    subq = session.query(AliasRecordHash.name).filter(
+                        and_(
+                            AliasRecordHash.hash_type == h,
+                            AliasRecordHash.hash_value == v,
+                        )
                     )
                     query = query.filter(AliasRecord.name.in_(subq.subquery()))

@@ -156,36 +166,48 @@ def aliases(self, limit=100, start=None, size=None, hashes=None):

             return [i.name for i in query]

-    def upsert(self, name, rev=None, size=None, hashes={}, release=None,
-            metastring=None, host_authorities=[], keeper_authority=None):
-        '''
+    def upsert(
+        self,
+        name,
+        rev=None,
+        size=None,
+        hashes=None,
+        release=None,
+        metastring=None,
+        host_authorities=None,
+        keeper_authority=None,
+    ):
+        """
         Updates or inserts a new record.
-        '''
+        """
+
+        hashes = hashes or {}
+        host_authorities = host_authorities or []

         with self.session as session:
             query = session.query(AliasRecord)
             query = query.filter(AliasRecord.name == name)

-            try: record = query.one()
+            try:
+                record = query.one()
             except NoResultFound as err:
                 record = AliasRecord()
             except MultipleResultsFound as err:
-                raise MultipleRecordsFound('multiple records found')
+                raise MultipleRecordsFound("multiple records found")

             record.name = name
             if rev is not None and record.rev and rev != record.rev:
-                raise RevisionMismatch('revision mismatch')
+                raise RevisionMismatch("revision mismatch")

             if size is not None:
                 record.size = size

             if hashes is not None:
-                record.hashes = [AliasRecordHash(
-                    name=record,
-                    hash_type=h,
-                    hash_value=v,
-                ) for h,v in hashes.items()]
+                record.hashes = [
+                    AliasRecordHash(name=record, hash_type=h, hash_value=v)
+                    for h, v in hashes.items()
+                ]

             if release is not None:
                 record.release = release
@@ -194,10 +216,10 @@ def upsert(self, name, rev=None, size=None, hashes={}, release=None,
                 record.metastring = metastring

             if host_authorities is not None:
-                record.host_authorities = [AliasRecordHostAuthority(
-                    name=name,
-                    host=host,
-                ) for host in host_authorities]
+                record.host_authorities = [
+                    AliasRecordHostAuthority(name=name, host=host)
+                    for host in host_authorities
+                ]

             if keeper_authority is not None:
                 record.keeper_authority = keeper_authority
@@ -209,18 +231,19 @@ def upsert(self, name, rev=None, size=None, hashes={}, release=None,
         return record.name, record.rev

     def get(self, name):
-        '''
+        """
         Gets a record given the record name.
-        '''
+        """
         with self.session as session:
             query = session.query(AliasRecord)
             query = query.filter(AliasRecord.name == name)

-            try: record = query.one()
+            try:
+                record = query.one()
             except NoResultFound as err:
-                raise NoRecordFound('no record found')
+                raise NoRecordFound("no record found")
             except MultipleResultsFound as err:
-                raise MultipleRecordsFound('multiple records found')
+                raise MultipleRecordsFound("multiple records found")

             rev = record.rev

@@ -232,42 +255,43 @@ def get(self, name):
             keeper_authority = record.keeper_authority

         ret = {
-            'name': name,
-            'rev': rev,
-            'size': size,
-            'hashes': hashes,
-            'release': release,
-            'metadata': metastring,
-            'host_authorities': host_authorities,
-            'keeper_authority': keeper_authority,
+            "name": name,
+            "rev": rev,
+            "size": size,
+            "hashes": hashes,
+            "release": release,
+            "metadata": metastring,
+            "host_authorities": host_authorities,
+            "keeper_authority": keeper_authority,
         }

         return ret

     def delete(self, name, rev=None):
-        '''
+        """
         Removes a record.
-        '''
+        """
         with self.session as session:
             query = session.query(AliasRecord)
             query = query.filter(AliasRecord.name == name)

-            try: record = query.one()
+            try:
+                record = query.one()
             except NoResultFound as err:
-                raise NoRecordFound('no record found')
+                raise NoRecordFound("no record found")
             except MultipleResultsFound as err:
-                raise MultipleRecordsFound('multiple records found')
+                raise MultipleRecordsFound("multiple records found")

             if rev is not None and rev != record.rev:
-                raise RevisionMismatch('revision mismatch')
+                raise RevisionMismatch("revision mismatch")

             session.delete(record)

     def __contains__(self, record):
-        '''
+        """
         Returns True if record is stored by backend.
         Returns False otherwise.
-        '''
+        """
         with self.session as session:
             query = session.query(AliasRecord)
             query = query.filter(AliasRecord.name == record)
@@ -275,25 +299,27 @@ def __contains__(self, record):

             return query.exists()

     def __iter__(self):
-        '''
+        """
         Iterator over unique records stored by backend.
-        '''
+        """
         with self.session as session:
             for i in session.query(AliasRecord):
                 yield i.name

     def __len__(self):
-        '''
+        """
         Number of unique records stored by backend.
-        '''
+        """
         with self.session as session:
             return session.query(AliasRecord).count()


 def migrate_1(session, **kwargs):
     session.execute(
-        "ALTER TABLE {} ALTER COLUMN size TYPE bigint;"
-        .format(AliasRecord.__tablename__))
+        "ALTER TABLE {} ALTER COLUMN size TYPE bigint;".format(
+            AliasRecord.__tablename__
+        )
+    )


 # ordered schema migration functions that the index should correspond to
diff --git a/indexd/alias/errors.py b/indexd/alias/errors.py
index 037eefe68..eed8883c3 100644
--- a/indexd/alias/errors.py
+++ b/indexd/alias/errors.py
@@ -1,19 +1,22 @@
 class BaseAliasError(Exception):
-    '''
+    """
     Base alias error.
-    '''
+    """
+

 class NoRecordFound(BaseAliasError):
-    '''
+    """
     No record error.
-    '''
+    """
+

 class MultipleRecordsFound(BaseAliasError):
-    '''
+    """
     Multiple records error.
-    '''
+    """
+

 class RevisionMismatch(BaseAliasError):
-    '''
+    """
     Revision mismatch.
-    '''
+    """
diff --git a/indexd/alias/schema.py b/indexd/alias/schema.py
index 54dcbf776..d7601342f 100644
--- a/indexd/alias/schema.py
+++ b/indexd/alias/schema.py
@@ -1,102 +1,47 @@
 PUT_RECORD_SCHEMA = {
-    "$schema": "http://json-schema.org/schema#",
-    "type": "object",
-    "additionalProperties": False,
-    "description": "Update or create an alias",
-    "required": [
-        "size",
-        "hashes",
-        "release"
-    ],
-    "properties": {
-        "release": {
-            "description": "Access control for this data",
-            "enum": [
-                "public",
-                "private",
-                "controlled"
-            ]
-        },
-        "size": {
-            "description": "Size of the data being indexed in bytes",
-            "type": "integer",
-            "minimum": 0
-        },
-        "keeper_authority": {
-            "description": "Who controls the alias pointing to this data?",
-            "type": "string"
-        },
-        "host_authorities": {
-            "description": "Who hosts the data?",
-            "type": "array",
-            "items": {
-                "type": "string"
-            }
-        },
-        "metastring": {
-            "description": "Further dataset identifiers",
-            "type": "string"
-        },
-        "hashes": {
-            "type": "object",
-            "properties": {
-                "md5": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{32}$"
-                },
-                "sha1": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{40}$"
-                },
-                "sha256": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{64}$"
-                },
-                "sha512": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{128}$"
+    "$schema": "http://json-schema.org/schema#",
+    "type": "object",
+    "additionalProperties": False,
+    "description": "Update or create an alias",
+    "required": ["size", "hashes", "release"],
+    "properties": {
+        "release": {
+            "description": "Access control for this data",
+            "enum": ["public", "private", "controlled"],
+        },
+        "size": {
+            "description": "Size of the data being indexed in bytes",
+            "type": "integer",
+            "minimum": 0,
+        },
+        "keeper_authority": {
+            "description": "Who controls the alias pointing to this data?",
+            "type": "string",
+        },
+        "host_authorities": {
+            "description": "Who hosts the data?",
+            "type": "array",
+            "items": {"type": "string"},
+        },
+        "metastring": {"description": "Further dataset identifiers", "type": "string"},
+        "hashes": {
+            "type": "object",
+            "properties": {
+                "md5": {"type": "string", "pattern": "^[0-9a-f]{32}$"},
+                "sha1": {"type": "string", "pattern": "^[0-9a-f]{40}$"},
+                "sha256": {"type": "string", "pattern": "^[0-9a-f]{64}$"},
+                "sha512": {"type": "string", "pattern": "^[0-9a-f]{128}$"},
+                "crc": {"type": "string", "pattern": "^[0-9a-f]{8}$"},
+                "etag": {"type": "string", "pattern": "^[0-9a-f]{32}(-\d+)?$"},
+            },
+            "anyOf": [
+                {"required": ["md5"]},
+                {"required": ["sha1"]},
+                {"required": ["sha256"]},
+                {"required": ["sha512"]},
+                {"required": ["crc"]},
+                {"required": ["etag"]},
+            ],
+        },
-                "crc": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{8}$"
-                },
-                "etag": {
-                    "type": "string",
-                    "pattern": "^[0-9a-f]{32}(-\d+)?$"
-                }
-            },
-            "anyOf": [
-                {
-                    "required": [
-                        "md5"
-                    ]
-                },
-                {
-                    "required": [
-                        "sha1"
-                    ]
-                },
-                {
-                    "required": [
-                        "sha256"
-                    ]
-                },
-                {
-                    "required": [
-                        "sha512"
-                    ]
-                },
-                {
-                    "required": [
-                        "crc"
-                    ]
-                },
-                {
-                    "required": [
-                        "etag"
-                    ]
-                }
-            ]
-        }
-    }
+    },
 }
diff --git a/indexd/app.py b/indexd/app.py
index 21272b8ab..99c7c926c 100644
--- a/indexd/app.py
+++ b/indexd/app.py
@@ -16,8 +16,8 @@ def app_init(app, settings=None):
     app.logger.addHandler(cdislogging.get_stream_handler())
     if not settings:
         from .default_settings import settings
-    app.config.update(settings['config'])
-    app.auth = settings['auth']
+    app.config.update(settings["config"])
+    app.auth = settings["auth"]
     app.register_blueprint(indexd_bulk_blueprint)
     app.register_blueprint(indexd_index_blueprint)
     app.register_blueprint(indexd_alias_blueprint)
@@ -27,10 +27,10 @@ def app_init(app, settings=None):

 def get_app():
-    app = flask.Flask('indexd')
+    app = flask.Flask("indexd")

-    if 'INDEXD_SETTINGS' in os.environ:
-        sys.path.append(os.environ['INDEXD_SETTINGS'])
+    if "INDEXD_SETTINGS" in os.environ:
+        sys.path.append(os.environ["INDEXD_SETTINGS"])

     settings = None
     try:
diff --git a/indexd/auth/driver.py b/indexd/auth/driver.py
index 78a58035a..bba8258da 100644
--- a/indexd/auth/driver.py
+++ b/indexd/auth/driver.py
@@ -2,25 +2,23 @@
 from ..driver_base import SQLAlchemyDriverBase

-class AuthDriverABC(SQLAlchemyDriverBase):
-    '''
+class AuthDriverABC(SQLAlchemyDriverBase, metaclass=abc.ABCMeta):
+    """
     Auth Driver Abstract Base Class

     Driver interface for authorization.
-    '''
+    """
+
     def __init__(self, conn, **config):
         super(AuthDriverABC, self).__init__(conn, **config)
-
-    __metaclass__ = abc.ABCMeta
-
     @abc.abstractmethod
     def auth(self, username, password):
-        '''
+        """
         Returns a dict of user information.
         Raises AuthError otherwise.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def authz(self, method, resource):
@@ -32,16 +30,16 @@ def authz(self, method, resource):

     @abc.abstractmethod
     def add(self, username, password):
-        '''
+        """
         Create a user.
         Raises AuthError if user already exists.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")

     @abc.abstractmethod
     def delete(self, username):
-        '''
+        """
         Delete a user
         Raises AuthError if user doesn't exist.
-        '''
-        raise NotImplementedError('TODO')
+        """
+        raise NotImplementedError("TODO")
diff --git a/indexd/auth/drivers/alchemy.py b/indexd/auth/drivers/alchemy.py
index 7e88b29e9..1acf61501 100644
--- a/indexd/auth/drivers/alchemy.py
+++ b/indexd/auth/drivers/alchemy.py
@@ -21,24 +21,27 @@

 Base = declarative_base()

+
 class AuthRecord(Base):
-    '''
+    """
     Base auth record representation.
-    '''
-    __tablename__ = 'auth_record'
+    """
+
+    __tablename__ = "auth_record"

     username = Column(String, primary_key=True)
     password = Column(String)

+
 class SQLAlchemyAuthDriver(AuthDriverABC):
-    '''
+    """
     SQLAlchemy implementation of auth driver.
-    '''
+    """

     def __init__(self, conn, arborist=None, **config):
-        '''
+        """
         Initialize the SQLAlchemy database driver.
-        '''
+        """
         super(SQLAlchemyAuthDriver, self).__init__(conn, **config)
         Base.metadata.bind = self.engine
         Base.metadata.create_all()
@@ -50,9 +53,9 @@ def __init__(self, conn, arborist=None, **config):
     @property
     @contextmanager
     def session(self):
-        '''
+        """
         Provide a transactional scope around a series of operations.
-        '''
+        """
         session = self.Session()

         try:
@@ -66,36 +69,40 @@ def session(self):

     @staticmethod
     def digest(password):
-        '''
+        """
         Digests a string.
-        '''
-        return hashlib.sha256(password.encode('utf-8')).hexdigest()
+        """
+        return hashlib.sha256(password.encode("utf-8")).hexdigest()

     def add(self, username, password):
         password = self.digest(password)
         with self.session as session:
-            if (session.query(AuthRecord)
+            if (
+                session.query(AuthRecord)
                 .filter(AuthRecord.username == username)
-                .first()):
-                raise AuthError('User {} already exists'.format(username))
+                .first()
+            ):
+                raise AuthError("User {} already exists".format(username))

-            new_record = AuthRecord(
-                username=username, password=password)
+            new_record = AuthRecord(username=username, password=password)
             session.add(new_record)

     def delete(self, username):
         with self.session as session:
-            user = session.query(AuthRecord).filter(
-                AuthRecord.username == username).first()
+            user = (
+                session.query(AuthRecord)
+                .filter(AuthRecord.username == username)
+                .first()
+            )
             if not user:
                 raise AuthError("User {} doesn't exist".format(username))
             session.delete(user)

     def auth(self, username, password):
-        '''
+        """
         Returns a dict of user information.
         Raises AuthError otherwise.
-        '''
+        """
         password = self.digest(password)
         with self.session as session:
             query = session.query(AuthRecord)
@@ -104,12 +111,13 @@ def auth(self, username, password):
             query = query.filter(AuthRecord.username == username)
             query = query.filter(AuthRecord.password == password)

-            try: query.one()
+            try:
+                query.one()
             except NoResultFound as err:
-                raise AuthError('username / password mismatch')
+                raise AuthError("username / password mismatch")

             context = {
-                'username': username,
+                "username": username,
                 # TODO include other user information
             }

@@ -117,7 +125,7 @@ def auth(self, username, password):

     def authz(self, method, resource):
         if not self.arborist:
-            raise AuthError('username / password mismatch')
+            raise AuthError("username / password mismatch")
         if not resource:
             raise AuthError("Permission denied.")
         if not self.arborist.auth_request(get_jwt_token(), "indexd", method, resource):
diff --git a/indexd/auth/errors.py b/indexd/auth/errors.py
index d623e2552..b1857461f 100644
--- a/indexd/auth/errors.py
+++ b/indexd/auth/errors.py
@@ -1,4 +1,4 @@
 class AuthError(Exception):
-    '''
+    """
     Base auth error.
-    '''
+    """
diff --git a/indexd/blueprint.py b/indexd/blueprint.py
index 3e83bd700..1a0b9d8f7 100644
--- a/indexd/blueprint.py
+++ b/indexd/blueprint.py
@@ -13,46 +13,41 @@
 from indexd.alias.errors import NoRecordFound as AliasNoRecordFound
 from indexd.index.errors import NoRecordFound as IndexNoRecordFound

-blueprint = flask.Blueprint('cross', __name__)
+blueprint = flask.Blueprint("cross", __name__)

 blueprint.config = dict()
 blueprint.index_driver = None
 blueprint.alias_driver = None
 blueprint.dist = []

-@blueprint.route('/alias/<path:alias>', methods=['GET'])
+
+@blueprint.route("/alias/<path:alias>", methods=["GET"])
 def get_alias(alias):
-    '''
+    """
     Return alias associated information.
-    '''
+    """
     info = blueprint.alias_driver.get(alias)

     start = 0
     limit = 100

-    size = info['size']
-    hashes = info['hashes']
+    size = info["size"]
+    hashes = info["hashes"]

     urls = blueprint.index_driver.get_urls(
-        size=size,
-        hashes=hashes,
-        start=start,
-        limit=limit,
+        size=size, hashes=hashes, start=start, limit=limit
     )

-    info.update({
-        'urls': urls,
-        'start': start,
-        'limit': limit,
-    })
+    info.update({"urls": urls, "start": start, "limit": limit})

     return flask.jsonify(info), 200

-@blueprint.route('/<path:record>', methods=['GET'])
+
+@blueprint.route("/<path:record>", methods=["GET"])
 def get_record(record):
-    '''
+    """
     Returns a record from the local ids, alias, or global resolvers.
-    '''
+    """

     try:
         ret = blueprint.index_driver.get(record)
@@ -63,30 +58,32 @@ def get_record(record):
     try:
         ret = blueprint.alias_driver.get(record)
     except AliasNoRecordFound:
-        if not blueprint.dist or 'no_dist' in flask.request.args:
+        if not blueprint.dist or "no_dist" in flask.request.args:
             raise
         ret = dist_get_record(record)

-    return flask.jsonify(ret), 200
+    return flask.jsonify(ret), 200
+

 def dist_get_record(record):
     # Sort the list of distributed ID services
     # Ones with which the request matches a hint will be first
     # Followed by those that don't match the hint
-    sorted_dist = sorted(blueprint.dist, key=lambda k: hint_match(record, k['hints']), reverse=True)
+    sorted_dist = sorted(
+        blueprint.dist, key=lambda k: hint_match(record, k["hints"]), reverse=True
+    )

     for indexd in sorted_dist:
         try:
-            if indexd['type'] == "doi":
-                fetcher_client = DOIClient(baseurl=indexd['host'])
+            if indexd["type"] == "doi":
+                fetcher_client = DOIClient(baseurl=indexd["host"])
                 res = fetcher_client.get(record)
-            elif indexd['type'] == "dos":
-                fetcher_client = DOSClient(baseurl=indexd['host'])
+            elif indexd["type"] == "dos":
+                fetcher_client = DOSClient(baseurl=indexd["host"])
                 res = fetcher_client.get(record)
             else:
-                fetcher_client = IndexClient(baseurl=indexd['host'])
+                fetcher_client = IndexClient(baseurl=indexd["host"])
                 res = fetcher_client.global_get(record, no_dist=True)
         except:
             # a lot of things can go wrong with the get, but in general we don't care here.
@@ -94,36 +91,40 @@ def dist_get_record(record):

         if res:
             json = res.to_json()
-            json['from_index_service'] = {
-                'host': indexd['host'],
-                'name': indexd['name'],
+            json["from_index_service"] = {
+                "host": indexd["host"],
+                "name": indexd["name"],
             }
             return json

-    raise IndexNoRecordFound('no record found')
+    raise IndexNoRecordFound("no record found")


 @blueprint.errorhandler(UserError)
 def handle_user_error(err):
     return flask.jsonify(error=str(err)), 400

+
 @blueprint.errorhandler(AuthError)
 def handle_auth_error(err):
     return flask.jsonify(error=str(err)), 403

+
 @blueprint.errorhandler(AliasNoRecordFound)
 def handle_no_record_error(err):
     return flask.jsonify(error=str(err)), 404

+
 @blueprint.errorhandler(IndexNoRecordFound)
 def handle_no_record_error(err):
     return flask.jsonify(error=str(err)), 404

+
 @blueprint.record
 def get_config(setup_state):
-    index_config = setup_state.app.config['INDEX']
-    alias_config = setup_state.app.config['ALIAS']
-    blueprint.index_driver = index_config['driver']
-    blueprint.alias_driver = alias_config['driver']
-    if 'DIST' in setup_state.app.config:
-        blueprint.dist = setup_state.app.config['DIST']
+    index_config = setup_state.app.config["INDEX"]
+    alias_config = setup_state.app.config["ALIAS"]
+    blueprint.index_driver = index_config["driver"]
+    blueprint.alias_driver = alias_config["driver"]
+    if "DIST" in setup_state.app.config:
+        blueprint.dist = setup_state.app.config["DIST"]
diff --git a/indexd/bulk/blueprint.py b/indexd/bulk/blueprint.py
index cf6dc1cdb..1d37599c2 100644
--- a/indexd/bulk/blueprint.py
+++ b/indexd/bulk/blueprint.py
@@ -8,33 +8,33 @@
 from sqlalchemy.orm import joinedload

-
-blueprint = flask.Blueprint('bulk', __name__)
+blueprint = flask.Blueprint("bulk", __name__)

 blueprint.config = dict()
 blueprint.index_driver = None


-@blueprint.route('/bulk/documents', methods=['POST'])
+@blueprint.route("/bulk/documents", methods=["POST"])
 def bulk_get_documents():
     """
     Returns a list of records.
     """
     ids = flask.request.json
     if not ids:
-        raise UserError('No ids provided')
+        raise UserError("No ids provided")
     if not isinstance(ids, list):
-        raise UserError('ids is not a list')
+        raise UserError("ids is not a list")

     with blueprint.index_driver.session as session:
         # Comment it out to compare against the eager loading option.
-        #query = session.query(IndexRecord)
-        #query = query.filter(IndexRecord.did.in_(ids)
+        # query = session.query(IndexRecord)
+        # query = query.filter(IndexRecord.did.in_(ids)

         # Use eager loading.
         query = session.query(IndexRecord)
-        query = query.options(joinedload(IndexRecord.urls).
-                              joinedload(IndexRecordUrl.url_metadata))
+        query = query.options(
+            joinedload(IndexRecord.urls).joinedload(IndexRecordUrl.url_metadata)
+        )
         query = query.options(joinedload(IndexRecord.acl))
         query = query.options(joinedload(IndexRecord.authz))
         query = query.options(joinedload(IndexRecord.hashes))
@@ -48,5 +48,5 @@ def bulk_get_documents():

 @blueprint.record
 def get_config(setup_state):
-    config = setup_state.app.config['INDEX']
-    blueprint.index_driver = config['driver']
+    config = setup_state.app.config["INDEX"]
+    blueprint.index_driver = config["driver"]
diff --git a/indexd/default_settings.py b/indexd/default_settings.py
index 1cadd85a8..53fbb1d64 100644
--- a/indexd/default_settings.py
+++ b/indexd/default_settings.py
@@ -4,46 +4,44 @@

 CONFIG = {}

-CONFIG['JSONIFY_PRETTYPRINT_REGULAR'] = False
+CONFIG["JSONIFY_PRETTYPRINT_REGULAR"] = False

 AUTO_MIGRATE = True

-CONFIG['INDEX'] = {
-    'driver': SQLAlchemyIndexDriver(
-        'sqlite:///index.sq3', auto_migrate=AUTO_MIGRATE, echo=True,
+CONFIG["INDEX"] = {
+    "driver": SQLAlchemyIndexDriver(
+        "sqlite:///index.sq3",
+        auto_migrate=AUTO_MIGRATE,
+        echo=True,
         index_config={
-            'DEFAULT_PREFIX': 'testprefix:', 'ADD_PREFIX_ALIAS': True,
-            'PREPEND_PREFIX': True}
-    ),
+            "DEFAULT_PREFIX": "testprefix:",
+            "ADD_PREFIX_ALIAS": True,
+            "PREPEND_PREFIX": True,
+        },
+    )
 }

-CONFIG['ALIAS'] = {
-    'driver': SQLAlchemyAliasDriver(
-        'sqlite:///alias.sq3', auto_migrate=AUTO_MIGRATE, echo=True),
+CONFIG["ALIAS"] = {
+    "driver": SQLAlchemyAliasDriver(
+        "sqlite:///alias.sq3", auto_migrate=AUTO_MIGRATE, echo=True
+    )
 }

-CONFIG['DIST'] = [
+CONFIG["DIST"] = [
     {
-        'name': 'Other IndexD',
-        'host': 'https://indexd.example.io/index/',
-        'hints': ['.*ROCKS.*'],
-        'type': 'indexd',
+        "name": "Other IndexD",
+        "host": "https://indexd.example.io/index/",
+        "hints": [".*ROCKS.*"],
+        "type": "indexd",
     },
+    {"name": "DX DOI", "host": "https://doi.org/", "hints": ["10\..*"], "type": "doi"},
     {
-        'name': 'DX DOI',
-        'host': 'https://doi.org/',
-        'hints': ['10\..*'],
-        'type': 'doi',
-    },
-    {
-        'name': 'DOS System',
-        'host': 'https://example.com/api/ga4gh/dos/v1/',
-        'hints': [],
-        'type': 'dos',
+        "name": "DOS System",
+        "host": "https://example.com/api/ga4gh/dos/v1/",
+        "hints": [],
+        "type": "dos",
     },
 ]

-AUTH = SQLAlchemyAuthDriver('sqlite:///auth.sq3')
-
-settings = {'config': CONFIG, 'auth': AUTH}
-
+AUTH = SQLAlchemyAuthDriver("sqlite:///auth.sq3")
+settings = {"config": CONFIG, "auth": AUTH}
diff --git a/indexd/dos/blueprint.py b/indexd/dos/blueprint.py
index 8b47c5ee9..82694932b 100644
--- a/indexd/dos/blueprint.py
+++ b/indexd/dos/blueprint.py
@@ -7,7 +7,7 @@
 from indexd.alias.errors import NoRecordFound as AliasNoRecordFound
 from indexd.index.errors import NoRecordFound as IndexNoRecordFound

-blueprint = flask.Blueprint('dos', __name__)
+blueprint = flask.Blueprint("dos", __name__)

 blueprint.config = dict()
 blueprint.index_driver = None
@@ -15,19 +15,19 @@
 blueprint.dist = []


-@blueprint.route('/ga4gh/dos/v1/dataobjects/<path:record>', methods=['GET'])
+@blueprint.route("/ga4gh/dos/v1/dataobjects/<path:record>", methods=["GET"])
 def get_dos_record(record):
-    '''
+    """
     Returns a record from the local ids, alias, or global resolvers.
     Returns DOS Schema
-    '''
+    """
     try:
         ret = blueprint.index_driver.get(record)
-        ret['alias'] = blueprint.index_driver.get_aliases_for_did(record)
+        ret["alias"] = blueprint.index_driver.get_aliases_for_did(record)
     except IndexNoRecordFound:
         try:
             ret = blueprint.index_driver.get_by_alias(record)
-            ret['alias'] = blueprint.index_driver.get_aliases_for_did(ret['did'])
+            ret["alias"] = blueprint.index_driver.get_aliases_for_did(ret["did"])
         except IndexNoRecordFound:
             try:
                 ret = blueprint.alias_driver.get(record)
@@ -39,110 +39,113 @@ def get_dos_record(record):
     return flask.jsonify(indexd_to_dos(ret)), 200


-@blueprint.route('/ga4gh/dos/v1/dataobjects', methods=['GET'])
+@blueprint.route("/ga4gh/dos/v1/dataobjects", methods=["GET"])
 def list_dos_records():
-    '''
+    """
     Returns a record from the local ids, alias, or global resolvers.
     Returns DOS Schema
-    '''
-    start = flask.request.args.get('page_token')
-    limit = flask.request.args.get('page_size')
+    """
+    start = flask.request.args.get("page_token")
+    limit = flask.request.args.get("page_size")

     try:
         limit = 100 if limit is None else int(limit)
     except ValueError:
-        raise UserError('limit must be an integer')
+        raise UserError("limit must be an integer")

     if limit <= 0 or limit > 1024:
-        raise UserError('limit must be between 1 and 1024')
+        raise UserError("limit must be between 1 and 1024")

-    url = flask.request.args.get('url')
+    url = flask.request.args.get("url")

     # Support this in the future when we have
     # more fully featured aliases?
     # alias = flask.request.args.get('alias')

-    checksum = flask.request.args.get('checksum')
+    checksum = flask.request.args.get("checksum")
     if checksum:
-        hashes = {checksum['type']: checksum['checksum']}
+        hashes = {checksum["type"]: checksum["checksum"]}
     else:
         hashes = None

     records = blueprint.index_driver.ids(
-        start=start,
-        limit=limit,
-        urls=url,
-        hashes=hashes
+        start=start, limit=limit, urls=url, hashes=hashes
     )

     for record in records:
-        record['alias'] = blueprint.index_driver.get_aliases_for_did(record['did'])
+        record["alias"] = blueprint.index_driver.get_aliases_for_did(record["did"])

-    ret = {"data_objects": [indexd_to_dos(record)['data_object'] for record in records]}
+    ret = {"data_objects": [indexd_to_dos(record)["data_object"] for record in records]}

     return flask.jsonify(ret), 200


 def indexd_to_dos(record):
     data_object = {
-        "id": record['did'],
-        "name": record['file_name'],
-        'created': record['created_date'],
-        'updated': record['updated_date'],
-        "size": record['size'],
-        "version": record['rev'],
+        "id": record["did"],
+        "name": record["file_name"],
+        "created": record["created_date"],
+        "updated": record["updated_date"],
+        "size": record["size"],
+        "version": record["rev"],
         "description": "",
-        "mime_type": ""
+        "mime_type": "",
     }

-    data_object['aliases'] = record['alias']
+    data_object["aliases"] = record["alias"]

     # parse out checksums
-    data_object['checksums'] = []
-    for k in record['hashes']:
-        data_object['checksums'].append(
-            {'checksum': record['hashes'][k], 'type': k})
+    data_object["checksums"] = []
+    for k in record["hashes"]:
+        data_object["checksums"].append({"checksum": record["hashes"][k], "type": k})

     # parse out the urls
-    data_object['urls'] = []
-    for url in record['urls']:
-        url_object = {
-            'url': url }
-        if 'metadata' in record and record['metadata']:
-            url_object['system_metadata'] = record['metadata']
-        if 'urls_metadata' in record and url in record['urls_metadata'] and record['urls_metadata'][url]:
-            url_object['user_metadata'] = record['urls_metadata'][url]
-        data_object['urls'].append(url_object)
-
-    result = { "data_object": data_object }
+    data_object["urls"] = []
+    for url in record["urls"]:
+        url_object = {"url": url}
+        if "metadata" in record and record["metadata"]:
+            url_object["system_metadata"] = record["metadata"]
+        if (
+            "urls_metadata" in record
+            and url in record["urls_metadata"]
+            and record["urls_metadata"][url]
+        ):
+            url_object["user_metadata"] = record["urls_metadata"][url]
+        data_object["urls"].append(url_object)
+
+    result = {"data_object": data_object}
     return result


 @blueprint.errorhandler(UserError)
 def handle_user_error(err):
-    ret = { msg: str(err), status_code: 0 }
+    ret = {"msg": str(err), "status_code": 0}
     return flask.jsonify(ret), 400

+
 @blueprint.errorhandler(AuthError)
 def handle_auth_error(err):
-    ret = { msg: str(err), status_code: 0 }
+    ret = {"msg": str(err), "status_code": 0}
     return flask.jsonify(ret), 403

+
 @blueprint.errorhandler(AliasNoRecordFound)
 def handle_no_alias_record_error(err):
-    ret = { msg: str(err), status_code: 0 }
+    ret = {"msg": str(err), "status_code": 0}
     return flask.jsonify(ret), 404

+
 @blueprint.errorhandler(IndexNoRecordFound)
 def handle_no_index_record_error(err):
-    ret = { msg: str(err), status_code: 0 }
+    ret = {"msg": str(err), "status_code": 0}
     return flask.jsonify(ret), 404

+
 @blueprint.record
 def get_config(setup_state):
-    index_config = setup_state.app.config['INDEX']
-    alias_config = setup_state.app.config['ALIAS']
-    blueprint.index_driver = index_config['driver']
-    blueprint.alias_driver = alias_config['driver']
-    if 'DIST' in setup_state.app.config:
-        blueprint.dist = setup_state.app.config['DIST']
+    index_config = setup_state.app.config["INDEX"]
+    alias_config = setup_state.app.config["ALIAS"]
+    blueprint.index_driver = index_config["driver"]
+    blueprint.alias_driver = alias_config["driver"]
+    if "DIST" in setup_state.app.config:
+        blueprint.dist = setup_state.app.config["DIST"]
diff --git a/indexd/driver_base.py b/indexd/driver_base.py
index 8c356c19a..b997d6361 100644
--- a/indexd/driver_base.py
+++ b/indexd/driver_base.py
@@ -4,15 +4,16 @@

 Base = declarative_base()

+
 class SQLAlchemyDriverBase(object):
-    '''
+    """
     SQLAlchemy implementation of index driver.
-    '''
+    """

     def __init__(self, conn, **config):
-        '''
+        """
         Initialize the SQLAlchemy database driver.
-        '''
+        """
         engine = create_engine(conn, **config)
         if not database_exists(engine.url):
             create_database(engine.url)
diff --git a/indexd/errors.py b/indexd/errors.py
index dfe4eac35..fdd265007 100644
--- a/indexd/errors.py
+++ b/indexd/errors.py
@@ -1,11 +1,13 @@
 from .auth.errors import AuthError

+
 class UserError(Exception):
-    '''
+    """
     User error.
-    '''
+    """
+

 class ConfigurationError(Exception):
-    '''
+    """
     Configuration error.
-    '''
+    """
diff --git a/indexd/index/blueprint.py b/indexd/index/blueprint.py
index d5e7f49da..2a02b9e91 100644
--- a/indexd/index/blueprint.py
+++ b/indexd/index/blueprint.py
@@ -19,105 +19,104 @@
 from .errors import RevisionMismatch
 from .errors import UnhealthyCheck

-blueprint = flask.Blueprint('index', __name__)
+blueprint = flask.Blueprint("index", __name__)

 blueprint.config = dict()
 blueprint.index_driver = None

 ACCEPTABLE_HASHES = {
-    'md5': re.compile(r'^[0-9a-f]{32}$').match,
-    'sha1': re.compile(r'^[0-9a-f]{40}$').match,
-    'sha256': re.compile(r'^[0-9a-f]{64}$').match,
-    'sha512': re.compile(r'^[0-9a-f]{128}$').match,
-    'crc': re.compile(r'^[0-9a-f]{8}$').match,
-    'etag': re.compile(r'^[0-9a-f]{32}(-\d+)?$').match
+    "md5": re.compile(r"^[0-9a-f]{32}$").match,
+    "sha1": re.compile(r"^[0-9a-f]{40}$").match,
+    "sha256": re.compile(r"^[0-9a-f]{64}$").match,
+    "sha512": re.compile(r"^[0-9a-f]{128}$").match,
+    "crc": re.compile(r"^[0-9a-f]{8}$").match,
+    "etag": re.compile(r"^[0-9a-f]{32}(-\d+)?$").match,
 }


 def validate_hashes(**hashes):
-    '''
+    """
     Validate hashes against known and valid hashing algorithms.
-    '''
+    """
     if not all(h in ACCEPTABLE_HASHES for h in hashes):
-        raise UserError('invalid hash types specified')
+        raise UserError("invalid hash types specified")
     if not all(ACCEPTABLE_HASHES[h](v) for h, v in hashes.items()):
-        raise UserError('invalid hash values specified')
+        raise UserError("invalid hash values specified")


-@blueprint.route('/index/', methods=['GET'])
+@blueprint.route("/index/", methods=["GET"])
 def get_index():
-    '''
+    """
     Returns a list of records.
-    '''
-    limit = flask.request.args.get('limit')
-    start = flask.request.args.get('start')
+    """
+    limit = flask.request.args.get("limit")
+    start = flask.request.args.get("start")

-    ids = flask.request.args.get('ids')
+    ids = flask.request.args.get("ids")
     if ids:
-        ids = ids.split(',')
+        ids = ids.split(",")
         if start is not None or limit is not None:
-            raise UserError(
-                'pagination is not supported when ids is provided')
+            raise UserError("pagination is not supported when ids is provided")

     try:
         limit = 100 if limit is None else int(limit)
     except ValueError as err:
-        raise UserError('limit must be an integer')
+        raise UserError("limit must be an integer")

     if limit <= 0 or limit > 1024:
-        raise UserError('limit must be between 1 and 1024')
+        raise UserError("limit must be between 1 and 1024")

-    size = flask.request.args.get('size')
+    size = flask.request.args.get("size")
     try:
         size = size if size is None else int(size)
     except ValueError as err:
-        raise UserError('size must be an integer')
+        raise UserError("size must be an integer")

     if size is not None and size < 0:
-        raise UserError('size must be > 0')
+        raise UserError("size must be >= 0")

-    uploader = flask.request.args.get('uploader')
+    uploader = flask.request.args.get("uploader")

     # TODO: Based on indexclient, url here should be urls instead. Or change urls to url in indexclient.
-    urls = flask.request.args.getlist('url')
+    urls = flask.request.args.getlist("url")

-    file_name = flask.request.args.get('file_name')
+    file_name = flask.request.args.get("file_name")

-    version = flask.request.args.get('version')
+    version = flask.request.args.get("version")

-    hashes = flask.request.args.getlist('hash')
-    hashes = {h: v for h, v in map(lambda x: x.split(':', 1), hashes)}
+    hashes = flask.request.args.getlist("hash")
+    hashes = {h: v for h, v in [x.split(":", 1) for x in hashes]}

     validate_hashes(**hashes)
     hashes = hashes if hashes else None

-    metadata = flask.request.args.getlist('metadata')
-    metadata = {k: v for k, v in map(lambda x: x.split(':', 1), metadata)}
+    metadata = flask.request.args.getlist("metadata")
+    metadata = {k: v for k, v in [x.split(":", 1) for x in metadata]}

-    acl = flask.request.args.get('acl')
+    acl = flask.request.args.get("acl")
     if acl is not None:
-        acl = [] if acl == 'null' else acl.split(',')
+        acl = [] if acl == "null" else acl.split(",")

-    authz = flask.request.args.get('authz')
+    authz = flask.request.args.get("authz")
     if authz is not None:
-        authz = [] if authz == 'null' else authz.split(',')
+        authz = [] if authz == "null" else authz.split(",")

-    urls_metadata = flask.request.args.get('urls_metadata')
+    urls_metadata = flask.request.args.get("urls_metadata")
     if urls_metadata:
         try:
             urls_metadata = json.loads(urls_metadata)
         except ValueError:
-            raise UserError('urls_metadata must be a valid json string')
+            raise UserError("urls_metadata must be a valid json string")

     if limit < 0 or limit > 1024:
-        raise UserError('limit must be between 0 and 1024')
+        raise UserError("limit must be between 0 and 1024")

-    negate_params = flask.request.args.get('negate_params')
+    negate_params = flask.request.args.get("negate_params")
     if negate_params:
         try:
             negate_params = json.loads(negate_params)
         except ValueError:
-            raise UserError('negate_params must be a valid json string')
+            raise UserError("negate_params must be a valid json string")

     records = blueprint.index_driver.ids(
         start=start,
@@ -133,122 +132,112 @@ def get_index():
         ids=ids,
         metadata=metadata,
         urls_metadata=urls_metadata,
-        negate_params=negate_params
+        negate_params=negate_params,
     )

     base = {
-        'ids': ids,
-        'records': records,
-        'limit': limit,
-        'start': start,
-        'size': size,
-        'file_name': file_name,
-        'version': version,
-        'urls': urls,
-        'acl': acl,
-        'authz': authz,
-        'hashes': hashes,
-        'metadata': metadata,
+        "ids": ids,
+        "records": records,
+        "limit": limit,
+        "start": start,
+        "size": size,
+        "file_name": file_name,
+        "version": version,
+        "urls": urls,
+        "acl": acl,
+        "authz": authz,
+        "hashes": hashes,
+        "metadata": metadata,
     }

     return flask.jsonify(base), 200


-@blueprint.route('/urls/', methods=['GET'])
+@blueprint.route("/urls/", methods=["GET"])
 def get_urls():
-    '''
+    """
     Returns a list of urls.
-    '''
-    ids = flask.request.args.getlist('ids')
-    hashes = flask.request.args.getlist('hash')
-    hashes = {h: v for h, v in map(lambda x: x.split(':', 1), hashes)}
-    size = flask.request.args.get('size')
+    """
+    ids = flask.request.args.getlist("ids")
+    hashes = flask.request.args.getlist("hash")
+    hashes = {h: v for h, v in [x.split(":", 1) for x in hashes]}
+    size = flask.request.args.get("size")
     if size:
         try:
             size = int(size)
         except TypeError:
-            raise UserError('size must be an integer')
+            raise UserError("size must be an integer")

         if size < 0:
-            raise UserError('size must be >= 0')
+            raise UserError("size must be >= 0")

     try:
-        start = int(flask.request.args.get('start', 0))
+        start = int(flask.request.args.get("start", 0))
     except TypeError:
-        raise UserError('start must be an integer')
+        raise UserError("start must be an integer")

     try:
-        limit = int(flask.request.args.get('limit', 100))
+        limit = int(flask.request.args.get("limit", 100))
     except TypeError:
-        raise UserError('limit must be an integer')
+        raise UserError("limit must be an integer")

     if start < 0:
-        raise UserError('start must be >= 0')
+        raise UserError("start must be >= 0")
     if limit < 0:
-        raise UserError('limit must be >= 0')
+        raise UserError("limit must be >= 0")
     if limit > 1024:
-        raise UserError('limit must be <= 1024')
+        raise UserError("limit must be <= 1024")

     validate_hashes(**hashes)

     urls = blueprint.index_driver.get_urls(
-        size=size,
-        ids=ids,
-        hashes=hashes,
-        start=start,
-        limit=limit,
+        size=size, ids=ids, hashes=hashes, start=start, limit=limit
     )

-    ret = {
-        'urls': urls,
-        'limit': limit,
-        'start': start,
-        'size': size,
-        'hashes': hashes,
-    }
+    ret = {"urls": urls, "limit": limit, "start": start, "size": size, "hashes": hashes}

     return flask.jsonify(ret), 200


-@blueprint.route('/index/<path:record>', methods=['GET'])
+@blueprint.route("/index/<path:record>", methods=["GET"])
 def get_index_record(record):
-    '''
+    """
     Returns a record.
-    '''
+    """

     ret = blueprint.index_driver.get(record)

     return flask.jsonify(ret), 200


-@blueprint.route('/index/', methods=['POST'])
+@blueprint.route("/index/", methods=["POST"])
 def post_index_record():
-    '''
+    """
     Create a new record.
-    '''
+    """
     try:
         jsonschema.validate(flask.request.json, POST_RECORD_SCHEMA)
     except jsonschema.ValidationError as err:
         raise UserError(err)

-    authz = flask.request.json.get('authz', [])
+    authz = flask.request.json.get("authz", [])
     authorize("create", authz)

-    did = flask.request.json.get('did')
-    form = flask.request.json['form']
-    size = flask.request.json['size']
-    urls = flask.request.json['urls']
-    acl = flask.request.json.get('acl', [])
+    did = flask.request.json.get("did")
+    form = flask.request.json["form"]
+    size = flask.request.json["size"]
+    urls = flask.request.json["urls"]
+    acl = flask.request.json.get("acl", [])

-    hashes = flask.request.json['hashes']
-    file_name = flask.request.json.get('file_name')
-    metadata = flask.request.json.get('metadata')
-    urls_metadata = flask.request.json.get('urls_metadata')
-    version = flask.request.json.get('version')
-    baseid = flask.request.json.get('baseid')
-    uploader = flask.request.json.get('uploader')
+    hashes = flask.request.json["hashes"]
+    file_name = flask.request.json.get("file_name")
+    metadata = flask.request.json.get("metadata")
+    urls_metadata = flask.request.json.get("urls_metadata")
+    version = flask.request.json.get("version")
+    baseid = flask.request.json.get("baseid")
+    uploader = flask.request.json.get("uploader")

     did, rev, baseid = blueprint.index_driver.add(
         form,
@@ -266,70 +255,53 @@ def post_index_record():
         uploader=uploader,
     )

-    ret = {
-        'did': did,
-        'rev': rev,
-        'baseid': baseid,
-    }
+    ret = {"did": did, "rev": rev, "baseid": baseid}

     return flask.jsonify(ret), 200


-@blueprint.route('/index/blank/', methods=['POST'])
+@blueprint.route("/index/blank/", methods=["POST"])
 @authorize
 def post_index_blank_record():
-    '''
+    """
     Create a blank new record with only uploader and optionally
     file_name fields filled
-    '''
+    """

-    uploader = flask.request.get_json().get('uploader')
-    file_name = flask.request.get_json().get('file_name')
+    uploader = flask.request.get_json().get("uploader")
+    file_name = flask.request.get_json().get("file_name")
     if not uploader:
-        raise UserError('no uploader specified')
+        raise UserError("no uploader specified")

     did, rev, baseid = blueprint.index_driver.add_blank_record(
-        uploader=uploader,
-        file_name=file_name
+        uploader=uploader, file_name=file_name
     )

-    ret = {
-        'did': did,
-        'rev': rev,
-        'baseid': baseid,
-    }
+    ret = {"did": did, "rev": rev, "baseid": baseid}

     return flask.jsonify(ret), 201


-@blueprint.route('/index/blank/<path:record>', methods=['PUT'])
+@blueprint.route("/index/blank/<path:record>", methods=["PUT"])
 @authorize
 def put_index_blank_record(record):
-    '''
+    """
     Update a blank record with size, hashes and url
-    '''
-    rev = flask.request.args.get('rev')
-    size = flask.request.get_json().get('size')
-    hashes = flask.request.get_json().get('hashes')
-    urls = flask.request.get_json().get('urls')
+    """
+    rev = flask.request.args.get("rev")
+    size = flask.request.get_json().get("size")
+    hashes = flask.request.get_json().get("hashes")
+    urls = flask.request.get_json().get("urls")

     did, rev, baseid = blueprint.index_driver.update_blank_record(
-        did=record,
-        rev=rev,
-        size=size,
-        hashes=hashes,
-        urls=urls,
+        did=record, rev=rev, size=size, hashes=hashes, urls=urls
     )

-    ret = {
-        'did': did,
-        'rev': rev,
-        'baseid': baseid,
-    }
+    ret = {"did": did, "rev": rev, "baseid": baseid}

     return flask.jsonify(ret), 200


-@blueprint.route('/index/<path:record>', methods=['PUT'])
+@blueprint.route("/index/<path:record>", methods=["PUT"])
 def put_index_record(record):
     """
     Update an existing record.
@@ -339,59 +311,51 @@ def put_index_record(record): except jsonschema.ValidationError as err: raise UserError(err) - rev = flask.request.args.get('rev') + rev = flask.request.args.get("rev") # authorize done in update - did, baseid, rev = blueprint.index_driver.update( - record, - rev, - flask.request.json, - ) + did, baseid, rev = blueprint.index_driver.update(record, rev, flask.request.json) - ret = { - 'did': did, - 'baseid': baseid, - 'rev': rev, - } + ret = {"did": did, "baseid": baseid, "rev": rev} return flask.jsonify(ret), 200 -@blueprint.route('/index/', methods=['DELETE']) +@blueprint.route("/index/", methods=["DELETE"]) def delete_index_record(record): - ''' + """ Delete an existing record. - ''' - rev = flask.request.args.get('rev') + """ + rev = flask.request.args.get("rev") if rev is None: - raise UserError('no revision specified') + raise UserError("no revision specified") # authorize done in delete blueprint.index_driver.delete(record, rev) - return '', 200 + return "", 200 -@blueprint.route('/index/', methods=['POST']) +@blueprint.route("/index/", methods=["POST"]) def add_index_record_version(record): - ''' + """ Add a record version - ''' + """ try: jsonschema.validate(flask.request.json, POST_RECORD_SCHEMA) except jsonschema.ValidationError as err: raise UserError(err) - new_did = flask.request.json.get('did') - form = flask.request.json['form'] - size = flask.request.json['size'] - urls = flask.request.json['urls'] - acl = flask.request.json.get('acl', []) - authz = flask.request.json.get('authz', []) - hashes = flask.request.json['hashes'] - file_name = flask.request.json.get('file_name') - metadata = flask.request.json.get('metadata') - urls_metadata = flask.request.json.get('urls_metadata') - version = flask.request.json.get('version') + new_did = flask.request.json.get("did") + form = flask.request.json["form"] + size = flask.request.json["size"] + urls = flask.request.json["urls"] + acl = flask.request.json.get("acl", []) + authz = flask.request.json.get("authz", []) + hashes = flask.request.json["hashes"] + file_name = flask.request.json.get("file_name") + metadata = flask.request.json.get("metadata") + urls_metadata = flask.request.json.get("urls_metadata") + version = flask.request.json.get("version") # authorize done in add_version for both the old and new authz did, baseid, rev = blueprint.index_driver.add_version( @@ -409,74 +373,63 @@ def add_index_record_version(record): hashes=hashes, ) - ret = { - 'did': did, - 'baseid': baseid, - 'rev': rev, - } + ret = {"did": did, "baseid": baseid, "rev": rev} return flask.jsonify(ret), 200 -@blueprint.route('/index//versions', methods=['GET']) +@blueprint.route("/index//versions", methods=["GET"]) def get_all_index_record_versions(record): - ''' + """ Get all record versions - ''' + """ ret = blueprint.index_driver.get_all_versions(record) return flask.jsonify(ret), 200 -@blueprint.route('/index//latest', methods=['GET']) +@blueprint.route("/index//latest", methods=["GET"]) def get_latest_index_record_versions(record): - ''' + """ Get the latest record version - ''' - has_version = flask.request.args.get('has_version', '').lower() == 'true' - ret = blueprint.index_driver.get_latest_version( - record, has_version=has_version) + """ + has_version = flask.request.args.get("has_version", "").lower() == "true" + ret = blueprint.index_driver.get_latest_version(record, has_version=has_version) return flask.jsonify(ret), 200 -@blueprint.route('/_status', methods=['GET']) +@blueprint.route("/_status", methods=["GET"]) def 
health_check(): - ''' + """ Health Check. - ''' + """ blueprint.index_driver.health_check() - return 'Healthy', 200 + return "Healthy", 200 -@blueprint.route('/_stats', methods=['GET']) +@blueprint.route("/_stats", methods=["GET"]) def stats(): - ''' + """ Return indexed data stats. - ''' + """ filecount = blueprint.index_driver.len() totalfilesize = blueprint.index_driver.totalbytes() - base = { - 'fileCount': filecount, - 'totalFileSize': totalfilesize, - } + base = {"fileCount": filecount, "totalFileSize": totalfilesize} return flask.jsonify(base), 200 -@blueprint.route('/_version', methods=['GET']) +@blueprint.route("/_version", methods=["GET"]) def version(): - ''' + """ Return the version of this service. - ''' + """ - base = { - 'version': VERSION, - 'commit': COMMIT, - } + base = {"version": VERSION, "commit": COMMIT} return flask.jsonify(base), 200 @@ -513,5 +466,5 @@ def handle_unhealthy_check(err): @blueprint.record def get_config(setup_state): - config = setup_state.app.config['INDEX'] - blueprint.index_driver = config['driver'] + config = setup_state.app.config["INDEX"] + blueprint.index_driver = config["driver"] diff --git a/indexd/index/driver.py b/indexd/index/driver.py index 668268e62..7de58b4fb 100644 --- a/indexd/index/driver.py +++ b/indexd/index/driver.py @@ -2,129 +2,156 @@ from ..driver_base import SQLAlchemyDriverBase -class IndexDriverABC(SQLAlchemyDriverBase): - ''' +class IndexDriverABC(SQLAlchemyDriverBase, metaclass=abc.ABCMeta): + """ Index Driver Abstract Base Class Driver interface for interacting with index backends. - ''' + """ + def __init__(self, conn, **config): super(IndexDriverABC, self).__init__(conn, **config) - __metaclass__ = abc.ABCMeta - @abc.abstractmethod - def ids(self, - limit=100, - start=None, - size=None, - urls=None, - acl=None, - authz=None, - hashes=None, - file_name=None, - version=None, - metadata=None, - ids=None): - ''' + def ids( + self, + limit=100, + start=None, + size=None, + urls=None, + acl=None, + authz=None, + hashes=None, + file_name=None, + version=None, + uploader=None, + metadata=None, + ids=None, + urls_metadata=None, + negate_params=None, + ): + """ Returns a list of records stored by the backend. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def get_urls(self, size=None, hashes=None, ids=None, start=0, limit=100): - ''' + """ Returns a list of urls matching supplied size and hashes. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def add( - self, form, did=None, size=None, urls=None, - hashes=None, file_name=None, metadata=None, - urls_metadata=None, version=None): - ''' + self, + form, + did=None, + size=None, + file_name=None, + metadata=None, + urls_metadata=None, + version=None, + urls=None, + acl=None, + authz=None, + hashes=None, + baseid=None, + uploader=None, + ): + """ Creates record for given data. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def get(self, did): - ''' + """ Gets a record given the record id. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def update(self, did, rev, changing_fields): - ''' + """ Updates record with new values. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def delete(self, did, rev): - ''' + """ Deletes record. 
- ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def add_version( - self, did, form, size=None, - file_name=None, metadata=None, urls=None, - urls_metadata=None, hashes=None, version=None): - ''' + self, + current_did, + form, + new_did=None, + size=None, + file_name=None, + metadata=None, + urls_metadata=None, + version=None, + urls=None, + acl=None, + authz=None, + hashes=None, + ): + """ Add a record version given did - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def get_all_versions(self, did): - ''' + """ Get all record versions given did - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def get_latest_version(self, did, has_version=None): - ''' + """ Get the latest record version given did - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def health_check(self): - ''' + """ Performs a health check. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def __contains__(self, did): - ''' + """ Returns True if record is stored by backend. Returns False otherwise. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def __iter__(self): - ''' + """ Returns an iterator over unique records stored by backend. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def totalbytes(self): - ''' + """ Returns the total bytes of the data represented in the index. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") @abc.abstractmethod def len(self): - ''' + """ Returns the number of unique records stored by backend. - ''' - raise NotImplementedError('TODO') + """ + raise NotImplementedError("TODO") diff --git a/indexd/index/drivers/alchemy.py b/indexd/index/drivers/alchemy.py index ff3070947..450e4d7ab 100644 --- a/indexd/index/drivers/alchemy.py +++ b/indexd/index/drivers/alchemy.py @@ -3,7 +3,6 @@ from contextlib import contextmanager from cdislogging import get_logger -from future.utils import iteritems from sqlalchemy import ( BigInteger, Column, @@ -32,11 +31,7 @@ RevisionMismatch, UnhealthyCheck, ) -from indexd.utils import ( - init_schema_version, - is_empty_database, - migrate_database, -) +from indexd.utils import init_schema_version, is_empty_database, migrate_database Base = declarative_base() @@ -45,20 +40,21 @@ class BaseVersion(Base): """ Base index record version representation. """ - __tablename__ = 'base_version' + + __tablename__ = "base_version" baseid = Column(String, primary_key=True) dids = relationship( - 'IndexRecord', - backref='base_version', - cascade='all, delete-orphan') + "IndexRecord", backref="base_version", cascade="all, delete-orphan" + ) class IndexSchemaVersion(Base): """ Table to track current database's schema version """ - __tablename__ = 'index_schema_version' + + __tablename__ = "index_schema_version" version = Column(Integer, default=0, primary_key=True) @@ -66,11 +62,12 @@ class IndexRecord(Base): """ Base index record representation.
""" - __tablename__ = 'index_record' + + __tablename__ = "index_record" did = Column(String, primary_key=True) - baseid = Column(String, ForeignKey('base_version.baseid'), index=True) + baseid = Column(String, ForeignKey("base_version.baseid"), index=True) rev = Column(String) form = Column(String) size = Column(BigInteger, index=True) @@ -81,39 +78,27 @@ class IndexRecord(Base): uploader = Column(String, index=True) urls = relationship( - 'IndexRecordUrl', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordUrl", backref="index_record", cascade="all, delete-orphan" ) acl = relationship( - 'IndexRecordACE', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordACE", backref="index_record", cascade="all, delete-orphan" ) authz = relationship( - 'IndexRecordAuthz', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordAuthz", backref="index_record", cascade="all, delete-orphan" ) hashes = relationship( - 'IndexRecordHash', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordHash", backref="index_record", cascade="all, delete-orphan" ) index_metadata = relationship( - 'IndexRecordMetadata', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordMetadata", backref="index_record", cascade="all, delete-orphan" ) aliases = relationship( - 'IndexRecordAlias', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordAlias", backref="index_record", cascade="all, delete-orphan" ) def to_document_dict(self): @@ -127,26 +112,27 @@ def to_document_dict(self): metadata = {m.key: m.value for m in self.index_metadata} urls_metadata = { - u.url: {m.key: m.value for m in u.url_metadata} for u in self.urls} + u.url: {m.key: m.value for m in u.url_metadata} for u in self.urls + } created_date = self.created_date.isoformat() updated_date = self.updated_date.isoformat() return { - 'did': self.did, - 'baseid': self.baseid, - 'rev': self.rev, - 'size': self.size, - 'file_name': self.file_name, - 'version': self.version, - 'uploader': self.uploader, - 'urls': urls, - 'urls_metadata': urls_metadata, - 'acl': acl, + "did": self.did, + "baseid": self.baseid, + "rev": self.rev, + "size": self.size, + "file_name": self.file_name, + "version": self.version, + "uploader": self.uploader, + "urls": urls, + "urls_metadata": urls_metadata, + "acl": acl, "authz": authz, - 'hashes': hashes, - 'metadata': metadata, - 'form': self.form, - 'created_date': created_date, + "hashes": hashes, + "metadata": metadata, + "form": self.form, + "created_date": created_date, "updated_date": updated_date, } @@ -156,14 +142,14 @@ class IndexRecordAlias(Base): Alias attached to index record """ - __tablename__ = 'index_record_alias' + __tablename__ = "index_record_alias" - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) name = Column(String, primary_key=True) __table_args__ = ( - Index('index_record_alias_idx', 'did'), - Index('index_record_alias_name', 'name'), + Index("index_record_alias_idx", "did"), + Index("index_record_alias_name", "name"), ) @@ -172,19 +158,17 @@ class IndexRecordUrl(Base): Base index record url representation. 
""" - __tablename__ = 'index_record_url' + __tablename__ = "index_record_url" - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) url = Column(String, primary_key=True) url_metadata = relationship( - 'IndexRecordUrlMetadata', - backref='index_record_url', - cascade='all, delete-orphan', - ) - __table_args__ = ( - Index('index_record_url_idx', 'did'), + "IndexRecordUrlMetadata", + backref="index_record_url", + cascade="all, delete-orphan", ) + __table_args__ = (Index("index_record_url_idx", "did"),) class IndexRecordACE(Base): @@ -192,15 +176,13 @@ class IndexRecordACE(Base): index record access control entry representation. """ - __tablename__ = 'index_record_ace' + __tablename__ = "index_record_ace" - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) # access control entry ace = Column(String, primary_key=True) - __table_args__ = ( - Index('index_record_ace_idx', 'did'), - ) + __table_args__ = (Index("index_record_ace_idx", "did"),) class IndexRecordAuthz(Base): @@ -208,14 +190,12 @@ class IndexRecordAuthz(Base): index record access control (authz) entry representation. """ - __tablename__ = 'index_record_authz' + __tablename__ = "index_record_authz" - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) resource = Column(String, primary_key=True) - __table_args__ = ( - Index('index_record_authz_idx', 'did'), - ) + __table_args__ = (Index("index_record_authz_idx", "did"),) class IndexRecordMetadata(Base): @@ -223,13 +203,11 @@ class IndexRecordMetadata(Base): Metadata attached to index document """ - __tablename__ = 'index_record_metadata' + __tablename__ = "index_record_metadata" key = Column(String, primary_key=True) - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) value = Column(String) - __table_args__ = ( - Index('index_record_metadata_idx', 'did'), - ) + __table_args__ = (Index("index_record_metadata_idx", "did"),) class IndexRecordUrlMetadata(Base): @@ -237,15 +215,16 @@ class IndexRecordUrlMetadata(Base): Metadata attached to url """ - __tablename__ = 'index_record_url_metadata' + __tablename__ = "index_record_url_metadata" key = Column(String, primary_key=True) url = Column(String, primary_key=True) did = Column(String, index=True, primary_key=True) value = Column(String) __table_args__ = ( - ForeignKeyConstraint(['did', 'url'], - ['index_record_url.did', 'index_record_url.url']), - Index('index_record_url_metadata_idx', 'did'), + ForeignKeyConstraint( + ["did", "url"], ["index_record_url.did", "index_record_url.url"] + ), + Index("index_record_url_metadata_idx", "did"), ) @@ -253,14 +232,15 @@ class IndexRecordHash(Base): """ Base index record hash representation. 
""" - __tablename__ = 'index_record_hash' - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + __tablename__ = "index_record_hash" + + did = Column(String, ForeignKey("index_record.did"), primary_key=True) hash_type = Column(String, primary_key=True) hash_value = Column(String) __table_args__ = ( - Index('index_record_hash_idx', 'did'), - Index('index_record_hash_type_value_idx', 'hash_value', 'hash_type'), + Index("index_record_hash_idx", "did"), + Index("index_record_hash_type_value_idx", "hash_value", "hash_type"), ) @@ -269,13 +249,11 @@ def create_urls_metadata(urls_metadata, record, session): create url metadata record in database """ urls = {u.url for u in record.urls} - for url, url_metadata in iteritems(urls_metadata): + for url, url_metadata in urls_metadata.items(): if url not in urls: - raise UserError( - 'url {} in urls_metadata does not exist'.format(url)) - for k, v in iteritems(url_metadata): - session.add(IndexRecordUrlMetadata( - url=url, key=k, value=v, did=record.did)) + raise UserError("url {} in urls_metadata does not exist".format(url)) + for k, v in url_metadata.items(): + session.add(IndexRecordUrlMetadata(url=url, key=k, value=v, did=record.did)) class SQLAlchemyIndexDriver(IndexDriverABC): @@ -284,13 +262,13 @@ class SQLAlchemyIndexDriver(IndexDriverABC): """ def __init__( - self, conn, logger=None, auto_migrate=True, - index_config=None, **config): + self, conn, logger=None, auto_migrate=True, index_config=None, **config + ): """ Initialize the SQLAlchemy database driver. """ super(SQLAlchemyIndexDriver, self).__init__(conn, **config) - self.logger = logger or get_logger('SQLAlchemyIndexDriver') + self.logger = logger or get_logger("SQLAlchemyIndexDriver") self.config = index_config or {} Base.metadata.bind = self.engine @@ -300,9 +278,8 @@ def __init__( Base.metadata.create_all() if is_empty_db: init_schema_version( - driver=self, - model=IndexSchemaVersion, - version=CURRENT_SCHEMA_VERSION) + driver=self, model=IndexSchemaVersion, version=CURRENT_SCHEMA_VERSION + ) if auto_migrate: self.migrate_index_database() @@ -315,7 +292,8 @@ def migrate_index_database(self): driver=self, migrate_functions=SCHEMA_MIGRATION_FUNCTIONS, current_schema_version=CURRENT_SCHEMA_VERSION, - model=IndexSchemaVersion) + model=IndexSchemaVersion, + ) @property @contextmanager @@ -334,21 +312,23 @@ def session(self): finally: session.close() - def ids(self, - limit=100, - start=None, - size=None, - urls=None, - acl=None, - authz=None, - hashes=None, - file_name=None, - version=None, - uploader=None, - metadata=None, - ids=None, - urls_metadata=None, - negate_params=None): + def ids( + self, + limit=100, + start=None, + size=None, + urls=None, + acl=None, + authz=None, + hashes=None, + file_name=None, + version=None, + uploader=None, + metadata=None, + ids=None, + urls_metadata=None, + negate_params=None, + ): """ Returns list of records stored by the backend. """ @@ -357,8 +337,9 @@ def ids(self, # Enable joinedload on all relationships so that we won't have to # do a bunch of selects when we assemble our response. - query = query.options(joinedload(IndexRecord.urls). 
- joinedload(IndexRecordUrl.url_metadata)) + query = query.options( + joinedload(IndexRecord.urls).joinedload(IndexRecordUrl.url_metadata) + ) query = query.options(joinedload(IndexRecord.acl)) query = query.options(joinedload(IndexRecord.authz)) query = query.options(joinedload(IndexRecord.hashes)) @@ -402,10 +383,12 @@ def ids(self, if hashes: for h, v in hashes.items(): sub = session.query(IndexRecordHash.did) - sub = sub.filter(and_( - IndexRecordHash.hash_type == h, - IndexRecordHash.hash_value == v, - )) + sub = sub.filter( + and_( + IndexRecordHash.hash_type == h, + IndexRecordHash.hash_value == v, + ) + ) query = query.filter(IndexRecord.did.in_(sub.subquery())) if metadata: @@ -413,24 +396,24 @@ def ids(self, sub = session.query(IndexRecordMetadata.did) sub = sub.filter( and_( - IndexRecordMetadata.key == k, - IndexRecordMetadata.value == v, - )) + IndexRecordMetadata.key == k, IndexRecordMetadata.value == v + ) + ) query = query.filter(IndexRecord.did.in_(sub.subquery())) if urls_metadata: - query = query.join(IndexRecord.urls).join( - IndexRecordUrl.url_metadata) + query = query.join(IndexRecord.urls).join(IndexRecordUrl.url_metadata) for url_key, url_dict in urls_metadata.items(): - query = query.filter( - IndexRecordUrlMetadata.url.contains(url_key)) + query = query.filter(IndexRecordUrlMetadata.url.contains(url_key)) for k, v in url_dict.items(): - query = query.filter(IndexRecordUrl.url_metadata.any( - and_( - IndexRecordUrlMetadata.key == k, - IndexRecordUrlMetadata.value == v + query = query.filter( + IndexRecordUrl.url_metadata.any( + and_( + IndexRecordUrlMetadata.key == k, + IndexRecordUrlMetadata.value == v, + ) ) - )) + ) if negate_params: query = self._negate_filter(session, query, **negate_params) @@ -452,15 +435,17 @@ def ids(self, return [i.to_document_dict() for i in query] @staticmethod - def _negate_filter(session, - query, - urls=None, - acl=None, - authz=None, - file_name=None, - version=None, - metadata=None, - urls_metadata=None): + def _negate_filter( + session, + query, + urls=None, + acl=None, + authz=None, + file_name=None, + version=None, + metadata=None, + urls_metadata=None, + ): """ param_values passed in here will be negated @@ -503,18 +488,21 @@ def _negate_filter(session, if authz is not None and authz: query = query.join(IndexRecord.authz) for u in authz: - query = query.filter(~IndexRecord.authz.any(IndexRecordAuthz.resource == u)) + query = query.filter( + ~IndexRecord.authz.any(IndexRecordAuthz.resource == u) + ) if metadata is not None and metadata: for k, v in metadata.items(): if not v: - query = query.filter(~IndexRecord.index_metadata.any(IndexRecordMetadata.key == k)) + query = query.filter( + ~IndexRecord.index_metadata.any(IndexRecordMetadata.key == k) + ) else: sub = session.query(IndexRecordMetadata.did) sub = sub.filter( and_( - IndexRecordMetadata.key == k, - IndexRecordMetadata.value == v + IndexRecordMetadata.key == k, IndexRecordMetadata.value == v ) ) query = query.filter(~IndexRecord.did.in_(sub.subquery())) @@ -527,18 +515,21 @@ def _negate_filter(session, else: for k, v in url_dict.items(): if not v: - query = query.filter(~IndexRecordUrl.url_metadata.any( - and_(IndexRecordUrlMetadata.key == k, - IndexRecordUrlMetadata.url.contains(url_key) - )) + query = query.filter( + ~IndexRecordUrl.url_metadata.any( + and_( + IndexRecordUrlMetadata.key == k, + IndexRecordUrlMetadata.url.contains(url_key), + ) ) + ) else: sub = session.query(IndexRecordUrlMetadata.did) sub = sub.filter( and_( 
IndexRecordUrlMetadata.url.contains(url_key), IndexRecordUrlMetadata.key == k, - IndexRecordUrlMetadata.value == v + IndexRecordUrlMetadata.value == v, ) ) query = query.filter(~IndexRecord.did.in_(sub.subquery())) @@ -561,10 +552,12 @@ def get_urls(self, size=None, hashes=None, ids=None, start=0, limit=100): for h, v in hashes.items(): # Select subset that matches given hash. sub = session.query(IndexRecordHash.did) - sub = sub.filter(and_( - IndexRecordHash.hash_type == h, - IndexRecordHash.hash_value == v, - )) + sub = sub.filter( + and_( + IndexRecordHash.hash_type == h, + IndexRecordHash.hash_value == v, + ) + ) # Filter anything that does not match. query = query.filter(IndexRecordUrl.did.in_(sub.subquery())) @@ -578,25 +571,26 @@ def get_urls(self, size=None, hashes=None, ids=None, start=0, limit=100): query = query.limit(limit) return [ - {'url': r.url, - 'metadata': {m.key: m.value for m in r.url_metadata}} + {"url": r.url, "metadata": {m.key: m.value for m in r.url_metadata}} for r in query ] - def add(self, - form, - did=None, - size=None, - file_name=None, - metadata=None, - urls_metadata=None, - version=None, - urls=None, - acl=None, - authz=None, - hashes=None, - baseid=None, - uploader=None): + def add( + self, + form, + did=None, + size=None, + file_name=None, + metadata=None, + urls_metadata=None, + version=None, + urls=None, + acl=None, + authz=None, + hashes=None, + baseid=None, + uploader=None, + ): """ Creates a new record given size, urls, acl, authz, hashes, metadata, urls_metadata file name and version @@ -627,8 +621,8 @@ def add(self, record.did = did else: new_did = str(uuid.uuid4()) - if self.config.get('PREPEND_PREFIX'): - new_did = self.config['DEFAULT_PREFIX'] + new_did + if self.config.get("PREPEND_PREFIX"): + new_did = self.config["DEFAULT_PREFIX"] + new_did record.did = new_did record.rev = str(uuid.uuid4())[:8] @@ -637,43 +631,37 @@ def add(self, record.uploader = uploader - record.urls = [IndexRecordUrl( - did=record.did, - url=url, - ) for url in urls] - - record.acl = [IndexRecordACE( - did=record.did, - ace=ace, - ) for ace in set(acl)] - - record.authz = [IndexRecordAuthz( - did=record.did, - resource=resource, - ) for resource in set(authz)] - - record.hashes = [IndexRecordHash( - did=record.did, - hash_type=h, - hash_value=v, - ) for h, v in hashes.items()] - - record.index_metadata = [IndexRecordMetadata( - did=record.did, - key=m_key, - value=m_value - ) for m_key, m_value in metadata.items()] + record.urls = [IndexRecordUrl(did=record.did, url=url) for url in urls] + + record.acl = [IndexRecordACE(did=record.did, ace=ace) for ace in set(acl)] + + record.authz = [ + IndexRecordAuthz(did=record.did, resource=resource) + for resource in set(authz) + ] + + record.hashes = [ + IndexRecordHash(did=record.did, hash_type=h, hash_value=v) + for h, v in hashes.items() + ] + + record.index_metadata = [ + IndexRecordMetadata(did=record.did, key=m_key, value=m_value) + for m_key, m_value in metadata.items() + ] session.merge(base_version) try: session.add(record) create_urls_metadata(urls_metadata, record, session) - if self.config.get('ADD_PREFIX_ALIAS'): + if self.config.get("ADD_PREFIX_ALIAS"): self.add_prefix_alias(record, session) session.commit() except IntegrityError: - raise UserError('did "{did}" already exists'.format(did=record.did), 400) + raise UserError( + 'did "{did}" already exists'.format(did=record.did), 400 + ) return record.did, record.rev, record.baseid @@ -688,8 +676,8 @@ def add_blank_record(self, uploader, file_name=None): did = 
str(uuid.uuid4()) baseid = str(uuid.uuid4()) - if self.config.get('PREPEND_PREFIX'): - did = self.config['DEFAULT_PREFIX'] + did + if self.config.get("PREPEND_PREFIX"): + did = self.config["DEFAULT_PREFIX"] + did record.did = did base_version.baseid = baseid @@ -722,26 +710,22 @@ def update_blank_record(self, did, rev, size, hashes, urls): try: record = query.one() except NoResultFound: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") if record.size or record.hashes: raise UserError("update api is not supported for non-empty record!") if rev != record.rev: - raise RevisionMismatch('revision mismatch') + raise RevisionMismatch("revision mismatch") record.size = size - record.hashes = [IndexRecordHash( - did=record.did, - hash_type=h, - hash_value=v, - ) for h, v in hashes.items()] - record.urls = [IndexRecordUrl( - did=record.did, - url=url, - ) for url in urls] + record.hashes = [ + IndexRecordHash(did=record.did, hash_type=h, hash_value=v) + for h, v in hashes.items() + ] + record.urls = [IndexRecordUrl(did=record.did, url=url) for url in urls] record.rev = str(uuid.uuid4())[:8] @@ -750,13 +734,12 @@ def update_blank_record(self, did, rev, size, hashes, urls): return record.did, record.rev, record.baseid - def add_prefix_alias(self, record, session): """ Create a index alias with the alias as {prefix:did} """ - prefix = self.config['DEFAULT_PREFIX'] - alias = IndexRecordAlias(did=record.did, name=prefix+record.did) + prefix = self.config["DEFAULT_PREFIX"] + alias = IndexRecordAlias(did=record.did, name=prefix + record.did) session.add(alias) def get_by_alias(self, alias): @@ -767,12 +750,13 @@ def get_by_alias(self, alias): try: record = ( session.query(IndexRecord) - .filter(IndexRecord.aliases.any(name=alias)).one() + .filter(IndexRecord.aliases.any(name=alias)) + .one() ) except NoResultFound: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") return record.to_document_dict() def get_aliases_for_did(self, did): @@ -780,10 +764,7 @@ def get_aliases_for_did(self, did): Gets the aliases for a did """ with self.session as session: - query = ( - session.query(IndexRecordAlias) - .filter(IndexRecordAlias.did == did) - ) + query = session.query(IndexRecordAlias).filter(IndexRecordAlias.did == did) return [i.name for i in query] def get(self, did): @@ -795,11 +776,11 @@ def get(self, did): query = session.query(IndexRecord) query = query.filter( or_(IndexRecord.did == did, IndexRecord.baseid == did) - ).order_by(IndexRecord.created_date.desc()) + ).order_by(IndexRecord.created_date.desc()) record = query.first() if record is None: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") return record.to_document_dict() def update(self, did, rev, changing_fields): @@ -807,7 +788,7 @@ def update(self, did, rev, changing_fields): Updates an existing record with new values. 
""" - composite_fields = ['urls', 'acl', "authz", 'metadata', 'urls_metadata'] + composite_fields = ["urls", "acl", "authz", "metadata", "urls_metadata"] with self.session as session: query = session.query(IndexRecord).filter(IndexRecord.did == did) @@ -815,66 +796,59 @@ def update(self, did, rev, changing_fields): try: record = query.one() except NoResultFound: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") if rev != record.rev: - raise RevisionMismatch('revision mismatch') + raise RevisionMismatch("revision mismatch") auth.authorize("update", [u.resource for u in record.authz]) # Some operations are dependant on other operations. For example # urls has to be updated before urls_metadata because of schema # constraints. - if 'urls' in changing_fields: + if "urls" in changing_fields: for url in record.urls: session.delete(url) record.urls = [ IndexRecordUrl(did=record.did, url=url) - for url in changing_fields['urls'] + for url in changing_fields["urls"] ] - if 'acl' in changing_fields: + if "acl" in changing_fields: for ace in record.acl: session.delete(ace) record.acl = [ IndexRecordACE(did=record.did, ace=ace) - for ace in set(changing_fields['acl']) + for ace in set(changing_fields["acl"]) ] - if 'authz' in changing_fields: + if "authz" in changing_fields: for resource in record.authz: session.delete(resource) record.authz = [ IndexRecordAuthz(did=record.did, resource=resource) - for resource in set(changing_fields['authz']) + for resource in set(changing_fields["authz"]) ] - if 'metadata' in changing_fields: + if "metadata" in changing_fields: for md_record in record.index_metadata: session.delete(md_record) record.index_metadata = [ - IndexRecordMetadata( - did=record.did, - key=m_key, - value=m_value - ) - for m_key, m_value in changing_fields['metadata'].items()] + IndexRecordMetadata(did=record.did, key=m_key, value=m_value) + for m_key, m_value in changing_fields["metadata"].items() + ] - if 'urls_metadata' in changing_fields: + if "urls_metadata" in changing_fields: for url in record.urls: for url_metadata in url.url_metadata: session.delete(url_metadata) - create_urls_metadata( - changing_fields['urls_metadata'], - record, - session, - ) + create_urls_metadata(changing_fields["urls_metadata"], record, session) for key, value in changing_fields.items(): if key not in composite_fields: @@ -899,30 +873,32 @@ def delete(self, did, rev): try: record = query.one() except NoResultFound: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") if rev != record.rev: - raise RevisionMismatch('revision mismatch') + raise RevisionMismatch("revision mismatch") auth.authorize("delete", [u.resource for u in record.authz]) session.delete(record) - def add_version(self, - current_did, - form, - new_did=None, - size=None, - file_name=None, - metadata=None, - urls_metadata=None, - version=None, - urls=None, - acl=None, - authz=None, - hashes=None): + def add_version( + self, + current_did, + form, + new_did=None, + size=None, + file_name=None, + metadata=None, + urls_metadata=None, + version=None, + urls=None, + acl=None, + authz=None, + hashes=None, + ): """ Add a record version given did """ @@ -939,9 +915,9 @@ def add_version(self, try: record = query.one() 
except NoResultFound: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") auth.authorize("update", [u.resource for u in record.authz] + authz) @@ -957,39 +933,31 @@ def add_version(self, record.file_name = file_name record.version = version - record.urls = [IndexRecordUrl( - did=record.did, - url=url, - ) for url in urls] - - record.acl = [IndexRecordACE( - did=record.did, - ace=ace, - ) for ace in set(acl)] - - record.authz = [IndexRecordAuthz( - did=record.did, - resource=resource, - ) for resource in set(authz)] - - record.hashes = [IndexRecordHash( - did=record.did, - hash_type=h, - hash_value=v, - ) for h, v in hashes.items()] - - record.index_metadata = [IndexRecordMetadata( - did=record.did, - key=m_key, - value=m_value - ) for m_key, m_value in metadata.items()] + record.urls = [IndexRecordUrl(did=record.did, url=url) for url in urls] + + record.acl = [IndexRecordACE(did=record.did, ace=ace) for ace in set(acl)] + + record.authz = [ + IndexRecordAuthz(did=record.did, resource=resource) + for resource in set(authz) + ] + + record.hashes = [ + IndexRecordHash(did=record.did, hash_type=h, hash_value=v) + for h, v in hashes.items() + ] + + record.index_metadata = [ + IndexRecordMetadata(did=record.did, key=m_key, value=m_value) + for m_key, m_value in metadata.items() + ] try: session.add(record) create_urls_metadata(urls_metadata, record, session) session.commit() except IntegrityError: - raise UserError('{did} already exists'.format(did=did), 400) + raise UserError("{did} already exists".format(did=did), 400) return record.did, record.baseid, record.rev @@ -1008,11 +976,11 @@ def get_all_versions(self, did): except NoResultFound: record = session.query(BaseVersion).filter_by(baseid=did).first() if not record: - raise NoRecordFound('no record found') + raise NoRecordFound("no record found") else: baseid = record.baseid except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") query = session.query(IndexRecord) records = query.filter(IndexRecord.baseid == baseid).all() @@ -1037,16 +1005,17 @@ def get_latest_version(self, did, has_version=None): except NoResultFound: baseid = did except MultipleResultsFound: - raise MultipleRecordsFound('multiple records found') + raise MultipleRecordsFound("multiple records found") query = session.query(IndexRecord) - query = query.filter(IndexRecord.baseid == baseid) \ - .order_by(IndexRecord.created_date.desc()) + query = query.filter(IndexRecord.baseid == baseid).order_by( + IndexRecord.created_date.desc() + ) if has_version: query = query.filter(IndexRecord.version.isnot(None)) record = query.first() - if (not record): - raise NoRecordFound('no record found') + if not record: + raise NoRecordFound("no record found") return record.to_document_dict() @@ -1056,8 +1025,8 @@ def health_check(self): """ with self.session as session: try: - query = session.execute('SELECT 1') - except Exception as e: + query = session.execute("SELECT 1") # pylint: disable=unused-variable + except Exception: raise UnhealthyCheck() return True @@ -1089,7 +1058,7 @@ def totalbytes(self): result = session.execute(select([func.sum(IndexRecord.size)])).scalar() if result is None: return 0 - return long(result) + return int(result) def len(self): """ @@ -1097,13 +1066,17 @@ def len(self): """ with self.session as session: - return 
session.execute(select([func.count()]).select_from(IndexRecord)).scalar() + return session.execute( + select([func.count()]).select_from(IndexRecord) + ).scalar() def migrate_1(session, **kwargs): session.execute( - "ALTER TABLE {} ALTER COLUMN size TYPE bigint;" - .format(IndexRecord.__tablename__)) + "ALTER TABLE {} ALTER COLUMN size TYPE bigint;".format( + IndexRecord.__tablename__ + ) + ) def migrate_2(session, **kwargs): @@ -1115,20 +1088,26 @@ def migrate_2(session, **kwargs): "ALTER TABLE {} \ ADD COLUMN baseid VARCHAR DEFAULT NULL, \ ADD COLUMN created_date TIMESTAMP DEFAULT NOW(), \ - ADD COLUMN updated_date TIMESTAMP DEFAULT NOW()".format(IndexRecord.__tablename__)) + ADD COLUMN updated_date TIMESTAMP DEFAULT NOW()".format( + IndexRecord.__tablename__ + ) + ) except ProgrammingError: session.rollback() session.commit() count = session.execute( - "SELECT COUNT(*) FROM {};" - .format(IndexRecord.__tablename__)).fetchone()[0] + "SELECT COUNT(*) FROM {};".format(IndexRecord.__tablename__) + ).fetchone()[0] # create tmp_index_record table for fast retrieval try: session.execute( "CREATE TABLE tmp_index_record AS SELECT did, ROW_NUMBER() OVER (ORDER BY did) AS RowNumber \ - FROM {}".format(IndexRecord.__tablename__)) + FROM {}".format( + IndexRecord.__tablename__ + ) + ) except ProgrammingError: session.rollback() @@ -1136,39 +1115,49 @@ def migrate_2(session, **kwargs): baseid = str(uuid.uuid4()) session.execute( "UPDATE index_record SET baseid = '{}'\ - WHERE did = (SELECT did FROM tmp_index_record WHERE RowNumber = {});".format(baseid, loop + 1)) + WHERE did = (SELECT did FROM tmp_index_record WHERE RowNumber = {});".format( + baseid, loop + 1 + ) + ) session.execute( - "INSERT INTO {}(baseid) VALUES('{}');".format(BaseVersion.__tablename__, baseid)) + "INSERT INTO {}(baseid) VALUES('{}');".format( + BaseVersion.__tablename__, baseid + ) + ) session.execute( "ALTER TABLE {} \ - ADD CONSTRAINT baseid_FK FOREIGN KEY (baseid) references base_version(baseid);" - .format(IndexRecord.__tablename__)) + ADD CONSTRAINT baseid_FK FOREIGN KEY (baseid) references base_version(baseid);".format( + IndexRecord.__tablename__ + ) + ) # drop tmp table - session.execute( - "DROP TABLE IF EXISTS tmp_index_record;" - ) + session.execute("DROP TABLE IF EXISTS tmp_index_record;") def migrate_3(session, **kwargs): session.execute( - "ALTER TABLE {} ADD COLUMN file_name VARCHAR;" - .format(IndexRecord.__tablename__)) + "ALTER TABLE {} ADD COLUMN file_name VARCHAR;".format(IndexRecord.__tablename__) + ) session.execute( - "CREATE INDEX {tb}__file_name_idx ON {tb} ( file_name )" - .format(tb=IndexRecord.__tablename__)) + "CREATE INDEX {tb}__file_name_idx ON {tb} ( file_name )".format( + tb=IndexRecord.__tablename__ + ) + ) def migrate_4(session, **kwargs): session.execute( - "ALTER TABLE {} ADD COLUMN version VARCHAR;" - .format(IndexRecord.__tablename__)) + "ALTER TABLE {} ADD COLUMN version VARCHAR;".format(IndexRecord.__tablename__) + ) session.execute( - "CREATE INDEX {tb}__version_idx ON {tb} ( version )" - .format(tb=IndexRecord.__tablename__)) + "CREATE INDEX {tb}__version_idx ON {tb} ( version )".format( + tb=IndexRecord.__tablename__ + ) + ) def migrate_5(session, **kwargs): @@ -1177,20 +1166,24 @@ def migrate_5(session, **kwargs): IndexRecordUrlMetadata tables """ session.execute( - "CREATE INDEX {tb}_idx ON {tb} ( did )" - .format(tb=IndexRecordUrl.__tablename__)) + "CREATE INDEX {tb}_idx ON {tb} ( did )".format(tb=IndexRecordUrl.__tablename__) + ) session.execute( - "CREATE INDEX {tb}_idx ON {tb} ( 
did )" - .format(tb=IndexRecordHash.__tablename__)) + "CREATE INDEX {tb}_idx ON {tb} ( did )".format(tb=IndexRecordHash.__tablename__) + ) session.execute( - "CREATE INDEX {tb}_idx ON {tb} ( did )" - .format(tb=IndexRecordMetadata.__tablename__)) + "CREATE INDEX {tb}_idx ON {tb} ( did )".format( + tb=IndexRecordMetadata.__tablename__ + ) + ) session.execute( - "CREATE INDEX {tb}_idx ON {tb} ( did )" - .format(tb=IndexRecordUrlMetadata.__tablename__)) + "CREATE INDEX {tb}_idx ON {tb} ( did )".format( + tb=IndexRecordUrlMetadata.__tablename__ + ) + ) def migrate_6(session, **kwargs): @@ -1199,15 +1192,12 @@ def migrate_6(session, **kwargs): def migrate_7(session, **kwargs): existing_acls = ( - session.query(IndexRecordMetadata) - .filter_by(key='acls').yield_per(1000) + session.query(IndexRecordMetadata).filter_by(key="acls").yield_per(1000) ) for metadata in existing_acls: - acl = metadata.value.split(',') + acl = metadata.value.split(",") for ace in acl: - entry = IndexRecordACE( - did=metadata.did, - ace=ace) + entry = IndexRecordACE(did=metadata.did, ace=ace) session.add(entry) session.delete(metadata) @@ -1217,8 +1207,11 @@ def migrate_8(session, **kwargs): create index on IndexRecord.baseid """ session.execute( - "CREATE INDEX ix_{tb}_baseid ON {tb} ( baseid )" - .format(tb=IndexRecord.__tablename__)) + "CREATE INDEX ix_{tb}_baseid ON {tb} ( baseid )".format( + tb=IndexRecord.__tablename__ + ) + ) + def migrate_9(session, **kwargs): """ @@ -1226,38 +1219,56 @@ def migrate_9(session, **kwargs): create index on IndexRecord.size """ session.execute( - "CREATE INDEX ix_{tb}_size ON {tb} ( size )" - .format(tb=IndexRecord.__tablename__)) + "CREATE INDEX ix_{tb}_size ON {tb} ( size )".format( + tb=IndexRecord.__tablename__ + ) + ) session.execute( - "CREATE INDEX index_record_hash_type_value_idx ON {tb} ( hash_value, hash_type )" - .format(tb=IndexRecordHash.__tablename__)) + "CREATE INDEX index_record_hash_type_value_idx ON {tb} ( hash_value, hash_type )".format( + tb=IndexRecordHash.__tablename__ + ) + ) + def migrate_10(session, **kwargs): session.execute( - "ALTER TABLE {} ADD COLUMN uploader VARCHAR;" - .format(IndexRecord.__tablename__)) + "ALTER TABLE {} ADD COLUMN uploader VARCHAR;".format(IndexRecord.__tablename__) + ) session.execute( - "CREATE INDEX {tb}__uploader_idx ON {tb} ( uploader )" - .format(tb=IndexRecord.__tablename__)) + "CREATE INDEX {tb}__uploader_idx ON {tb} ( uploader )".format( + tb=IndexRecord.__tablename__ + ) + ) def migrate_11(session, **kwargs): session.execute( - "ALTER TABLE {} ADD COLUMN rbac VARCHAR;" - .format(IndexRecord.__tablename__)) + "ALTER TABLE {} ADD COLUMN rbac VARCHAR;".format(IndexRecord.__tablename__) + ) def migrate_12(session, **kwargs): session.execute( - "ALTER TABLE {} DROP COLUMN rbac;".format(IndexRecord.__tablename__)) + "ALTER TABLE {} DROP COLUMN rbac;".format(IndexRecord.__tablename__) + ) # ordered schema migration functions that the index should correspond to # CURRENT_SCHEMA_VERSION - 1 when it's written SCHEMA_MIGRATION_FUNCTIONS = [ - migrate_1, migrate_2, migrate_3, migrate_4, migrate_5, - migrate_6, migrate_7, migrate_8, migrate_9, migrate_10, - migrate_11, migrate_12] + migrate_1, + migrate_2, + migrate_3, + migrate_4, + migrate_5, + migrate_6, + migrate_7, + migrate_8, + migrate_9, + migrate_10, + migrate_11, + migrate_12, +] CURRENT_SCHEMA_VERSION = len(SCHEMA_MIGRATION_FUNCTIONS) diff --git a/indexd/index/drivers/query/__init__.py b/indexd/index/drivers/query/__init__.py index df85ea482..e1f7f72a5 100644 --- 
a/indexd/index/drivers/query/__init__.py +++ b/indexd/index/drivers/query/__init__.py @@ -1,13 +1,20 @@ from abc import ABCMeta, abstractmethod -class URLsQueryDriver(object): +class URLsQueryDriver(object, metaclass=ABCMeta): """Relatively abstract class for URLs querying, useful when support for other drivers is added""" - __metaclass__ = ABCMeta - @abstractmethod - def query_urls(self, exclude=None, include=None, versioned=None, offset=0, limit=1000, fields="did,urls", **kwargs): + @abstractmethod + def query_urls( + self, + exclude=None, + include=None, + versioned=None, + offset=0, + limit=1000, + fields="did,urls", + **kwargs + ): """ The exclude and include patterns are used to match per record. That is a record with 3 urls will be returned/excluded if any one of the URLs matches the include/exclude patterns Args: @@ -24,8 +31,17 @@ def query_urls(self, exclude=None, include=None, versioned=None, offset=0, limit pass @abstractmethod - def query_metadata_by_key(self, key, value, url=None, versioned=None, offset=0, - limit=1000, fields="dir,urls,rev", **kwargs): + def query_metadata_by_key( + self, + key, + value, + url=None, + versioned=None, + offset=0, + limit=1000, + fields="dir,urls,rev", + **kwargs + ): """ Queries urls_metadata based on provided key and value Args: key (str): urls_metadata key diff --git a/indexd/index/drivers/query/urls.py b/indexd/index/drivers/query/urls.py index 6ae63ddf3..90b2e7399 100644 --- a/indexd/index/drivers/query/urls.py +++ b/indexd/index/drivers/query/urls.py @@ -1,13 +1,17 @@ from sqlalchemy import func, and_ from indexd.errors import UserError -from indexd.index.drivers.alchemy import IndexRecord, IndexRecordUrl, IndexRecordUrlMetadata +from indexd.index.drivers.alchemy import ( + IndexRecord, + IndexRecordUrl, + IndexRecordUrlMetadata, +) from indexd.index.drivers.query import URLsQueryDriver driver_query_map = { "sqlite": dict(array_agg=func.group_concat, string_agg=func.group_concat), - "postgresql": dict(array_agg=func.array_agg, string_agg=func.string_agg) + "postgresql": dict(array_agg=func.array_agg, string_agg=func.string_agg), } @@ -21,24 +25,39 @@ def __init__(self, alchemy_driver): """ self.driver = alchemy_driver - def query_urls(self, exclude=None, include=None, versioned=None, offset=0, limit=1000, fields="did,urls", **kwargs): + def query_urls( + self, + exclude=None, + include=None, + versioned=None, + offset=0, + limit=1000, + fields="did,urls", + **kwargs + ): if kwargs: - raise UserError("Unexpected query parameter(s) {}".format(kwargs.keys())) + raise UserError( + "Unexpected query parameter(s) {}".format(list(kwargs.keys())) + ) - versioned = versioned.lower() in ["true", "t", "yes", "y"] if versioned else None + versioned = ( + versioned.lower() in ["true", "t", "yes", "y"] if versioned else None + ) with self.driver.session as session: # special database specific functions dependent on the selected dialect q_func = driver_query_map.get(session.bind.dialect.name) - query = session.query(IndexRecordUrl.did, q_func['string_agg'](IndexRecordUrl.url, ",")) + query = session.query( + IndexRecordUrl.did, q_func["string_agg"](IndexRecordUrl.url, ",") + ) # add version filter if versioned is not None if versioned is True: # retrieve only those with a version number query = query.outerjoin(IndexRecord) query = query.filter(IndexRecord.version.isnot(None)) - elif versioned is False: # retrieve only those without a version number + elif versioned is False: # retrieve only those without a version number query = query.outerjoin(IndexRecord) query =
query.filter(~IndexRecord.version.isnot(None)) @@ -46,30 +65,60 @@ def query_urls(self, exclude=None, include=None, versioned=None, offset=0, limit # add url filters if include and exclude: - query = query.having(and_(~q_func['string_agg'](IndexRecordUrl.url, ",").contains(exclude), - q_func['string_agg'](IndexRecordUrl.url, ",").contains(include))) + query = query.having( + and_( + ~q_func["string_agg"](IndexRecordUrl.url, ",").contains( + exclude + ), + q_func["string_agg"](IndexRecordUrl.url, ",").contains(include), + ) + ) elif include: - query = query.having(q_func['string_agg'](IndexRecordUrl.url, ",").contains(include)) + query = query.having( + q_func["string_agg"](IndexRecordUrl.url, ",").contains(include) + ) elif exclude: - query = query.having(~q_func['string_agg'](IndexRecordUrl.url, ",").contains(exclude)) + query = query.having( + ~q_func["string_agg"](IndexRecordUrl.url, ",").contains(exclude) + ) print(query) # [('did', 'urls')] - record_list = query.order_by(IndexRecordUrl.did.asc()).offset(offset).limit(limit).all() + record_list = ( + query.order_by(IndexRecordUrl.did.asc()) + .offset(offset) + .limit(limit) + .all() + ) return self._format_response(fields, record_list) - def query_metadata_by_key(self, key, value, url=None, versioned=None, offset=0, - limit=1000, fields="did,urls,rev", **kwargs): + def query_metadata_by_key( + self, + key, + value, + url=None, + versioned=None, + offset=0, + limit=1000, + fields="did,urls,rev", + **kwargs + ): if kwargs: - raise UserError("Unexpected query parameter(s) {}".format(kwargs.keys())) + raise UserError( + "Unexpected query parameter(s) {}".format(list(kwargs.keys())) + ) - versioned = versioned.lower() in ["true", "t", "yes", "y"] if versioned else None + versioned = ( + versioned.lower() in ["true", "t", "yes", "y"] if versioned else None + ) with self.driver.session as session: - query = session.query(IndexRecordUrlMetadata.did, - IndexRecordUrlMetadata.url, - IndexRecord.rev)\ - .filter(IndexRecord.did == IndexRecordUrlMetadata.did, - IndexRecordUrlMetadata.key == key, IndexRecordUrlMetadata.value == value) + query = session.query( + IndexRecordUrlMetadata.did, IndexRecordUrlMetadata.url, IndexRecord.rev + ).filter( + IndexRecord.did == IndexRecordUrlMetadata.did, + IndexRecordUrlMetadata.key == key, + IndexRecordUrlMetadata.value == value, + ) # filter by version if versioned is True: @@ -79,10 +128,17 @@ def query_metadata_by_key(self, key, value, url=None, versioned=None, offset=0, # add url filter if url: - query = query.filter(IndexRecordUrlMetadata.url.like("%{}%".format(url))) + query = query.filter( + IndexRecordUrlMetadata.url.like("%{}%".format(url)) + ) # [('did', 'url', 'rev')] - record_list = query.order_by(IndexRecordUrlMetadata.did.asc()).offset(offset).limit(limit).all() + record_list = ( + query.order_by(IndexRecordUrlMetadata.did.asc()) + .offset(offset) + .limit(limit) + .all() + ) return self._format_response(fields, record_list) @staticmethod diff --git a/indexd/index/errors.py b/indexd/index/errors.py index 74dfd30d1..bf75cc7f0 100644 --- a/indexd/index/errors.py +++ b/indexd/index/errors.py @@ -1,33 +1,40 @@ class BaseIndexError(Exception): - ''' + """ Base index error. - ''' + """ + class NoRecordFound(BaseIndexError): - ''' + """ No record error. - ''' + """ + class MultipleRecordsFound(BaseIndexError): - ''' + """ Multiple records error. - ''' + """ + class RevisionMismatch(BaseIndexError): - ''' + """ Revision mismatch.
- ''' + """ + + class UnhealthyCheck(BaseIndexError): - ''' + """ Health check failed. - ''' + """ + class AddExistedColumn(BaseIndexError): - ''' + """ Existed column error. - ''' + """ + class AddExistedTable(BaseIndexError): - ''' + """ Existed table error. - ''' \ No newline at end of file + """ diff --git a/indexd/index/schema.py b/indexd/index/schema.py index fda03649a..b86cd9ed9 100644 --- a/indexd/index/schema.py +++ b/indexd/index/schema.py @@ -1,136 +1,71 @@ POST_RECORD_SCHEMA = { - "$schema": "http://json-schema.org/schema#", - "type": "object", - "additionalProperties": False, - "description": "Create a new index from hash & size", - "required": [ - "size", - "hashes", - "urls", - "form" - ], - "properties": { - "baseid": { - "type": "string", - "pattern": "^.*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$" - }, - "form": { - "enum": [ - "object", - "container", - "multipart" - ] - }, - "size": { - "description": "Size of the data being indexed in bytes", - "type": "integer", - "minimum": 0 - }, - "file_name": { - "description": "optional file name of the object", - "type": "string", - }, - "metadata": { - "description": "optional metadata of the object", - "type": "object" - }, - "urls_metadata": { - "description": "optional urls metadata of the object", - "type": "object", - }, - "version": { - "description": "optional version string of the object", - "type": "string", - }, - "uploader": { - "description": "optional uploader of the object", - "type": "string", - }, - "urls": { - "type": "array", - "items": { - "type": "string" - } - }, - "acl": { - "type": "array", - "items": { - "type": "string" - } - }, - "authz": { - "description": "optional authorization rules of the object", - "type": "array", - "items": { - "type": "string" - } - }, - "did": { - "type": "string", - "pattern": "^.*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$" - }, - "hashes": { - "type": "object", - "properties": { - "md5": { - "type": "string", - "pattern": "^[0-9a-f]{32}$" + "$schema": "http://json-schema.org/schema#", + "type": "object", + "additionalProperties": False, + "description": "Create a new index from hash & size", + "required": ["size", "hashes", "urls", "form"], + "properties": { + "baseid": { + "type": "string", + "pattern": "^.*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", }, - "sha1": { - "type": "string", - "pattern": "^[0-9a-f]{40}$" + "form": {"enum": ["object", "container", "multipart"]}, + "size": { + "description": "Size of the data being indexed in bytes", + "type": "integer", + "minimum": 0, }, - "sha256": { - "type": "string", - "pattern": "^[0-9a-f]{64}$" + "file_name": { + "description": "optional file name of the object", + "type": "string", }, - "sha512": { - "type": "string", - "pattern": "^[0-9a-f]{128}$" + "metadata": { + "description": "optional metadata of the object", + "type": "object", }, - "crc": { - "type": "string", - "pattern": "^[0-9a-f]{8}$" + "urls_metadata": { + "description": "optional urls metadata of the object", + "type": "object", }, - "etag": { - "type": "string", - "pattern": "^[0-9a-f]{32}(-\d+)?$" - } - }, - "anyOf": [ - { - "required": [ - "md5" - ] + "version": { + "description": "optional version string of the object", + "type": "string", }, - { - "required": [ - "sha1" - ] + "uploader": { + "description": "optional uploader of the object", + "type": "string", }, - { - "required": [ - "sha256" - ] + "urls": {"type": "array", "items": {"type": 
"string"}}, + "acl": {"type": "array", "items": {"type": "string"}}, + "authz": { + "description": "optional authorization rules of the object", + "type": "array", + "items": {"type": "string"}, }, - { - "required": [ - "sha512" - ] + "did": { + "type": "string", + "pattern": "^.*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", }, - { - "required": [ - "crc" - ] + "hashes": { + "type": "object", + "properties": { + "md5": {"type": "string", "pattern": "^[0-9a-f]{32}$"}, + "sha1": {"type": "string", "pattern": "^[0-9a-f]{40}$"}, + "sha256": {"type": "string", "pattern": "^[0-9a-f]{64}$"}, + "sha512": {"type": "string", "pattern": "^[0-9a-f]{128}$"}, + "crc": {"type": "string", "pattern": "^[0-9a-f]{8}$"}, + "etag": {"type": "string", "pattern": "^[0-9a-f]{32}(-\d+)?$"}, + }, + "anyOf": [ + {"required": ["md5"]}, + {"required": ["sha1"]}, + {"required": ["sha256"]}, + {"required": ["sha512"]}, + {"required": ["crc"]}, + {"required": ["etag"]}, + ], }, - { - "required": [ - "etag" - ] - } - ] - } - } + }, } PUT_RECORD_SCHEMA = { @@ -139,38 +74,13 @@ "additionalProperties": False, "description": "Update an index", "properties": { - "urls": { - "type": "array", - "items": { - "type": "string", - } - }, - "acl": { - "type": "array", - "items": { - "type": "string", - }, - }, - "authz": { - "type": "array", - "items": { - "type": "string", - }, - }, - "file_name": { - "type": ["string", "null"], - }, - "version": { - "type": ["string", "null"], - }, - "uploader": { - "type": ["string", "null"], - }, - "metadata": { - "type": "object", - }, - "urls_metadata": { - "type": "object", - }, - } + "urls": {"type": "array", "items": {"type": "string"}}, + "acl": {"type": "array", "items": {"type": "string"}}, + "authz": {"type": "array", "items": {"type": "string"}}, + "file_name": {"type": ["string", "null"]}, + "version": {"type": ["string", "null"]}, + "uploader": {"type": ["string", "null"]}, + "metadata": {"type": "object"}, + "urls_metadata": {"type": "object"}, + }, } diff --git a/indexd/index/version_data.py b/indexd/index/version_data.py index 7e79d8c8d..7b419d25e 100644 --- a/indexd/index/version_data.py +++ b/indexd/index/version_data.py @@ -1,2 +1,2 @@ -VERSION="" -COMMIT="" +VERSION = "" +COMMIT = "" diff --git a/indexd/urls/blueprint.py b/indexd/urls/blueprint.py index 5e09ff621..373598a31 100644 --- a/indexd/urls/blueprint.py +++ b/indexd/urls/blueprint.py @@ -31,7 +31,11 @@ def query(): """ record_list = blueprint.driver.query_urls(**request.args.to_dict()) - return Response(json.dumps(record_list, indent=2, separators=(', ', ': ')), 200, mimetype="application/json") + return Response( + json.dumps(record_list, indent=2, separators=(", ", ": ")), + 200, + mimetype="application/json", + ) @blueprint.route("/metadata/q") @@ -56,7 +60,11 @@ def query_metadata(): """ record_list = blueprint.driver.query_metadata_by_key(**request.args.to_dict()) - return Response(json.dumps(record_list, indent=2, separators=(', ', ': ')), 200, mimetype="application/json") + return Response( + json.dumps(record_list, indent=2, separators=(", ", ": ")), + 200, + mimetype="application/json", + ) @blueprint.record diff --git a/indexd/utils.py b/indexd/utils.py index 46a638b85..46a550c19 100644 --- a/indexd/utils.py +++ b/indexd/utils.py @@ -1,19 +1,23 @@ import logging import re + def hint_match(record, hints): for hint in hints: if re.match(hint, record): - return True + return True return False + from sqlalchemy import create_engine from sqlalchemy.engine.reflection import Inspector 
-def try_drop_test_data(user, database, root_user='postgres', host=''): - engine = create_engine("postgres://{user}@{host}/postgres".format( - user=root_user, host=host)) +def try_drop_test_data(user, database, root_user="postgres", host=""): + + engine = create_engine( + "postgres://{user}@{host}/postgres".format(user=root_user, host=host) + ) conn = engine.connect() conn.execute("commit") @@ -26,8 +30,16 @@ def try_drop_test_data(user, database, root_user='postgres', host=''): conn.close() -def setup_database(user, password, database, root_user='postgres', - host='', no_drop=False, no_user=False): + +def setup_database( + user, + password, + database, + root_user="postgres", + host="", + no_drop=False, + no_user=False, +): """ setup the user and database """ @@ -35,8 +47,9 @@ def setup_database(user, password, database, root_user='postgres', if not no_drop: try_drop_test_data(user, database) - engine = create_engine("postgres://{user}@{host}/postgres".format( - user=root_user, host=host)) + engine = create_engine( + "postgres://{user}@{host}/postgres".format(user=root_user, host=host) + ) conn = engine.connect() conn.execute("commit") @@ -44,16 +57,19 @@ def setup_database(user, password, database, root_user='postgres', try: conn.execute(create_stmt) except Exception: - logging.warn('Unable to create database') + logging.warn("Unable to create database") if not no_user: try: user_stmt = "CREATE USER {user} WITH PASSWORD '{password}'".format( - user=user, password=password) + user=user, password=password + ) conn.execute(user_stmt) - perm_stmt = 'GRANT ALL PRIVILEGES ON DATABASE {database} to {password}'\ - ''.format(database=database, password=password) + perm_stmt = ( + "GRANT ALL PRIVILEGES ON DATABASE {database} to {password}" + "".format(database=database, password=password) + ) conn.execute(perm_stmt) conn.execute("commit") except Exception: @@ -65,8 +81,11 @@ def create_tables(host, user, password, database): """ create tables """ - engine = create_engine("postgres://{user}:{pwd}@{host}/{db}".format( - user=user, host=host, pwd=password, db=database)) + engine = create_engine( + "postgres://{user}:{pwd}@{host}/{db}".format( + user=user, host=host, pwd=password, db=database + ) + ) conn = engine.connect() create_index_record_stm = "CREATE TABLE index_record (\ @@ -85,11 +104,12 @@ def create_tables(host, user, password, database): conn.execute(create_record_url_stm) conn.execute(create_index_schema_version_stm) except Exception: - logging.warn('Unable to create table') + logging.warn("Unable to create table") conn.close() + def check_engine_for_migrate(engine): - ''' + """ check if a db engine support database migration Args: @@ -97,12 +117,12 @@ def check_engine_for_migrate(engine): Return: bool: whether the engine support migration - ''' + """ return engine.dialect.supports_alter def init_schema_version(driver, model, version): - ''' + """ initialize schema table with a initialized singleton of version Args: @@ -111,7 +131,7 @@ def init_schema_version(driver, model, version): Return: version (int): current version number in database - ''' + """ with driver.session as s: schema_version = s.query(model).first() if not schema_version: @@ -122,7 +142,7 @@ def init_schema_version(driver, model, version): def migrate_database(driver, migrate_functions, current_schema_version, model): - ''' + """ migrate current database to match the schema version provided in current schema @@ -134,40 +154,41 @@ def migrate_database(driver, migrate_functions, current_schema_version, model): Return: None 
- ''' + """ db_schema_version = init_schema_version(driver, model, 0) need_migrate = (current_schema_version - db_schema_version) > 0 if not check_engine_for_migrate(driver.engine) and need_migrate: driver.logger.error( - 'Engine {} does not support alter, skip migration'.format( - driver.engine.dialect.name)) + "Engine {} does not support alter, skip migration".format( + driver.engine.dialect.name + ) + ) return - for f in migrate_functions[ - db_schema_version:current_schema_version]: + for f in migrate_functions[db_schema_version:current_schema_version]: with driver.session as s: schema_version = s.query(model).first() schema_version.version += 1 - driver.logger.info('migrating {} schema to {}'.format( - driver.__class__.__name__, - schema_version.version)) + driver.logger.info( + "migrating {} schema to {}".format( + driver.__class__.__name__, schema_version.version + ) + ) f(engine=driver.engine, session=s) s.add(schema_version) + def is_empty_database(driver): - ''' + """ check if the database is empty or not Args: driver (object): an alias or index driver instance Returns: Boolean - ''' + """ table_list = Inspector.from_engine(driver.engine).get_table_names() return len(table_list) == 0 - - - diff --git a/requirements.txt b/requirements.txt index f4074902a..c4c995d50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,9 @@ jsonschema==2.5.1 sqlalchemy==1.3.3 sqlalchemy-utils>=0.33.11 psycopg2>=2.7 -future>=0.16.0,<1.0.0 git+https://github.com/uc-cdis/cdislogging.git@0.0.2#egg=cdislogging -git+https://github.com/uc-cdis/indexclient.git@1.3.1#egg=indexclient +git+https://github.com/uc-cdis/indexclient.git@1.6.0#egg=indexclient git+https://github.com/uc-cdis/doiclient.git@1.0.0#egg=doiclient git+https://github.com/uc-cdis/dosclient.git@1.0.0#egg=dosclient -authutils==3.1.0 +authutils==4.0.0 gen3rbac==0.1.2 diff --git a/setup.py b/setup.py index 3d6cf3c0c..eb0658e1a 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,24 @@ from setuptools import setup, find_packages setup( - name='indexd', - version='0.1', + name="indexd", + version="0.1", packages=find_packages(), - package_data={ - 'index': [ - 'schemas/*', - ] - }, + package_data={"index": ["schemas/*"]}, install_requires=[ - 'flask==0.12.4', - 'jsonschema==2.5.1', - 'sqlalchemy==1.3.3', - 'sqlalchemy-utils>=0.33.11', - 'psycopg2>=2.7', - 'future>=0.16.0,<1.0.0', - 'cdislogging', - 'indexclient', - 'doiclient', - 'dosclient', + "flask==0.12.4", + "jsonschema==2.5.1", + "sqlalchemy==1.3.3", + "sqlalchemy-utils>=0.33.11", + "psycopg2>=2.7", + "cdislogging", + "indexclient", + "doiclient", + "dosclient", ], dependency_links=[ "git+https://github.com/uc-cdis/cdislogging.git@0.0.2#egg=cdislogging", - "git+https://github.com/uc-cdis/indexclient.git@1.3.1#egg=indexclient", + "git+https://github.com/uc-cdis/indexclient.git@1.6.0#egg=indexclient", "git+https://github.com/uc-cdis/doiclient.git@1.0.0#egg=doiclient", "git+https://github.com/uc-cdis/dosclient.git@1.0.0#egg=dosclient", ], diff --git a/test-requirements.txt b/test-requirements.txt index 7b2cfad87..ec22659bc 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -6,5 +6,4 @@ pytest-cov==2.5.1 pytest-flask==0.8.1 PyYAML==5.1 swagger_spec_validator --e git+https://github.com/uc-cdis/cdisutils-test.git@0.2.2#egg=cdisutilstest --e git+https://github.com/uc-cdis/indexclient.git@1.4.0#egg=indexclient +-e git+https://github.com/uc-cdis/cdisutils-test.git@1.0.0#egg=cdisutilstest diff --git a/tests/alchemy.py b/tests/alchemy.py index b4c8e7435..3a10525aa 100644 --- 
a/tests/alchemy.py +++ b/tests/alchemy.py @@ -11,11 +11,13 @@ Base = declarative_base() CURRENT_SCHEMA_VERSION = 2 + class IndexRecord(Base): - ''' + """ Base index record representation. - ''' - __tablename__ = 'index_record' + """ + + __tablename__ = "index_record" did = Column(String, primary_key=True) @@ -24,47 +26,45 @@ class IndexRecord(Base): size = Column(BigInteger) urls = relationship( - 'IndexRecordUrl', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordUrl", backref="index_record", cascade="all, delete-orphan" ) hashes = relationship( - 'IndexRecordHash', - backref='index_record', - cascade='all, delete-orphan', + "IndexRecordHash", backref="index_record", cascade="all, delete-orphan" ) class IndexRecordUrl(Base): - ''' + """ Base index record url representation. - ''' - __tablename__ = 'index_record_url' + """ - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + __tablename__ = "index_record_url" + + did = Column(String, ForeignKey("index_record.did"), primary_key=True) url = Column(String, primary_key=True) class IndexRecordHash(Base): - ''' + """ Base index record hash representation. - ''' - __tablename__ = 'index_record_hash' + """ + + __tablename__ = "index_record_hash" - did = Column(String, ForeignKey('index_record.did'), primary_key=True) + did = Column(String, ForeignKey("index_record.did"), primary_key=True) hash_type = Column(String, primary_key=True) hash_value = Column(String) class SQLAlchemyIndexTestDriver(SQLAlchemyDriverBase): - ''' + """ SQLAlchemy implementation of index driver. - ''' + """ def __init__(self, conn, logger=None, **config): super(SQLAlchemyIndexTestDriver, self).__init__(conn, **config) - self.logger = logger or get_logger('SQLAlchemyIndexTestDriver') + self.logger = logger or get_logger("SQLAlchemyIndexTestDriver") Base.metadata.bind = self.engine Base.metadata.create_all() diff --git a/tests/conftest.py b/tests/conftest.py index f2b6dd648..4c6ac6a36 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,20 +1,14 @@ from indexd import get_app import base64 import pytest + # indexd_server and indexd_client is needed as fixtures -from cdisutilstest.code.conftest import indexd_server, indexd_client # noqa +from cdisutilstest.code.conftest import indexd_server, indexd_client # noqa from cdisutilstest.code.indexd_fixture import clear_database import swagger_client from indexd import auth - -try: - reload # Python 2.7 -except NameError: - try: - from importlib import reload # Python 3.4+ - except ImportError: - from imp import reload # Python 3.0 - 3.3 +import importlib @pytest.fixture @@ -22,7 +16,8 @@ def app(): # this is to make sure sqlite is initialized # for every unittest from indexd import default_settings - reload(default_settings) + + importlib.reload(default_settings) yield get_app() try: clear_database() @@ -33,18 +28,16 @@ def app(): @pytest.fixture def user(app): - app.auth.add('test', 'test') + app.auth.add("test", "test") yield { - 'Authorization': ( - 'Basic ' + - base64.b64encode(b'test:test').decode('ascii')), - 'Content-Type': 'application/json' + "Authorization": ("Basic " + base64.b64encode(b"test:test").decode("ascii")), + "Content-Type": "application/json", } - app.auth.delete('test') + app.auth.delete("test") @pytest.fixture -def swg_config(indexd_client): # noqa +def swg_config(indexd_client): # noqa config = swagger_client.Configuration() config.host = indexd_client.url config.username = indexd_client.auth[0] @@ -81,8 +74,9 @@ def swg_query_client(swg_config): api = 
swagger_client.QueryApi(swagger_client.ApiClient(swg_config)) yield api -@pytest.fixture -def swg_bulk_client(swg_config): + +@pytest.fixture +def swg_bulk_client(swg_config): api = swagger_client.BulkApi(swagger_client.ApiClient(swg_config)) yield api diff --git a/tests/test_blueprint.py b/tests/test_blueprint.py index 833164d9f..cfb856628 100644 --- a/tests/test_blueprint.py +++ b/tests/test_blueprint.py @@ -13,86 +13,87 @@ DIST_CONFIG = [] -INDEX_CONFIG = { - 'driver': SQLAlchemyIndexDriver('sqlite:///index.sq3'), -} +INDEX_CONFIG = {"driver": SQLAlchemyIndexDriver("sqlite:///index.sq3")} -ALIAS_CONFIG = { - 'driver': SQLAlchemyAliasDriver('sqlite:///alias.sq3'), -} +ALIAS_CONFIG = {"driver": SQLAlchemyAliasDriver("sqlite:///alias.sq3")} -@util.removes('index.sq3') -@util.removes('alias.sq3') + +@util.removes("index.sq3") +@util.removes("alias.sq3") def test_flask_blueprint(): - ''' + """ Tests standing up the server using flask. - ''' + """ app = flask.Flask(__name__) - app.config['INDEX'] = INDEX_CONFIG - app.config['ALIAS'] = ALIAS_CONFIG - app.config['DIST'] = [] + app.config["INDEX"] = INDEX_CONFIG + app.config["ALIAS"] = ALIAS_CONFIG + app.config["DIST"] = [] app.register_blueprint(indexd_bulk_blueprint) app.register_blueprint(indexd_index_blueprint) app.register_blueprint(indexd_alias_blueprint) -@util.removes('alias.sq3') + +@util.removes("alias.sq3") def test_flask_blueprint_missing_index_config(): - ''' + """ Tests standing up the server using flask without an index config. - ''' + """ app = flask.Flask(__name__) - app.config['ALIAS'] = ALIAS_CONFIG - app.config['DIST'] = [] + app.config["ALIAS"] = ALIAS_CONFIG + app.config["DIST"] = [] with pytest.raises(Exception): app.register_blueprint(indexd_index_blueprint) app.register_blueprint(indexd_alias_blueprint) -@util.removes('alias.sq3') + +@util.removes("alias.sq3") def test_flask_blueprint_invalid_index_config(): - ''' + """ Tests standing up the server using flask without an index config. - ''' + """ app = flask.Flask(__name__) - app.config['INDEX'] = None - app.config['ALIAS'] = ALIAS_CONFIG - app.config['DIST'] = [] + app.config["INDEX"] = None + app.config["ALIAS"] = ALIAS_CONFIG + app.config["DIST"] = [] with pytest.raises(Exception): app.register_blueprint(indexd_index_blueprint) app.register_blueprint(indexd_alias_blueprint) -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_flask_blueprint_missing_alias_config(): - ''' + """ Tests standing up the server using flask without an alias config. - ''' + """ app = flask.Flask(__name__) - app.config['INDEX'] = INDEX_CONFIG - app.config['DIST'] = [] + app.config["INDEX"] = INDEX_CONFIG + app.config["DIST"] = [] with pytest.raises(Exception): app.register_blueprint(indexd_alias_blueprint) app.register_blueprint(indexd_index_blueprint) -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_flask_blueprint_invalid_alias_config(): - ''' + """ Tests standing up the server using flask without an alias config. 
- ''' + """ app = flask.Flask(__name__) - app.config['INDEX'] = INDEX_CONFIG - app.config['ALIAS'] = None - app.config['DIST'] = [] + app.config["INDEX"] = INDEX_CONFIG + app.config["ALIAS"] = None + app.config["DIST"] = [] with pytest.raises(Exception): app.register_blueprint(indexd_alias_blueprint) diff --git a/tests/test_client.py b/tests/test_client.py index 6b9f0620c..111878ea7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -8,23 +8,22 @@ def get_doc( - has_metadata=True, has_baseid=False, - has_urls_metadata=False, has_version=False): + has_metadata=True, has_baseid=False, has_urls_metadata=False, has_version=False +): doc = { - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'} + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, } if has_metadata: - doc['metadata'] = {'project_id': 'bpa-UChicago'} + doc["metadata"] = {"project_id": "bpa-UChicago"} if has_baseid: - doc['baseid'] = 'e044a62c-fd60-4203-b1e5-a62d1005f027' + doc["baseid"] = "e044a62c-fd60-4203-b1e5-a62d1005f027" if has_urls_metadata: - doc['urls_metadata'] = { - 's3://endpointurl/bucket/key': {'state': 'uploaded'}} + doc["urls_metadata"] = {"s3://endpointurl/bucket/key": {"state": "uploaded"}} if has_version: - doc['version'] = '1' + doc["version"] = "1" return doc @@ -35,37 +34,34 @@ def test_index_list(swg_index_client): def test_index_list_with_params(swg_index_client): data = get_doc() - data['urls'] = ['s3://endpointurl/bucket_2/key_2', 's3://anotherurl/bucket_2/key_2'] - data['urls_metadata'] = { - 's3://endpointurl/bucket_2/key_2': {'state': 'error', 'other': 'xxx'}, - 's3://anotherurl/bucket_2/key_2': {'state': 'error', 'other': 'xxx'} + data["urls"] = ["s3://endpointurl/bucket_2/key_2", "s3://anotherurl/bucket_2/key_2"] + data["urls_metadata"] = { + "s3://endpointurl/bucket_2/key_2": {"state": "error", "other": "xxx"}, + "s3://anotherurl/bucket_2/key_2": {"state": "error", "other": "xxx"}, } r_1 = swg_index_client.add_entry(data) data2 = get_doc() - data2['metadata'] = { - 'project_id': 'other-project', 'state': 'abc', 'other': 'xxx' + data2["metadata"] = {"project_id": "other-project", "state": "abc", "other": "xxx"} + data2["urls"] = ["s3://endpointurl/bucket/key_2", "s3://anotherurl/bucket/key_2"] + data2["urls_metadata"] = { + "s3://endpointurl/bucket/key_2": {"state": "error", "other": "xxx"} } - data2['urls'] = ['s3://endpointurl/bucket/key_2', 's3://anotherurl/bucket/key_2'] - data2['urls_metadata'] = { - 's3://endpointurl/bucket/key_2': {'state': 'error', 'other': 'xxx'}} r_2 = swg_index_client.add_entry(data2) - r = swg_index_client.list_entries(metadata='project_id:bpa-UChicago') + r = swg_index_client.list_entries(metadata="project_id:bpa-UChicago") ids = [record.did for record in r.records] assert r_1.did in ids - r = swg_index_client.list_entries(metadata='project_id:other-project') + r = swg_index_client.list_entries(metadata="project_id:other-project") ids = [record.did for record in r.records] assert r_2.did in ids - r = swg_index_client.list_entries( - hash='md5:8b9942cf415384b27cadf1f4d2d682e5') + r = swg_index_client.list_entries(hash="md5:8b9942cf415384b27cadf1f4d2d682e5") ids = [record.did for record in r.records] assert r_1.did in ids assert r_2.did in ids - r = swg_index_client.list_entries( - ids=','.join(ids)) + r = swg_index_client.list_entries(ids=",".join(ids)) ids = [record.did for record in r.records] 
assert r_1.did in ids @@ -74,108 +70,115 @@ def test_index_list_with_params(swg_index_client): r = swg_index_client.list_entries(limit=2) assert len(r.records) == 2 - param = {'bucket': {'state': 'error', 'other': 'xxx'}} + param = {"bucket": {"state": "error", "other": "xxx"}} r = swg_index_client.list_entries(limit=2, urls_metadata=json.dumps(param)) assert len(r.records) == 2 + def test_index_list_with_params_negate(swg_index_client): data = get_doc() r_1 = swg_index_client.add_entry(data) - data['metadata'] = {'testkey': 'test', 'project_id': 'negate-project'} + data["metadata"] = {"testkey": "test", "project_id": "negate-project"} r_2 = swg_index_client.add_entry(data) - data['urls'] = ['s3://endpointurl/bucket_2/key_2', 's3://anotherurl/bucket_2/key_2'] - data['urls_metadata'] = {'s3://endpointurl/bucket_2/key_2': {'state': 'error'}} + data["urls"] = ["s3://endpointurl/bucket_2/key_2", "s3://anotherurl/bucket_2/key_2"] + data["urls_metadata"] = {"s3://endpointurl/bucket_2/key_2": {"state": "error"}} r_3 = swg_index_client.add_entry(data) - data['urls'] = ['s3://endpointurl/bucket_2/key_2'] - data['urls_metadata'] = {'s3://endpointurl/bucket_2/key_2': {'no_state': 'uploaded'}} + data["urls"] = ["s3://endpointurl/bucket_2/key_2"] + data["urls_metadata"] = { + "s3://endpointurl/bucket_2/key_2": {"no_state": "uploaded"} + } r_4 = swg_index_client.add_entry(data) - data['urls'] = ['s3://anotherurl/bucket/key'] - data['urls_metadata'] = {'s3://anotherurl/bucket/key': {'state': 'error'}} + data["urls"] = ["s3://anotherurl/bucket/key"] + data["urls_metadata"] = {"s3://anotherurl/bucket/key": {"state": "error"}} r_5 = swg_index_client.add_entry(data) - negate_params = {'metadata': {'testkey': ''}} + negate_params = {"metadata": {"testkey": ""}} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert {r_1.did} == ids - negate_params = {'metadata': {'project_id': 'bpa-UChicago'}} + negate_params = {"metadata": {"project_id": "bpa-UChicago"}} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert {r_2.did, r_3.did, r_4.did, r_5.did} == ids # negate url - negate_params = {'urls': ['s3://endpointurl/bucket_2/key_2']} + negate_params = {"urls": ["s3://endpointurl/bucket_2/key_2"]} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert ids == {r_1.did, r_2.did, r_5.did} # negate url key - negate_params = {'urls_metadata': {'s3://endpointurl/': {}}} + negate_params = {"urls_metadata": {"s3://endpointurl/": {}}} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert ids == {r_5.did} - negate_params = {'urls_metadata': {'s3://endpointurl/': {}, 's3://anotherurl/': {}}} + negate_params = {"urls_metadata": {"s3://endpointurl/": {}, "s3://anotherurl/": {}}} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert ids == set() # negate url_metadata key - negate_params = {'urls_metadata': {'s3://endpointurl/': {'state': ''}, 's3://anotherurl/': {}}} + negate_params = { + "urls_metadata": {"s3://endpointurl/": {"state": ""}, "s3://anotherurl/": {}} + } r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert ids == {r_4.did} # negate url_metadata value - negate_params = {'urls_metadata': 
{'s3://endpointurl/': {'state': 'uploaded'}}} + negate_params = {"urls_metadata": {"s3://endpointurl/": {"state": "uploaded"}}} r = swg_index_client.list_entries(negate_params=json.dumps(negate_params)) ids = {record.did for record in r.records} assert ids == {r_3.did, r_4.did, r_5.did} + def test_list_entries_with_uploader(swg_index_client): """ Test that return a list of record given uploader """ data = get_doc() - data['uploader'] = 'uploader_1' + data["uploader"] = "uploader_1" swg_index_client.add_entry(data) data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r2 = swg_index_client.add_entry(data) data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r3 = swg_index_client.add_entry(data) - r = swg_index_client.list_entries(uploader='uploader_123') + r = swg_index_client.list_entries(uploader="uploader_123") assert len(r.records) == 2 assert {r2.did, r3.did} == {r.records[0].did, r.records[1].did} - assert r.records[0].uploader == 'uploader_123' - assert r.records[1].uploader == 'uploader_123' + assert r.records[0].uploader == "uploader_123" + assert r.records[1].uploader == "uploader_123" + def test_list_entries_with_uploader_wrong_uploader(swg_index_client): """ Test that returns no record due to wrong uploader id """ data = get_doc() - data['uploader'] = 'uploader_1' + data["uploader"] = "uploader_1" r = swg_index_client.add_entry(data) data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r = swg_index_client.add_entry(data) data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r = swg_index_client.add_entry(data) - r = swg_index_client.list_entries(uploader='wrong_uploader') + r = swg_index_client.list_entries(uploader="wrong_uploader") assert len(r.records) == 0 @@ -185,54 +188,48 @@ def test_create_blank_record(swg_index_client): and optionally file_name fields: test without file name """ - doc = { - 'uploader': 'uploader_123' - } + doc = {"uploader": "uploader_123"} r = swg_index_client.create_blank_entry(doc) assert r.did assert r.rev assert r.baseid - r = swg_index_client.list_entries(uploader='uploader_123') - assert r.records[0].uploader == 'uploader_123' + r = swg_index_client.list_entries(uploader="uploader_123") + assert r.records[0].uploader == "uploader_123" assert not r.records[0].file_name assert_blank(r) + def test_create_blank_record_with_file_name(swg_index_client): """ Test that new blank records only contain the uploader and optionally file_name fields: test with file name """ - doc = { - 'uploader': 'uploader_321', - 'file_name': 'myfile.txt' - } + doc = {"uploader": "uploader_321", "file_name": "myfile.txt"} r = swg_index_client.create_blank_entry(doc) assert r.did assert r.rev assert r.baseid - r = swg_index_client.list_entries(uploader='uploader_321') - assert r.records[0].uploader == 'uploader_321' - assert r.records[0].file_name == 'myfile.txt' + r = swg_index_client.list_entries(uploader="uploader_321") + assert r.records[0].uploader == "uploader_321" + assert r.records[0].file_name == "myfile.txt" assert_blank(r) + def test_fill_size_n_hash_for_blank_record(swg_index_client): """ Test that can fill size and hashes for empty record """ - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r = swg_index_client.create_blank_entry(doc) assert r.did assert r.rev did, rev = r.did, r.rev - updated = { - 'size': 10, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d981f5'}, - } + updated = {"size": 10, 
"hashes": {"md5": "8b9942cf415384b27cadf1f4d2d981f5"}} r = swg_index_client.update_blank_entry(did, rev=rev, body=updated) assert r.did == did @@ -240,7 +237,8 @@ def test_fill_size_n_hash_for_blank_record(swg_index_client): r = swg_index_client.get_entry(did) assert r.size == 10 - assert r.hashes.md5 == '8b9942cf415384b27cadf1f4d2d981f5' + assert r.hashes.md5 == "8b9942cf415384b27cadf1f4d2d981f5" + def test_get_empty_acl_authz_record(swg_index_client): """ @@ -249,16 +247,16 @@ def test_get_empty_acl_authz_record(swg_index_client): doc = get_doc() r = swg_index_client.add_entry(doc) - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r2 = swg_index_client.create_blank_entry(doc) - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r3 = swg_index_client.create_blank_entry(doc) r = swg_index_client.list_entries() assert len(r.records) == 3 - r = swg_index_client.list_entries(uploader='uploader_123', acl='null', authz="null") + r = swg_index_client.list_entries(uploader="uploader_123", acl="null", authz="null") assert len(r.records) == 2 assert {r2.did, r3.did} == {r.records[0].did, r.records[1].did} @@ -274,133 +272,130 @@ def test_get_empty_acl_authz_record_after_fill_size_n_hash(swg_index_client): acl/authz """ # create the first blank record, update size, hashes and acl/authz - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r1 = swg_index_client.create_blank_entry(doc) - updated = { - 'size': 10, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d981f5'}, - } + updated = {"size": 10, "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d981f5"}} did1 = r1.did r1 = swg_index_client.update_blank_entry(r1.did, rev=r1.rev, body=updated) - r1 = swg_index_client.update_entry(r1.did, rev=r1.rev, body={ - 'acl': ['read'], "authz": ["read"]}) + r1 = swg_index_client.update_entry( + r1.did, rev=r1.rev, body={"acl": ["read"], "authz": ["read"]} + ) r1 = swg_index_client.get_entry(r1.did) - assert r1.acl == ['read'] - assert r1.authz == ['read'] + assert r1.acl == ["read"] + assert r1.authz == ["read"] assert r1.did == did1 # create the second blank record, only update size hashes and urls - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r2 = swg_index_client.create_blank_entry(doc) did2 = r2.did updated = { - 'size': 4, - 'hashes': {'md5': '1b9942cf415384b27cadf1f4d2d981f5'}, - 'urls': ['s3://example/1'], + "size": 4, + "hashes": {"md5": "1b9942cf415384b27cadf1f4d2d981f5"}, + "urls": ["s3://example/1"], } # create the second blank record, only update size hashes and urls - doc = {'uploader': 'uploader_123'} + doc = {"uploader": "uploader_123"} r3 = swg_index_client.create_blank_entry(doc) did3 = r3.did updated = { - 'size': 4, - 'hashes': {'md5': '1b9942cf415384b27cadf1f4d2d981f5'}, - 'urls': ['s3://example/2'], + "size": 4, + "hashes": {"md5": "1b9942cf415384b27cadf1f4d2d981f5"}, + "urls": ["s3://example/2"], } swg_index_client.update_blank_entry(r3.did, rev=r3.rev, body=updated) - r = swg_index_client.list_entries(uploader='uploader_123') + r = swg_index_client.list_entries(uploader="uploader_123") assert len(r.records) == 3 - r = swg_index_client.list_entries(uploader='uploader_123', acl='read') + r = swg_index_client.list_entries(uploader="uploader_123", acl="read") assert len(r.records) == 1 assert r.records[0].did == r1.did - - r = swg_index_client.list_entries(uploader='uploader_123', acl='write') + r = swg_index_client.list_entries(uploader="uploader_123", acl="write") assert len(r.records) == 0 - r = 
swg_index_client.list_entries(uploader='uploader_123', acl='null') + r = swg_index_client.list_entries(uploader="uploader_123", acl="null") assert len(r.records) == 2 assert {r.records[0].did, r.records[1].did} == {did2, did3} + def test_urls_metadata(swg_index_client): data = get_doc(has_urls_metadata=True) result = swg_index_client.add_entry(data) doc = swg_index_client.get_entry(result.did) - assert doc.urls_metadata == data['urls_metadata'] + assert doc.urls_metadata == data["urls_metadata"] - updated = {'urls_metadata': {data['urls'][0]: {'test': 'b'}}} + updated = {"urls_metadata": {data["urls"][0]: {"test": "b"}}} swg_index_client.update_entry(doc.did, rev=doc.rev, body=updated) doc = swg_index_client.get_entry(result.did) - assert doc.urls_metadata == updated['urls_metadata'] - - -@pytest.mark.parametrize('doc_urls,urls_meta,params,expected', [ - ( - [ - ['s3://endpoint/key_1'], ['s3://endpoint/key_2'], - ['s3://endpoint/key_3'] - ], - { - 's3://endpoint/key_1': {'state': 'uploaded'}, - 's3://endpoint/key_2': {'state': 'validated'}, - 's3://endpoint/key_3': {'state': 'uploaded', 'type': 'ceph'} - }, - {'s3://endpoint': {'state': 'uploaded'}}, - ['s3://endpoint/key_1', 's3://endpoint/key_3'] - ), - ( - [['s3://endpoint/key_1'], ['s3://endpoint/key_2']], - { - 's3://endpoint/key_1': {'state': 'uploaded'}, - 's3://endpoint/key_2': {'state': 'validated'}, - }, - {'s3://endpoint': {'key': 'nonexistent'}}, - [] - ), - ( - [ - ['s3://endpoint/key_1'], - ['s3://endpoint/key_2', 's3://endpoint/key_3'], - ['s3://endpoint/key_4'] - ], - { - 's3://endpoint/key_1': {'state': 'uploaded', 'type': 'cleversafe'}, - 's3://endpoint/key_2': {'state': 'uploaded', 'type': 'ceph'}, - 's3://endpoint/key_3': {'state': 'validated', 'type': 'cleversafe'}, - 's3://endpoint/key_4': {'state': 'uploaded'}, - }, - {'s3://endpoint': {'state': 'uploaded', 'type': 'cleversafe'}}, - ['s3://endpoint/key_1'] - ), - ( - [['s3://endpoint/key']], - {'s3://endpoint/key': {'state': 'whatever'}}, - {'s3://endpoint': {}}, - ['s3://endpoint/key'] - ) -]) -def test_urls_metadata_partial_match(swg_index_client, doc_urls, urls_meta, - params, expected): + assert doc.urls_metadata == updated["urls_metadata"] + + +@pytest.mark.parametrize( + "doc_urls,urls_meta,params,expected", + [ + ( + [["s3://endpoint/key_1"], ["s3://endpoint/key_2"], ["s3://endpoint/key_3"]], + { + "s3://endpoint/key_1": {"state": "uploaded"}, + "s3://endpoint/key_2": {"state": "validated"}, + "s3://endpoint/key_3": {"state": "uploaded", "type": "ceph"}, + }, + {"s3://endpoint": {"state": "uploaded"}}, + ["s3://endpoint/key_1", "s3://endpoint/key_3"], + ), + ( + [["s3://endpoint/key_1"], ["s3://endpoint/key_2"]], + { + "s3://endpoint/key_1": {"state": "uploaded"}, + "s3://endpoint/key_2": {"state": "validated"}, + }, + {"s3://endpoint": {"key": "nonexistent"}}, + [], + ), + ( + [ + ["s3://endpoint/key_1"], + ["s3://endpoint/key_2", "s3://endpoint/key_3"], + ["s3://endpoint/key_4"], + ], + { + "s3://endpoint/key_1": {"state": "uploaded", "type": "cleversafe"}, + "s3://endpoint/key_2": {"state": "uploaded", "type": "ceph"}, + "s3://endpoint/key_3": {"state": "validated", "type": "cleversafe"}, + "s3://endpoint/key_4": {"state": "uploaded"}, + }, + {"s3://endpoint": {"state": "uploaded", "type": "cleversafe"}}, + ["s3://endpoint/key_1"], + ), + ( + [["s3://endpoint/key"]], + {"s3://endpoint/key": {"state": "whatever"}}, + {"s3://endpoint": {}}, + ["s3://endpoint/key"], + ), + ], +) +def test_urls_metadata_partial_match( + swg_index_client, doc_urls, urls_meta, 
params, expected +): url_doc_mapping = {} for url_group in doc_urls: data = get_doc(has_urls_metadata=True) - data['urls'] = url_group - data['urls_metadata'] = {} + data["urls"] = url_group + data["urls_metadata"] = {} for url in url_group: - data['urls_metadata'][url] = urls_meta[url] + data["urls_metadata"][url] = urls_meta[url] record = swg_index_client.add_entry(data) for url in url_group: url_doc_mapping[url] = record - docs = swg_index_client.list_entries( - urls_metadata=json.dumps(params) - ) + docs = swg_index_client.list_entries(urls_metadata=json.dumps(params)) ids = {r.did for r in docs.records} assert ids == {url_doc_mapping[url].did for url in expected} @@ -411,9 +406,9 @@ def test_get_urls(swg_index_client, swg_global_client): result = swg_index_client.add_entry(data) result = swg_global_client.list_urls(ids=result.did) - url = data['urls'][0] + url = data["urls"][0] assert result.urls[0].url == url - assert result.urls[0].metadata == data['urls_metadata'][url] + assert result.urls[0].metadata == data["urls_metadata"][url] def test_index_create(swg_index_client): @@ -421,7 +416,7 @@ def test_index_create(swg_index_client): result = swg_index_client.add_entry(data) assert result.did - assert result.baseid == data['baseid'] + assert result.baseid == data["baseid"] r = swg_index_client.get_entry(result.did) assert r.acl == [] assert r.authz == [] @@ -443,7 +438,7 @@ def test_index_prepend_prefix(swg_index_client): result = swg_index_client.add_entry(data) r = swg_index_client.get_entry(result.did) assert r.did == result.did - assert r.did.startswith('testprefix:') + assert r.did.startswith("testprefix:") def test_index_get_with_baseid(swg_index_client): @@ -453,7 +448,7 @@ def test_index_get_with_baseid(swg_index_client): data2 = get_doc(has_baseid=True) r2 = swg_index_client.add_entry(data2) - r = swg_index_client.get_entry(data1['baseid']) + r = swg_index_client.get_entry(data1["baseid"]) assert r.did == r2.did @@ -465,14 +460,14 @@ def test_delete_and_recreate(swg_index_client): old_data = get_doc(has_baseid=True) new_data = get_doc(has_baseid=True) - new_data['hashes'] = {'md5': '11111111111111111111111111111111'} + new_data["hashes"] = {"md5": "11111111111111111111111111111111"} old_result = swg_index_client.add_entry(old_data) assert old_result.did - assert old_result.baseid == old_data['baseid'] + assert old_result.baseid == old_data["baseid"] # create a new doc with the same did - new_data['did'] = old_result.did + new_data["did"] = old_result.did # delete the old doc swg_index_client.delete_entry(old_result.did, old_result.rev) @@ -485,22 +480,22 @@ def test_delete_and_recreate(swg_index_client): assert new_result.did # verify that they are the same - assert new_result.baseid == new_data['baseid'] + assert new_result.baseid == new_data["baseid"] assert new_result.did == old_result.did assert new_result.baseid == old_result.baseid # verify that new data is in the new node new_doc = swg_index_client.get_entry(new_result.did) - assert new_data['baseid'] == new_doc.baseid - assert new_data['urls'] == new_doc.urls - assert new_data['hashes']['md5'] == new_doc.hashes.md5 + assert new_data["baseid"] == new_doc.baseid + assert new_data["urls"] == new_doc.urls + assert new_data["hashes"]["md5"] == new_doc.hashes.md5 def test_index_create_with_multiple_hashes(swg_index_client): data = get_doc() - data['hashes'] = { - 'md5': '8b9942cf415384b27cadf1f4d2d682e5', - 'sha1': 'fdbbca63fbec1c2b0d4eb2494ce91520ec9f55f5' + data["hashes"] = { + "md5": "8b9942cf415384b27cadf1f4d2d682e5", + 
"sha1": "fdbbca63fbec1c2b0d4eb2494ce91520ec9f55f5", } result = swg_index_client.add_entry(data) @@ -509,46 +504,48 @@ def test_index_create_with_multiple_hashes(swg_index_client): def test_index_create_with_valid_did(swg_index_client): data = get_doc() - data['did'] = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' + data["did"] = "3d313755-cbb4-4b08-899d-7bbac1f6e67d" result = swg_index_client.add_entry(data) - assert result.did == '3d313755-cbb4-4b08-899d-7bbac1f6e67d' + assert result.did == "3d313755-cbb4-4b08-899d-7bbac1f6e67d" def test_index_create_with_acl_authz(swg_index_client): data = { - 'acl': ['a', 'b'], - 'authz': ['x', 'y'], - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}} + "acl": ["a", "b"], + "authz": ["x", "y"], + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + } r = swg_index_client.add_entry(data) result = swg_index_client.get_entry(r.did) - assert result.acl == ['a', 'b'] - assert result.authz == ['x', 'y'] + assert result.acl == ["a", "b"] + assert result.authz == ["x", "y"] def test_index_create_with_duplicate_acl_authz(swg_index_client): data = { - 'acl': ['a', 'b', 'a'], - 'authz': ['x', 'y', 'x'], - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}} + "acl": ["a", "b", "a"], + "authz": ["x", "y", "x"], + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + } r = swg_index_client.add_entry(data) result = swg_index_client.get_entry(r.did) - assert result.acl == ['a', 'b'] - assert result.authz == ['x', 'y'] + assert result.acl == ["a", "b"] + assert result.authz == ["x", "y"] def test_index_create_with_invalid_did(swg_index_client): data = get_doc() - data['did'] = '3d313755-cbb4-4b0fdfdfd8-899d-7bbac1f6e67dfdd' + data["did"] = "3d313755-cbb4-4b0fdfdfd8-899d-7bbac1f6e67dfdd" with pytest.raises(ApiException) as e: swg_index_client.add_entry(data) @@ -557,15 +554,15 @@ def test_index_create_with_invalid_did(swg_index_client): def test_index_create_with_prefix(swg_index_client): data = get_doc() - data['did'] = 'cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d' + data["did"] = "cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d" r = swg_index_client.add_entry(data) - assert r.did == 'cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d' + assert r.did == "cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d" def test_index_create_with_duplicate_did(swg_index_client): data = get_doc() - data['did'] = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' + data["did"] = "3d313755-cbb4-4b08-899d-7bbac1f6e67d" swg_index_client.add_entry(data) @@ -576,43 +573,42 @@ def test_index_create_with_duplicate_did(swg_index_client): def test_index_create_with_file_name(swg_index_client): data = get_doc() - data['file_name'] = 'abc' + data["file_name"] = "abc" r = swg_index_client.add_entry(data) r = swg_index_client.get_entry(r.did) - assert r.file_name == 'abc' + assert r.file_name == "abc" def test_index_create_with_version(swg_index_client): data = get_doc() - data['version'] = 'ver_123' + data["version"] = "ver_123" r = swg_index_client.add_entry(data) r = swg_index_client.get_entry(r.did) - assert r.version == data['version'] + assert r.version == data["version"] + def test_index_create_blank_record(swg_index_client): - doc = { - 'uploader': 'uploader_123', - 'baseid': 'baseid_123' - } + doc = {"uploader": 
"uploader_123", "baseid": "baseid_123"} r = swg_index_client.create_blank_entry(doc) assert r.did res = swg_index_client.get_entry(r.did) - assert res.acl==[] + assert res.acl == [] assert res.authz == [] assert res.urls_metadata == {} assert res.size is None assert res.version is None assert res.urls_metadata == {} + def test_index_create_with_uploader(swg_index_client): data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r = swg_index_client.add_entry(data) r = swg_index_client.get_entry(r.did) - assert r.uploader == data['uploader'] + assert r.uploader == data["uploader"] def test_index_get_global_endpoint(swg_global_client, swg_index_client): @@ -621,13 +617,13 @@ def test_index_get_global_endpoint(swg_global_client, swg_index_client): r = swg_index_client.add_entry(data) r = swg_global_client.get_entry(r.did) - assert r.metadata == data['metadata'] - assert r.form == 'object' - assert r.size == data['size'] - assert r.urls == data['urls'] - assert r.hashes.md5 == data['hashes']['md5'] + assert r.metadata == data["metadata"] + assert r.form == "object" + assert r.size == data["size"] + assert r.urls == data["urls"] + assert r.hashes.md5 == data["hashes"]["md5"] - r2 = swg_global_client.get_entry('testprefix:'+r.did) + r2 = swg_global_client.get_entry("testprefix:" + r.did) assert r2.did == r.did @@ -637,75 +633,76 @@ def test_index_update(swg_index_client): r = swg_index_client.add_entry(data) assert r.did assert r.rev - assert swg_index_client.get_entry(r.did).metadata == data['metadata'] + assert swg_index_client.get_entry(r.did).metadata == data["metadata"] dataNew = get_doc() - del dataNew['hashes'] - del dataNew['size'] - del dataNew['form'] - dataNew['metadata'] = {'test': 'abcd'} - dataNew['version'] = 'ver123' - dataNew['acl'] = ['a', 'b'] - dataNew['authz'] = ['x', 'y'] + del dataNew["hashes"] + del dataNew["size"] + del dataNew["form"] + dataNew["metadata"] = {"test": "abcd"} + dataNew["version"] = "ver123" + dataNew["acl"] = ["a", "b"] + dataNew["authz"] = ["x", "y"] r2 = swg_index_client.update_entry(r.did, rev=r.rev, body=dataNew) assert r2.rev != r.rev result = swg_index_client.get_entry(r.did) - assert result.metadata == dataNew['metadata'] - assert result.acl == dataNew['acl'] - assert result.authz == dataNew['authz'] + assert result.metadata == dataNew["metadata"] + assert result.acl == dataNew["acl"] + assert result.authz == dataNew["authz"] data = get_doc() - data['did'] = 'cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d' + data["did"] = "cdis:3d313755-cbb4-4b08-899d-7bbac1f6e67d" r = swg_index_client.add_entry(data) assert r.did assert r.rev dataNew = { - 'urls': ['s3://endpointurl/bucket/key'], - 'file_name': 'test', - 'version': 'ver123', - } + "urls": ["s3://endpointurl/bucket/key"], + "file_name": "test", + "version": "ver123", + } r2 = swg_index_client.update_entry(r.did, rev=r.rev, body=dataNew) assert r2.rev != r.rev + def test_index_update_duplicate_acl_authz(swg_index_client): data = get_doc() r = swg_index_client.add_entry(data) assert r.did assert r.rev - assert swg_index_client.get_entry(r.did).metadata == data['metadata'] + assert swg_index_client.get_entry(r.did).metadata == data["metadata"] dataNew = get_doc() - del dataNew['hashes'] - del dataNew['size'] - del dataNew['form'] - dataNew['metadata'] = {'test': 'abcd'} - dataNew['version'] = 'ver123' - dataNew['acl'] = ['c', 'd', 'c'] - dataNew['authz'] = ['x', 'y', 'x'] + del dataNew["hashes"] + del dataNew["size"] + del dataNew["form"] + dataNew["metadata"] = {"test": 
"abcd"} + dataNew["version"] = "ver123" + dataNew["acl"] = ["c", "d", "c"] + dataNew["authz"] = ["x", "y", "x"] r2 = swg_index_client.update_entry(r.did, rev=r.rev, body=dataNew) assert r2.rev != r.rev result = swg_index_client.get_entry(r.did) - assert result.metadata == dataNew['metadata'] - assert result.acl == ['c', 'd'] - assert result.authz == ['x', 'y'] + assert result.metadata == dataNew["metadata"] + assert result.acl == ["c", "d"] + assert result.authz == ["x", "y"] def test_update_uploader_field(swg_index_client): data = get_doc() - data['uploader'] = 'uploader_123' + data["uploader"] = "uploader_123" r = swg_index_client.add_entry(data) assert r.did assert r.rev r = swg_index_client.get_entry(r.did) - assert r.uploader == 'uploader_123' + assert r.uploader == "uploader_123" - updated = {'uploader': 'new_uploader'} + updated = {"uploader": "new_uploader"} swg_index_client.update_entry(r.did, rev=r.rev, body=updated) r = swg_index_client.get_entry(r.did) - assert r.uploader == 'new_uploader' + assert r.uploader == "new_uploader" - updated = {'uploader': None} + updated = {"uploader": None} swg_index_client.update_entry(r.did, rev=r.rev, body=updated) r = swg_index_client.get_entry(r.did) @@ -738,17 +735,17 @@ def test_create_index_version(swg_index_client): assert r.baseid dataNew = { - 'did': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', - 'form': 'object', - 'size': 244, - 'urls': ['s3://endpointurl/bucket2/key'], - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d981f5'}, - 'acl': ['a'], + "did": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", + "form": "object", + "size": 244, + "urls": ["s3://endpointurl/bucket2/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d981f5"}, + "acl": ["a"], } r2 = swg_index_client.add_new_version(r.did, body=dataNew) assert r2.baseid == r.baseid - assert r2.did == dataNew['did'] + assert r2.did == dataNew["did"] def test_get_latest_version(swg_index_client): @@ -785,12 +782,13 @@ def test_alias_list(swg_alias_client): def test_alias_create(swg_alias_client): data = { - 'size': 123, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}, - 'release': 'private', - 'keeper_authority': 'CRI', 'host_authorities': ['PDC'], + "size": 123, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "release": "private", + "keeper_authority": "CRI", + "host_authorities": ["PDC"], } - ark = 'ark:/31807/TEST-abc' + ark = "ark:/31807/TEST-abc" r = swg_alias_client.upsert_entry(ark, body=data) assert r.name == ark @@ -800,12 +798,13 @@ def test_alias_create(swg_alias_client): def test_alias_get_global_endpoint(swg_alias_client, swg_global_client): data = { - 'size': 123, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}, - 'release': 'private', - 'keeper_authority': 'CRI', 'host_authorities': ['PDC'], + "size": 123, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "release": "private", + "keeper_authority": "CRI", + "host_authorities": ["PDC"], } - ark = 'ark:/31807/TEST-abc' + ark = "ark:/31807/TEST-abc" swg_alias_client.upsert_entry(ark, body=data) @@ -814,21 +813,23 @@ def test_alias_get_global_endpoint(swg_alias_client, swg_global_client): def test_alias_update(swg_alias_client): data = { - 'size': 123, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}, - 'release': 'private', - 'keeper_authority': 'CRI', 'host_authorities': ['PDC'], + "size": 123, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "release": "private", + "keeper_authority": "CRI", + "host_authorities": ["PDC"], } - ark = 'ark:/31807/TEST-abc' + ark = 
"ark:/31807/TEST-abc" r = swg_alias_client.upsert_entry(ark, body=data) assert r.rev dataNew = { - 'size': 456, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}, - 'release': 'private', - 'keeper_authority': 'CRI', 'host_authorities': ['PDC'], + "size": 456, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "release": "private", + "keeper_authority": "CRI", + "host_authorities": ["PDC"], } r2 = swg_alias_client.upsert_entry(ark, rev=r.rev, body=dataNew) assert r2.rev != r.rev @@ -836,12 +837,13 @@ def test_alias_update(swg_alias_client): def test_alias_delete(swg_alias_client): data = { - 'size': 123, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}, - 'release': 'private', - 'keeper_authority': 'CRI', 'host_authorities': ['PDC'], + "size": 123, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, + "release": "private", + "keeper_authority": "CRI", + "host_authorities": ["PDC"], } - ark = 'ark:/31807/TEST-abc' + ark = "ark:/31807/TEST-abc" r = swg_alias_client.upsert_entry(ark, body=data) assert r.rev @@ -851,68 +853,80 @@ def test_alias_delete(swg_alias_client): assert len(swg_alias_client.list_entries().aliases) == 0 -@pytest.mark.parametrize('typ,h', [ - ('md5', '8b9942cf415384b27cadf1f4d2d682e5'), - ('etag', '8b9942cf415384b27cadf1f4d2d682e5'), - ('etag', '8b9942cf415384b27cadf1f4d2d682e5-2311'), - ('sha1', '1b64db0c5ef4fa349b5e37403c745e7ef4caa350'), - ('sha256', '4ff2d1da9e33bb0c45f7b0e5faa1a5f5' + - 'e6250856090ff808e2c02be13b6b4258'), - ('sha512', '65de2c01a38d2d88bd182526305' + - '56ed443b56fd51474cb7c0930d0b62b608' + - 'a3c7d9e27d53269f9a356a2af9bd4c18d5' + - '368e66dd9f2412b82e325de3c5a4c21b3'), - ('crc', '997a6f5c'), -]) +@pytest.mark.parametrize( + "typ,h", + [ + ("md5", "8b9942cf415384b27cadf1f4d2d682e5"), + ("etag", "8b9942cf415384b27cadf1f4d2d682e5"), + ("etag", "8b9942cf415384b27cadf1f4d2d682e5-2311"), + ("sha1", "1b64db0c5ef4fa349b5e37403c745e7ef4caa350"), + ( + "sha256", + "4ff2d1da9e33bb0c45f7b0e5faa1a5f5" + "e6250856090ff808e2c02be13b6b4258", + ), + ( + "sha512", + "65de2c01a38d2d88bd182526305" + + "56ed443b56fd51474cb7c0930d0b62b608" + + "a3c7d9e27d53269f9a356a2af9bd4c18d5" + + "368e66dd9f2412b82e325de3c5a4c21b3", + ), + ("crc", "997a6f5c"), + ], +) def test_good_hashes(client, user, typ, h): data = { - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'file_name': 'abc', - 'version': 'ver_123', - 'hashes': {typ: h} + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "file_name": "abc", + "version": "ver_123", + "hashes": {typ: h}, } - resp = client.post('/index/', data=json.dumps(data), headers=user) + resp = client.post("/index/", data=json.dumps(data), headers=user) assert resp.status_code == 200 json_resp = resp.json - assert 'error' not in json_resp - - -@pytest.mark.parametrize('typ,h', [ - ('', ''), - ('blah', 'aaa'), - ('not_supported', '8b9942cf415384b27cadf1f4d2d682e5'), - ('md5', 'not valid'), - ('crc', 'not valid'), - ('etag', ''), - ('etag', '8b9942cf415384b27cadf1f4d2d682e5-'), - ('etag', '8b9942cf415384b27cadf1f4d2d682e5-afffafb'), - ('sha1', '8b9942cf415384b27cadf1f4d2d682e5'), - ('sha256', 'not valid'), - ('sha512', 'not valid'), -]) + assert "error" not in json_resp + + +@pytest.mark.parametrize( + "typ,h", + [ + ("", ""), + ("blah", "aaa"), + ("not_supported", "8b9942cf415384b27cadf1f4d2d682e5"), + ("md5", "not valid"), + ("crc", "not valid"), + ("etag", ""), + ("etag", "8b9942cf415384b27cadf1f4d2d682e5-"), + ("etag", "8b9942cf415384b27cadf1f4d2d682e5-afffafb"), + 
("sha1", "8b9942cf415384b27cadf1f4d2d682e5"), + ("sha256", "not valid"), + ("sha512", "not valid"), + ], +) def test_bad_hashes(client, user, typ, h): data = { - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'file_name': 'abc', - 'version': 'ver_123', - 'hashes': {typ: h} + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "file_name": "abc", + "version": "ver_123", + "hashes": {typ: h}, } - resp = client.post('/index/', data=json.dumps(data), headers=user) + resp = client.post("/index/", data=json.dumps(data), headers=user) assert resp.status_code == 400 json_resp = resp.json - assert 'error' in json_resp + assert "error" in json_resp if typ not in ACCEPTABLE_HASHES: - assert 'is not valid' in json_resp['error'] + assert "is not valid" in json_resp["error"] else: - assert 'does not match' in json_resp['error'] + assert "does not match" in json_resp["error"] + def test_dos_get(swg_index_client, swg_dos_client): data = get_doc(has_urls_metadata=True, has_metadata=True, has_baseid=True) @@ -924,8 +938,8 @@ def test_dos_get(swg_index_client, swg_dos_client): assert r.data_object.checksums[0].checksum == "8b9942cf415384b27cadf1f4d2d682e5" assert r.data_object.checksums[0].type == "md5" assert r.data_object.urls[0].url == "s3://endpointurl/bucket/key" - assert r.data_object.urls[0].user_metadata['state'] == "uploaded" - assert r.data_object.urls[0].system_metadata['project_id'] == "bpa-UChicago" + assert r.data_object.urls[0].user_metadata["state"] == "uploaded" + assert r.data_object.urls[0].system_metadata["project_id"] == "bpa-UChicago" r2 = swg_dos_client.get_data_object(result.baseid) assert r2.data_object.id == result.did @@ -941,8 +955,8 @@ def test_dos_list(swg_index_client, swg_dos_client): assert r.data_objects[0].checksums[0].checksum == "8b9942cf415384b27cadf1f4d2d682e5" assert r.data_objects[0].checksums[0].type == "md5" assert r.data_objects[0].urls[0].url == "s3://endpointurl/bucket/key" - assert r.data_objects[0].urls[0].user_metadata['state'] == "uploaded" - assert r.data_objects[0].urls[0].system_metadata['project_id'] == "bpa-UChicago" + assert r.data_objects[0].urls[0].user_metadata["state"] == "uploaded" + assert r.data_objects[0].urls[0].system_metadata["project_id"] == "bpa-UChicago" def test_update_without_changing_fields(swg_index_client): @@ -953,7 +967,7 @@ def test_update_without_changing_fields(swg_index_client): first_doc = swg_index_client.get_entry(result.did) # update - updated = {'version': 'at least 2'} + updated = {"version": "at least 2"} swg_index_client.update_entry(first_doc.did, rev=first_doc.rev, body=updated) # Check if update successful. @@ -969,7 +983,7 @@ def test_update_without_changing_fields(swg_index_client): # Change `version` to null. # update - updated = {'version': None} + updated = {"version": None} swg_index_client.update_entry(second_doc.did, rev=second_doc.rev, body=updated) # check if update successful @@ -977,17 +991,15 @@ def test_update_without_changing_fields(swg_index_client): # Only `version` changed. 
assert second_doc.version != third_doc.version + def test_bulk_get_documents(swg_index_client, swg_bulk_client): # just make a bunch of entries in indexd - dids = [ - swg_index_client.add_entry(get_doc(has_baseid=True)).did - for _ in range(20) - ] + dids = [swg_index_client.add_entry(get_doc(has_baseid=True)).did for _ in range(20)] # do a bulk query for them all docs = swg_bulk_client.get_bulk_ids(dids) # compare that they are the same by did for doc in docs: - assert doc['did'] in dids + assert doc["did"] in dids diff --git a/tests/test_driver_alchemy_auth.py b/tests/test_driver_alchemy_auth.py index d1a35ef02..7afbd3b44 100644 --- a/tests/test_driver_alchemy_auth.py +++ b/tests/test_driver_alchemy_auth.py @@ -10,75 +10,90 @@ from indexd.auth.drivers.alchemy import SQLAlchemyAuthDriver -USERNAME = 'abc' -PASSWORD = '123' +USERNAME = "abc" +PASSWORD = "123" DIGESTED = SQLAlchemyAuthDriver.digest(PASSWORD) # TODO check if pytest has utilities for meta-programming of tests -@util.removes('auth.sq3') + +@util.removes("auth.sq3") def test_driver_init_does_not_create_records(): - ''' + """ Tests for creation of records after driver init. Tests driver init does not have unexpected side-effects. - ''' - driver = SQLAlchemyAuthDriver('sqlite:///auth.sq3') + """ + driver = SQLAlchemyAuthDriver("sqlite:///auth.sq3") # pylint: disable=unused-variable - with sqlite3.connect('auth.sq3') as conn: + with sqlite3.connect("auth.sq3") as conn: - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM auth_record - ''').fetchone()[0] + """ + ).fetchone()[0] + + assert count == 0, "driver created records upon initilization" - assert count == 0, 'driver created records upon initilization' -@util.removes('auth.sq3') +@util.removes("auth.sq3") def test_driver_auth_accepts_good_creds(): - ''' + """ Tests driver accepts good creds. - ''' - driver = SQLAlchemyAuthDriver('sqlite:///auth.sq3') + """ + driver = SQLAlchemyAuthDriver("sqlite:///auth.sq3") - with sqlite3.connect('auth.sq3') as conn: + with sqlite3.connect("auth.sq3") as conn: - conn.execute(''' + conn.execute( + """ INSERT INTO auth_record VALUES (?,?) - ''', (USERNAME, DIGESTED)) + """, + (USERNAME, DIGESTED), + ) driver.auth(USERNAME, PASSWORD) -@util.removes('auth.sq3') + +@util.removes("auth.sq3") def test_driver_auth_rejects_bad_creds(): - ''' + """ Test driver rejects bad creds. - ''' - driver = SQLAlchemyAuthDriver('sqlite:///auth.sq3') + """ + driver = SQLAlchemyAuthDriver("sqlite:///auth.sq3") - with sqlite3.connect('auth.sq3') as conn: + with sqlite3.connect("auth.sq3") as conn: - conn.execute(''' + conn.execute( + """ INSERT INTO auth_record VALUES (?, ?) - ''', (USERNAME, DIGESTED)) + """, + (USERNAME, DIGESTED), + ) with pytest.raises(AuthError): - driver.auth(USERNAME, 'invalid_'+PASSWORD) + driver.auth(USERNAME, "invalid_" + PASSWORD) with pytest.raises(AuthError): - driver.auth('invalid_'+USERNAME, PASSWORD) + driver.auth("invalid_" + USERNAME, PASSWORD) + -@util.removes('auth.sq3') +@util.removes("auth.sq3") def test_driver_auth_returns_user_context(): - ''' + """ Tests driver accepts good creds. - ''' - driver = SQLAlchemyAuthDriver('sqlite:///auth.sq3') + """ + driver = SQLAlchemyAuthDriver("sqlite:///auth.sq3") - with sqlite3.connect('auth.sq3') as conn: + with sqlite3.connect("auth.sq3") as conn: - conn.execute(''' + conn.execute( + """ INSERT INTO auth_record VALUES (?,?) 
- ''', (USERNAME, DIGESTED)) + """, + (USERNAME, DIGESTED), + ) user = driver.auth(USERNAME, PASSWORD) - assert user is not None, 'user context was None' + assert user is not None, "user context was None" diff --git a/tests/test_driver_alchemy_crud.py b/tests/test_driver_alchemy_crud.py index bcf85a40f..824b80977 100644 --- a/tests/test_driver_alchemy_crud.py +++ b/tests/test_driver_alchemy_crud.py @@ -15,368 +15,430 @@ from datetime import datetime -#TODO check if pytest has utilities for meta-programming of tests +# TODO check if pytest has utilities for meta-programming of tests -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_init_does_not_create_records(): - ''' + """ Tests for creation of records after driver init. Tests driver init does not have unexpected side-effects. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] + + assert count == 0, "driver created records upon initilization" - assert count == 0, 'driver created records upon initilization' -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_init_does_not_create_record_urls(): - ''' + """ Tests for creation of urls after driver init. Tests driver init does not have unexpected side-effects. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record_url - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 0, 'driver created records urls upon initilization' + assert count == 0, "driver created records urls upon initilization" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_init_does_not_create_record_hashes(): - ''' + """ Tests for creation of hashes after driver init. Tests driver init does not have unexpected side-effects. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") # pylint: disable=unused-variable - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record_hash - ''').fetchone()[0] + """ + ).fetchone()[0] + + assert count == 0, "driver created records hashes upon initilization" - assert count == 0, 'driver created records hashes upon initilization' -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_object_record(): - ''' + """ Tests creation of a record. 
- ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - driver.add('object') + driver.add("object") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - record = conn.execute(''' + record = conn.execute( + """ SELECT * FROM index_record - ''').fetchone() + """ + ).fetchone() - assert record[0], 'record id not populated' - assert record[1], 'record baseid not populated' - assert record[2], 'record rev not populated' - assert record[3] == 'object', 'record form is not object' - assert record[4] is None, 'record size non-null' + assert record[0], "record id not populated" + assert record[1], "record baseid not populated" + assert record[2], "record rev not populated" + assert record[3] == "object", "record form is not object" + assert record[4] is None, "record size non-null" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_add_container_record(): - ''' + """ Tests creation of a record. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - driver.add('container') + driver.add("container") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - record = conn.execute(''' + record = conn.execute( + """ SELECT * FROM index_record - ''').fetchone() + """ + ).fetchone() + + assert record[0], "record id not populated" + assert record[1], "record baseid not populated" + assert record[2], "record rev not populated" + assert record[3] == "container", "record form is not container" + assert record[4] == None, "record size non-null" - assert record[0], 'record id not populated' - assert record[1], 'record baseid not populated' - assert record[2], 'record rev not populated' - assert record[3] == 'container', 'record form is not container' - assert record[4] == None, 'record size non-null' -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_multipart_record(): - ''' + """ Tests creation of a record. 
- ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - driver.add('multipart') + driver.add("multipart") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - record = conn.execute(''' + record = conn.execute( + """ SELECT * FROM index_record - ''').fetchone() + """ + ).fetchone() - assert record[0], 'record id not populated' - assert record[1], 'record baseid not populated' - assert record[2], 'record rev not populated' - assert record[3] == 'multipart', 'record form is not multipart' - assert record[4] == None, 'record size non-null' + assert record[0], "record id not populated" + assert record[1], "record baseid not populated" + assert record[2], "record rev not populated" + assert record[3] == "multipart", "record form is not multipart" + assert record[4] == None, "record size non-null" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_add_with_valid_did(): - ''' + """ Tests creation of a record with given valid did. - ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - did = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' - driver.add(form, did = did) + form = "object" + did = "3d313755-cbb4-4b08-899d-7bbac1f6e67d" + driver.add(form, did=did) with driver.session as s: assert s.query(IndexRecord).first().did == did -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_add_with_duplicate_did(): - ''' + """ Tests creation of a record with duplicate did. - ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - did = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' - driver.add(form, did = did) + form = "object" + did = "3d313755-cbb4-4b08-899d-7bbac1f6e67d" + driver.add(form, did=did) with pytest.raises(UserError): - driver.add(form, did = did) + driver.add(form, did=did) -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_multiple_records(): - ''' + """ Tests creation of a record. 
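
A compact sketch of the did-collision contract exercised above: an explicit did is honored once, and reusing it raises UserError. The exception's import path is an assumption here; the did value is the one used by the tests.

from indexd.errors import UserError  # import path assumed
from indexd.index.drivers.alchemy import SQLAlchemyIndexDriver

driver = SQLAlchemyIndexDriver("sqlite:///index.sq3")
did = "3d313755-cbb4-4b08-899d-7bbac1f6e67d"
driver.add("object", did=did)  # first insert with an explicit did succeeds
try:
    driver.add("object", did=did)  # same did again
except UserError:
    pass  # duplicate dids are rejected
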
- ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - driver.add('object') - driver.add('object') - driver.add('object') + driver.add("object") + driver.add("object") + driver.add("object") - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 3, 'driver did not create record(s)' + assert count == 3, "driver did not create record(s)" - records = conn.execute(''' + records = conn.execute( + """ SELECT * FROM index_record - ''') + """ + ) for record in records: - assert record[0], 'record id not populated' - assert record[1], 'record baseid not populated' - assert record[2], 'record rev not populated' - assert record[3] == 'object', 'record form is not object' - assert record[4] == None, 'record size non-null' + assert record[0], "record id not populated" + assert record[1], "record baseid not populated" + assert record[2], "record rev not populated" + assert record[3] == "object", "record form is not object" + assert record[4] == None, "record size non-null" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_add_with_size(): - ''' + """ Tests creation of a record with size. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' + form = "object" size = 512 driver.add(form, size=size) - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - new_form, new_size = conn.execute(''' + new_form, new_size = conn.execute( + """ SELECT form, size FROM index_record - ''').fetchone() + """ + ).fetchone() + + assert form == new_form, "record form mismatch" + assert size == new_size, "record size mismatch" - assert form == new_form, 'record form mismatch' - assert size == new_size, 'record size mismatch' -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_with_urls(): - ''' + """ Tests creation of a record with urls. 
- ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - urls = ['a', 'b', 'c'] + form = "object" + urls = ["a", "b", "c"] driver.add(form, urls=urls) - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record_url - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 3, 'driver did not create url(s)' + assert count == 3, "driver did not create url(s)" - new_urls = sorted(url[0] for url in conn.execute(''' + new_urls = sorted( + url[0] + for url in conn.execute( + """ SELECT url FROM index_record_url - ''')) + """ + ) + ) - assert urls == new_urls, 'record urls mismatch' + assert urls == new_urls, "record urls mismatch" -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_with_filename(): - ''' + """ Tests creation of a record with filename. - ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - file_name = 'abc' + form = "object" + file_name = "abc" driver.add(form, file_name=file_name) with driver.session as s: - assert s.query(IndexRecord).first().file_name == 'abc' + assert s.query(IndexRecord).first().file_name == "abc" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_add_with_version(): - ''' + """ Tests creation of a record with version string. - ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - version = 'ver_123' + form = "object" + version = "ver_123" driver.add(form, version=version) with driver.session as s: - assert s.query(IndexRecord).first().version == 'ver_123' + assert s.query(IndexRecord).first().version == "ver_123" + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_add_with_hashes(): - ''' + """ Tests creation of a record with hashes. 
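
As the filename/version tests above suggest, metadata passed to add() can be read back through the driver's session; a sketch, with the IndexRecord import path assumed:

from indexd.index.drivers.alchemy import SQLAlchemyIndexDriver, IndexRecord  # IndexRecord path assumed

driver = SQLAlchemyIndexDriver("sqlite:///index.sq3")
driver.add("object", file_name="abc", version="ver_123")
with driver.session as s:
    rec = s.query(IndexRecord).first()
    assert (rec.file_name, rec.version) == ("abc", "ver_123")
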
- ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") - form = 'object' - hashes = { - 'a': '1', - 'b': '2', - 'c': '3', - } + form = "object" + hashes = {"a": "1", "b": "2", "c": "3"} driver.add(form, hashes=hashes) - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 1, 'driver did not create record' + assert count == 1, "driver did not create record" - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record_hash - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 3, 'driver did not create hash(es)' + assert count == 3, "driver did not create hash(es)" - new_hashes = {h:v for h, v in conn.execute(''' + new_hashes = { + h: v + for h, v in conn.execute( + """ SELECT hash_type, hash_value FROM index_record_hash - ''')} + """ + ) + } + + assert hashes == new_hashes, "record hashes mismatch" - assert hashes == new_hashes, 'record hashes mismatch' -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_get_record(): - ''' + """ Tests retrieval of a record. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' + form = "object" baseid = str(uuid.uuid4()) created_date = datetime.now() updated_date = datetime.now() - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size, created_date, updated_date) VALUES (?,?,?,?,?,?,?) - ''', (did, baseid, rev, form, size, created_date, updated_date)) + """, + (did, baseid, rev, form, size, created_date, updated_date), + ) conn.commit() record = driver.get(did) - assert record['did'] == did, 'record id does not match' - assert record['baseid'] == baseid, 'record id does not match' - assert record['rev'] == rev, 'record revision does not match' - assert record['size'] == size, 'record size does not match' - assert record['form'] == form, 'record form does not match' - assert record['created_date'] == created_date.isoformat(), 'created date does not match' - assert record['updated_date'] == updated_date.isoformat(), 'updated date does not match' + assert record["did"] == did, "record id does not match" + assert record["baseid"] == baseid, "record id does not match" + assert record["rev"] == rev, "record revision does not match" + assert record["size"] == size, "record size does not match" + assert record["form"] == form, "record form does not match" + assert ( + record["created_date"] == created_date.isoformat() + ), "created date does not match" + assert ( + record["updated_date"] == updated_date.isoformat() + ), "updated date does not match" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_get_fails_with_no_records(): - ''' + """ Tests retrieval of a record fails if there are no records. 
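
The asserts above pin down the mapping driver.get() returns: did, baseid, rev, form, and size round-trip unchanged, while dates come back as ISO-8601 strings. A standalone sketch (IndexRecord import path assumed):

from indexd.index.drivers.alchemy import SQLAlchemyIndexDriver, IndexRecord  # IndexRecord path assumed

driver = SQLAlchemyIndexDriver("sqlite:///index.sq3")
driver.add("object", size=512)
with driver.session as s:
    did = s.query(IndexRecord).first().did  # recover the generated did
record = driver.get(did)
assert record["size"] == 512 and record["form"] == "object"
assert "T" in record["created_date"]  # datetime serialized via isoformat()
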
- ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") with pytest.raises(NoRecordFound): - driver.get('some_record_that_does_not_exist') + driver.get("some_record_that_does_not_exist") + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_get_latest_version(): - ''' + """ Tests retrieval of the latest record version - ''' - with sqlite3.connect('index.sq3') as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + with sqlite3.connect("index.sq3") as conn: + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") baseid = str(uuid.uuid4()) for _ in range(10): @@ -384,59 +446,71 @@ def test_driver_get_latest_version(): did = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' + form = "object" baseid = str(uuid.uuid4()) created_date = datetime.now() updated_date = datetime.now() - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size, created_date, updated_date) VALUES (?,?,?,?,?,?,?) - ''', (did, baseid, rev, form, size, created_date, updated_date)) + """, + (did, baseid, rev, form, size, created_date, updated_date), + ) conn.commit() record = driver.get_latest_version(did) - assert record['did'] == did, 'record id does not match' - assert record['rev'] == rev, 'record revision does not match' - assert record['size'] == size, 'record size does not match' - assert record['form'] == form, 'record form does not match' - assert record['created_date'] == created_date.isoformat(), 'created date does not match' - assert record['updated_date'] == updated_date.isoformat(), 'updated date does not match' + assert record["did"] == did, "record id does not match" + assert record["rev"] == rev, "record revision does not match" + assert record["size"] == size, "record size does not match" + assert record["form"] == form, "record form does not match" + assert ( + record["created_date"] == created_date.isoformat() + ), "created date does not match" + assert ( + record["updated_date"] == updated_date.isoformat() + ), "updated date does not match" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_get_latest_version_with_no_record(): - ''' + """ Tests retrieval of the latest record version fails with no matching record - ''' - with sqlite3.connect('index.sq3') as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + with sqlite3.connect("index.sq3") as conn: + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") for _ in range(10): did = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' + form = "object" baseid = str(uuid.uuid4()) dt = datetime.now() - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size, created_date, updated_date) VALUES (?,?,?,?,?,?,?)
- ''', (did, baseid, rev, form, size, dt, dt)) + """, + (did, baseid, rev, form, size, dt, dt), + ) conn.commit() with pytest.raises(NoRecordFound): - driver.get_latest_version('some base version') + driver.get_latest_version("some base version") + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_get_all_version(): - ''' + """ Tests retrieval of all record versions - ''' - with sqlite3.connect('index.sq3') as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + with sqlite3.connect("index.sq3") as conn: + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") baseid = str(uuid.uuid4()) NUMBER_OF_RECORD = 3 @@ -451,7 +525,7 @@ def test_driver_get_all_version(): did = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' + form = "object" created_date = datetime.now() updated_date = created_date @@ -460,32 +534,40 @@ created_dates.append(created_date) updated_dates.append(updated_date) - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size, created_date, updated_date) \ VALUES (?,?,?,?,?,?,?) - ''', (did, baseid, rev, form, size, created_date, updated_date)) + """, + (did, baseid, rev, form, size, created_date, updated_date), + ) conn.commit() records = driver.get_all_versions(did) - assert len(records) == NUMBER_OF_RECORD, 'the number of records does not match' + assert len(records) == NUMBER_OF_RECORD, "the number of records does not match" for i in range(NUMBER_OF_RECORD): record = records[i] - assert record['did'] == dids[i], 'record id does not match' - assert record['rev'] == revs[i], 'record revision does not match' - assert record['size'] == size, 'record size does not match' - assert record['form'] == form, 'record form does not match' - assert record['created_date'] == created_dates[i].isoformat(), 'created date does not match' - assert record['updated_date'] == updated_dates[i].isoformat(), 'updated date does not match' - -@util.removes('index.sq3') + assert record["did"] == dids[i], "record id does not match" + assert record["rev"] == revs[i], "record revision does not match" + assert record["size"] == size, "record size does not match" + assert record["form"] == form, "record form does not match" + assert ( + record["created_date"] == created_dates[i].isoformat() + ), "created date does not match" + assert ( + record["updated_date"] == updated_dates[i].isoformat() + ), "updated date does not match" + + +@util.removes("index.sq3") def test_driver_get_all_version_with_no_record(): - ''' + """ Tests retrieval of all record versions fails with no matching record - ''' - with sqlite3.connect('index.sq3') as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + with sqlite3.connect("index.sq3") as conn: + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") baseid = str(uuid.uuid4()) for _ in range(3): @@ -493,243 +575,286 @@ def test_driver_get_all_version_with_no_record(): did = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?)
- ''', (did, baseid, rev, form, size)) + """, + (did, baseid, rev, form, size), + ) conn.commit() with pytest.raises(NoRecordFound): - driver.get_all_versions('some baseid') + driver.get_all_versions("some baseid") -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_get_fails_with_invalid_id(): - ''' + """ Tests retrieval of a record fails if the record id is not found. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() with pytest.raises(NoRecordFound): - driver.get('some_record_that_does_not_exist') + driver.get("some_record_that_does_not_exist") def test_driver_update_record(skip_authz): _test_driver_update_record() -@util.removes('index.sq3') +@util.removes("index.sq3") def _test_driver_update_record(): - ''' + """ Tests updating of a record. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() - update_size = 256 - update_urls = ['a', 'b', 'c'] - update_hashes = { - 'a': '1', - 'b': '2', - 'c': '3', - } + # update_size = 256 + update_urls = ["a", "b", "c"] + # update_hashes = {"a": "1", "b": "2", "c": "3"} - file_name = 'test' - version = 'ver_123' + file_name = "test" + version = "ver_123" changing_fields = { - 'urls': update_urls, - 'file_name': file_name, - 'version': version, + "urls": update_urls, + "file_name": file_name, + "version": version, } driver.update(did, rev, changing_fields) - new_did, new_rev, new_file_name, new_version = conn.execute(''' + new_did, new_rev, new_file_name, new_version = conn.execute( + """ SELECT did, rev, file_name, version FROM index_record - ''').fetchone() + """ + ).fetchone() - new_urls = sorted(url[0] for url in conn.execute(''' + new_urls = sorted( + url[0] + for url in conn.execute( + """ SELECT url FROM index_record_url - ''')) - - new_hashes = {h:v for h,v in conn.execute(''' - SELECT hash_type, hash_value FROM index_record_hash - ''')} - - assert did == new_did, 'record id does not match' - assert rev != new_rev, 'record revision matches prior' - assert update_urls == new_urls, 'record urls mismatch' - assert file_name == new_file_name, 'file_name does not match' - assert version == new_version, 'version does not match' - -@util.removes('index.sq3') + """ + ) + ) + + # new_hashes = { + # h: v + # for h, v in conn.execute( + # """ + # SELECT hash_type, hash_value FROM index_record_hash + # """ + # ) + # } + + assert did == new_did, "record id does not match" + assert rev != new_rev, "record revision matches prior" + assert update_urls == new_urls, "record urls mismatch" + assert file_name == new_file_name, "file_name does not match" + assert version == 
new_version, "version does not match" + + +@util.removes("index.sq3") def test_driver_update_fails_with_no_records(): - ''' + """ Tests updating a record fails if there are no records. - ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") with pytest.raises(NoRecordFound): - driver.update('some_record_that_does_not_exist', 'some_base_version', 'some_revision') + driver.update( + "some_record_that_does_not_exist", "some_base_version", "some_revision" + ) -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_update_fails_with_invalid_id(): - ''' + """ Tests updating a record fails if the record id is not found. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() with pytest.raises(NoRecordFound): - driver.update('some_record_that_does_not_exist','some_record_version', rev) + driver.update("some_record_that_does_not_exist", "some_record_version", rev) + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_update_fails_with_invalid_rev(): - ''' + """ Tests updating a record fails if the record rev does not match. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() with pytest.raises(RevisionMismatch): - driver.update(did, baseid, 'some_revision') + driver.update(did, baseid, "some_revision") def test_driver_delete_record(skip_authz): _test_driver_delete_record() -@util.removes('index.sq3') +@util.removes("index.sq3") def _test_driver_delete_record(): - ''' + """ Tests deletion of a record. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() driver.delete(did, rev) - count = conn.execute(''' + count = conn.execute( + """ SELECT COUNT(*) FROM index_record - ''').fetchone()[0] + """ + ).fetchone()[0] - assert count == 0, 'records remain after deletion' + assert count == 0, "records remain after deletion" -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_driver_delete_fails_with_no_records(): - ''' + """ Tests deletion of a record fails if there are no records.
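
The update/delete failure tests around here pin an optimistic-locking contract: every mutation must present the record's current rev, and a stale or wrong rev raises RevisionMismatch. A sketch under assumed import paths for the exception and model:

from indexd.errors import RevisionMismatch  # import path assumed
from indexd.index.drivers.alchemy import SQLAlchemyIndexDriver, IndexRecord  # IndexRecord path assumed

driver = SQLAlchemyIndexDriver("sqlite:///index.sq3")
driver.add("object")
with driver.session as s:
    did = s.query(IndexRecord).first().did
rev = driver.get(did)["rev"]
driver.update(did, rev, {"file_name": "renamed"})  # ok: rev is current
try:
    driver.update(did, rev, {"file_name": "again"})  # rev is now stale
except RevisionMismatch:
    pass  # callers must re-fetch the record to get the new rev
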
- ''' - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") with pytest.raises(NoRecordFound): - driver.delete('some_record_that_does_not_exist', 'some_revision') + driver.delete("some_record_that_does_not_exist", "some_revision") + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_delete_fails_with_invalid_id(): - ''' + """ Tests deletion of a record fails if the record id is not found. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() with pytest.raises(NoRecordFound): - driver.delete('some_record_that_does_not_exist', rev) + driver.delete("some_record_that_does_not_exist", rev) + -@util.removes('index.sq3') +@util.removes("index.sq3") def test_driver_delete_fails_with_invalid_rev(): - ''' + """ Tests deletion of a record fails if the record rev does not match. - ''' - with sqlite3.connect('index.sq3') as conn: + """ + with sqlite3.connect("index.sq3") as conn: - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") did = str(uuid.uuid4()) baseid = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] - form = 'object' + form = "object" - conn.execute(''' + conn.execute( + """ INSERT INTO index_record(did, baseid, rev, form, size) VALUES (?,?,?,?,?) - ''', (did, baseid, rev, form, None)) + """, + (did, baseid, rev, form, None), + ) conn.commit() with pytest.raises(RevisionMismatch): - driver.delete(did, 'some_revision') + driver.delete(did, "some_revision") diff --git a/tests/test_import.py b/tests/test_import.py index f723cc233..0b275ff8d 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,5 +1,5 @@ def test_import_index(): - ''' + """ Try to import the indexd package.
- ''' + """ import indexd diff --git a/tests/test_openapi.py b/tests/test_openapi.py index 74badb1d2..9bdd2e72b 100644 --- a/tests/test_openapi.py +++ b/tests/test_openapi.py @@ -5,14 +5,15 @@ import yaml from swagger_spec_validator.common import SwaggerValidationError + def test_valid_openapi(): - filename = 'openapis/swagger.yaml' - with codecs.open(filename, encoding='utf-8') as f: - url = 'file://' + filename + '#' - spec = yaml.safe_load(f) - if not isinstance(spec, dict): - raise SwaggerValidationError('root node is not a mapping') - # ensure the spec is valid JSON - spec = json.loads(json.dumps(spec)) - validator = swagger_spec_validator.util.get_validator(spec, url) - validator.validate_spec(spec, url) + filename = "openapis/swagger.yaml" + with codecs.open(filename, encoding="utf-8") as f: + url = "file://" + filename + "#" + spec = yaml.safe_load(f) + if not isinstance(spec, dict): + raise SwaggerValidationError("root node is not a mapping") + # ensure the spec is valid JSON + spec = json.loads(json.dumps(spec)) + validator = swagger_spec_validator.util.get_validator(spec, url) + validator.validate_spec(spec, url) diff --git a/tests/test_schema_migration.py b/tests/test_schema_migration.py index b076a27f0..cea26460e 100644 --- a/tests/test_schema_migration.py +++ b/tests/test_schema_migration.py @@ -5,76 +5,87 @@ import sqlite3 import tests.util as util from indexd.index.drivers.alchemy import ( - SQLAlchemyIndexDriver, IndexSchemaVersion, migrate_7) + SQLAlchemyIndexDriver, + IndexSchemaVersion, + migrate_7, +) -from indexd.alias.drivers.alchemy import ( - SQLAlchemyAliasDriver, AliasSchemaVersion) +from indexd.alias.drivers.alchemy import SQLAlchemyAliasDriver, AliasSchemaVersion from indexd.index.drivers.alchemy import migrate_1, migrate_2 -from indexd.index.drivers.alchemy import CURRENT_SCHEMA_VERSION, SCHEMA_MIGRATION_FUNCTIONS +from indexd.index.drivers.alchemy import ( + CURRENT_SCHEMA_VERSION, + SCHEMA_MIGRATION_FUNCTIONS, +) from tests.alchemy import SQLAlchemyIndexTestDriver from sqlalchemy_utils import database_exists, drop_database Base = declarative_base() INDEX_TABLES = { - 'base_version': [ - (u'baseid', u'character varying'), - ], - 'index_record': [ - (u'did', u'character varying'), - (u'rev', u'character varying'), - (u'form', u'character varying'), - (u'size', u'bigint'), - (u'baseid', u'character varying'), - (u'created_date', u'timestamp without time zone'), - (u'updated_date', u'timestamp without time zone'), + "base_version": [("baseid", "character varying")], + "index_record": [ + ("did", "character varying"), + ("rev", "character varying"), + ("form", "character varying"), + ("size", "bigint"), + ("baseid", "character varying"), + ("created_date", "timestamp without time zone"), + ("updated_date", "timestamp without time zone"), ], - 'index_record_hash': [ - (u'did', u'character varying'), - (u'hash_type', u'character varying'), - (u'hash_value', u'character varying'), - ], - 'index_record_url': [ - (u'did', u'character varying'), - (u'url', u'character varying'), + "index_record_hash": [ + ("did", "character varying"), + ("hash_type", "character varying"), + ("hash_value", "character varying"), ], + "index_record_url": [("did", "character varying"), ("url", "character varying")], } def update_version_table_for_testing(db, tb_name, val): with sqlite3.connect(db) as conn: - conn.execute('''\ + conn.execute( + """\ CREATE TABLE IF NOT EXISTS {} (version INT)\ - '''.format(tb_name)) - conn.execute(''' + """.format( + tb_name + ) + ) + conn.execute( + """ 
DELETE FROM {} - '''.format(tb_name)) - conn.execute(''' + """.format( + tb_name + ) + ) + conn.execute( + """ INSERT INTO {} (version) VALUES ({}) - '''.format(tb_name, val)) + """.format( + tb_name, val + ) + ) conn.commit() def test_migrate_7(swg_index_client, indexd_server): data = { - 'form': 'object', - 'size': 123, - 'urls': ['s3://endpointurl/bucket/key'], - 'metadata': { - 'acls': 'a,b' - }, - 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'} + "form": "object", + "size": 123, + "urls": ["s3://endpointurl/bucket/key"], + "metadata": {"acls": "a,b"}, + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}, } r = swg_index_client.add_entry(data) - with settings['config']['INDEX']['driver'].session as session: + with settings["config"]["INDEX"]["driver"].session as session: migrate_7(session) r = swg_index_client.get_entry(r.did) - assert r.acl == ['a', 'b'] + assert r.acl == ["a", "b"] assert r.metadata == {} -@util.removes('index.sq3') + +@util.removes("index.sq3") def test_migrate_index(): def test_migrate_index_internal(monkeypatch): called = [] @@ -82,19 +93,16 @@ def test_migrate_index_internal(monkeypatch): def mock_migrate(**kwargs): called.append(True) - monkeypatch.setattr( - 'indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION', 2) - monkeypatch.setattr( - 'indexd.utils.check_engine_for_migrate', - lambda _: True - ) + monkeypatch.setattr("indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION", 2) + monkeypatch.setattr("indexd.utils.check_engine_for_migrate", lambda _: True) monkeypatch.setattr( - 'indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS', - [mock_migrate, mock_migrate]) + "indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS", + [mock_migrate, mock_migrate], + ) - update_version_table_for_testing('index.sq3', 'index_schema_version', 0) - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + update_version_table_for_testing("index.sq3", "index_schema_version", 0) + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") assert len(called) == 2 with driver.session as s: @@ -105,7 +113,7 @@ def mock_migrate(**kwargs): return test_migrate_index_internal -@util.removes('index.sq3') +@util.removes("index.sq3") def test_migrate_index_only_diff(): def test_migrate_index_only_diff_internal(monkeypatch): called = [] @@ -114,32 +122,29 @@ def mock_migrate(**kwargs): called.append(True) called_2 = [] + def mock_migrate_2(**kwargs): called_2.append(True) + monkeypatch.setattr("indexd.utils.check_engine_for_migrate", lambda _: True) + monkeypatch.setattr("indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION", 1) monkeypatch.setattr( - 'indexd.utils.check_engine_for_migrate', - lambda _: True + "indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS", + [mock_migrate, mock_migrate_2], ) - monkeypatch.setattr( - 'indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION', 1) - monkeypatch.setattr( - 'indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS', - [mock_migrate, mock_migrate_2]) - update_version_table_for_testing('index.sq3', 'index_schema_version', 0) + update_version_table_for_testing("index.sq3", "index_schema_version", 0) - driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") assert len(called) == 1 assert len(called_2) == 0 called = [] called_2 = [] - monkeypatch.setattr( - 'indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION', 2) + monkeypatch.setattr("indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION", 2) - update_version_table_for_testing('index.sq3', 'index_schema_version', 1) - driver = 
SQLAlchemyIndexDriver('sqlite:///index.sq3') + update_version_table_for_testing("index.sq3", "index_schema_version", 1) + driver = SQLAlchemyIndexDriver("sqlite:///index.sq3") assert len(called) == 0 assert len(called_2) == 1 @@ -150,7 +155,7 @@ def mock_migrate_2(**kwargs): return test_migrate_index_only_diff_internal -@util.removes('alias.sq3') +@util.removes("alias.sq3") def test_migrate_alias(): def test_migrate_alias_internal(monkeypatch): called = [] @@ -158,20 +163,16 @@ def test_migrate_alias_internal(monkeypatch): def mock_migrate(**kwargs): called.append(True) + monkeypatch.setattr("indexd.alias.drivers.alchemy.CURRENT_SCHEMA_VERSION", 1) monkeypatch.setattr( - 'indexd.alias.drivers.alchemy.CURRENT_SCHEMA_VERSION', 1) - monkeypatch.setattr( - 'indexd.alias.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS', - [mock_migrate]) - - monkeypatch.setattr( - 'indexd.utils.check_engine_for_migrate', - lambda _: True + "indexd.alias.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS", [mock_migrate] ) - update_version_table_for_testing('alias.sq3', 'alias_schema_version', 0) + monkeypatch.setattr("indexd.utils.check_engine_for_migrate", lambda _: True) + + update_version_table_for_testing("alias.sq3", "alias_schema_version", 0) - driver = SQLAlchemyAliasDriver('sqlite:///alias.sq3') + driver = SQLAlchemyAliasDriver("sqlite:///alias.sq3") assert len(called) == 1 with driver.session as s: v = s.query(AliasSchemaVersion).first() @@ -181,62 +182,70 @@ def mock_migrate(**kwargs): def test_migrate_index_versioning(monkeypatch): - engine = create_engine(settings['config']['TEST_DB']) + engine = create_engine(settings["config"]["TEST_DB"]) if database_exists(engine.url): drop_database(engine.url) - driver = SQLAlchemyIndexTestDriver(settings['config']['TEST_DB']) - monkeypatch.setattr( - 'indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION', 2) + driver = SQLAlchemyIndexTestDriver(settings["config"]["TEST_DB"]) + monkeypatch.setattr("indexd.index.drivers.alchemy.CURRENT_SCHEMA_VERSION", 2) monkeypatch.setattr( - 'indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS', - [migrate_1, migrate_2]) - - monkeypatch.setattr( - 'indexd.utils.check_engine_for_migrate', - lambda _: True + "indexd.index.drivers.alchemy.SCHEMA_MIGRATION_FUNCTIONS", + [migrate_1, migrate_2], ) + monkeypatch.setattr("indexd.utils.check_engine_for_migrate", lambda _: True) + conn = driver.engine.connect() for _ in range(10): did = str(uuid.uuid4()) rev = str(uuid.uuid4())[:8] size = 512 - form = 'object' - conn.execute("\ + form = "object" + conn.execute( + "\ INSERT INTO index_record(did, rev, form, size) \ - VALUES ('{}','{}','{}',{})".format(did, rev, form, size)) + VALUES ('{}','{}','{}',{})".format( + did, rev, form, size + ) + ) conn.execute("commit") conn.close() - driver = SQLAlchemyIndexDriver(settings['config']['TEST_DB']) + driver = SQLAlchemyIndexDriver(settings["config"]["TEST_DB"]) with driver.session as s: v = s.query(IndexSchemaVersion).first() assert v.version == 2 s.delete(v) Base.metadata.reflect(bind=driver.engine) - tables = Base.metadata.tables.keys() + tables = list(Base.metadata.tables.keys()) for table in INDEX_TABLES: - assert table in tables, '{table} not created'.format(table=table) + assert table in tables, "{table} not created".format(table=table) conn = driver.engine.connect() for table, schema in INDEX_TABLES.items(): - cols = conn.execute("\ + cols = conn.execute( + "\ SELECT column_name, data_type \ FROM information_schema.columns \ - WHERE table_schema = 'public' AND table_name = '{table}'". 
- format(table=table)) + WHERE table_schema = 'public' AND table_name = '{table}'".format( + table=table + ) + ) assert schema == [i for i in cols] vids = conn.execute("SELECT baseid FROM index_record").fetchall() for baseid in vids: - c = conn.execute("\ + c = conn.execute( + "\ SELECT COUNT(*) AS number_rows \ FROM base_version \ - WHERE baseid = '{}';".format(baseid[0])).fetchone()[0] + WHERE baseid = '{}';".format( + baseid[0] + ) + ).fetchone()[0] assert c == 1 conn.close() @@ -244,4 +253,3 @@ def test_migrate_index_versioning(monkeypatch): def test_schema_version(): assert CURRENT_SCHEMA_VERSION == len(SCHEMA_MIGRATION_FUNCTIONS) - diff --git a/tests/test_settings.py b/tests/test_settings.py index 851858f2c..732ef3e2f 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,2 +1,3 @@ from indexd.default_settings import settings -settings['config']['TEST_DB'] = 'postgres://postgres@localhost/test_migration_db' + +settings["config"]["TEST_DB"] = "postgres://postgres@localhost/test_migration_db" diff --git a/tests/test_setup.py b/tests/test_setup.py index 24df01feb..282154a36 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -8,117 +8,120 @@ OLD_SQLITE = sqlite3.sqlite_version_info < (3, 7, 16) -INDEX_HOST = 'index.sq3' -ALIAS_HOST = 'alias.sq3' +INDEX_HOST = "index.sq3" +ALIAS_HOST = "alias.sq3" INDEX_TABLES = { - 'base_version': [ - (0, u'baseid', u'VARCHAR', 1, None, 1), + "base_version": [(0, "baseid", "VARCHAR", 1, None, 1)], + "index_record": [ + (0, "did", "VARCHAR", 1, None, 1), + (1, "baseid", "VARCHAR", 0, None, 0), + (2, "rev", "VARCHAR", 0, None, 0), + (3, "form", "VARCHAR", 0, None, 0), + (4, "size", "BIGINT", 0, None, 0), + (5, "created_date", "DATETIME", 0, None, 0), + (6, "updated_date", "DATETIME", 0, None, 0), + (7, "file_name", "VARCHAR", 0, None, 0), + (8, "version", "VARCHAR", 0, None, 0), + (9, "uploader", "VARCHAR", 0, None, 0), ], - 'index_record': [ - (0, u'did', u'VARCHAR', 1, None, 1), - (1, u'baseid', u'VARCHAR', 0, None, 0), - (2, u'rev', u'VARCHAR', 0, None, 0), - (3, u'form', u'VARCHAR', 0, None, 0), - (4, u'size', u'BIGINT', 0, None, 0), - (5, u'created_date', u'DATETIME', 0, None, 0), - (6, u'updated_date', u'DATETIME', 0, None, 0), - (7, u'file_name', u'VARCHAR', 0, None, 0), - (8, u'version', u'VARCHAR', 0, None, 0), - (9, u'uploader', u'VARCHAR', 0, None, 0), + "index_record_hash": [ + (0, "did", "VARCHAR", 1, None, 1), + (1, "hash_type", "VARCHAR", 1, None, 1 if OLD_SQLITE else 2), + (2, "hash_value", "VARCHAR", 0, None, 0), ], - 'index_record_hash': [ - (0, u'did', u'VARCHAR', 1, None, 1), - (1, u'hash_type', u'VARCHAR', 1, None, 1 if OLD_SQLITE else 2), - (2, u'hash_value', u'VARCHAR', 0, None, 0), - ], - 'index_record_url': [ - (0, u'did', u'VARCHAR', 1, None, 1), - (1, u'url', u'VARCHAR', 1, None, 1 if OLD_SQLITE else 2), - ], - 'index_schema_version': [ - (0, u'version', u'INTEGER', 1, None, 1), + "index_record_url": [ + (0, "did", "VARCHAR", 1, None, 1), + (1, "url", "VARCHAR", 1, None, 1 if OLD_SQLITE else 2), ], + "index_schema_version": [(0, "version", "INTEGER", 1, None, 1)], } ALIAS_TABLES = { - 'alias_record': [ - (0, u'name', u'VARCHAR', 1, None, 1), - (1, u'rev', u'VARCHAR', 0, None, 0), - (2, u'size', u'BIGINT', 0, None, 0), - (3, u'release', u'VARCHAR', 0, None, 0), - (4, u'metastring', u'VARCHAR', 0, None, 0), - (5, u'keeper_authority', u'VARCHAR', 0, None, 0), - ], - 'alias_record_hash': [ - (0, u'name', u'VARCHAR', 1, None, 1), - (1, u'hash_type', u'VARCHAR', 1, None, 1 if OLD_SQLITE else 2), - (2, 
u'hash_value', u'VARCHAR', 0, None, 0) + "alias_record": [ + (0, "name", "VARCHAR", 1, None, 1), + (1, "rev", "VARCHAR", 0, None, 0), + (2, "size", "BIGINT", 0, None, 0), + (3, "release", "VARCHAR", 0, None, 0), + (4, "metastring", "VARCHAR", 0, None, 0), + (5, "keeper_authority", "VARCHAR", 0, None, 0), ], - 'alias_record_host_authority': [ - (0, u'name', u'VARCHAR', 1, None, 1), - (1, u'host', u'VARCHAR', 1, None, 1 if OLD_SQLITE else 2), + "alias_record_hash": [ + (0, "name", "VARCHAR", 1, None, 1), + (1, "hash_type", "VARCHAR", 1, None, 1 if OLD_SQLITE else 2), + (2, "hash_value", "VARCHAR", 0, None, 0), ], - 'alias_schema_version': [ - (0, u'version', u'INTEGER', 1, None, 1), + "alias_record_host_authority": [ + (0, "name", "VARCHAR", 1, None, 1), + (1, "host", "VARCHAR", 1, None, 1 if OLD_SQLITE else 2), ], + "alias_schema_version": [(0, "version", "INTEGER", 1, None, 1)], } -INDEX_CONFIG = { - 'driver': SQLAlchemyIndexDriver('sqlite:///index.sq3'), -} +INDEX_CONFIG = {"driver": SQLAlchemyIndexDriver("sqlite:///index.sq3")} -ALIAS_CONFIG = { - 'driver': SQLAlchemyAliasDriver('sqlite:///alias.sq3'), -} +ALIAS_CONFIG = {"driver": SQLAlchemyAliasDriver("sqlite:///alias.sq3")} @util.removes(INDEX_HOST) def test_sqlite3_index_setup_tables(): - ''' + """ Tests that the SQLite3 index database gets set up correctly. - ''' - SQLAlchemyIndexDriver('sqlite:///index.sq3') + """ + SQLAlchemyIndexDriver("sqlite:///index.sq3") with sqlite3.connect(INDEX_HOST) as conn: - c = conn.execute(''' + c = conn.execute( + """ SELECT name FROM sqlite_master WHERE type = 'table' - ''') + """ + ) tables = [i[0] for i in c] for table in INDEX_TABLES: - assert table in tables, '{table} not created'.format(table=table) + assert table in tables, "{table} not created".format(table=table) - for table, schema in INDEX_TABLES.items(): + for table, schema in list(INDEX_TABLES.items()): # NOTE PRAGMA's don't work with parameters... - c = conn.execute(''' + c = conn.execute( + """ PRAGMA table_info ('{table}') - '''.format(table=table)) + """.format( + table=table + ) + ) assert schema == [i for i in c] + @util.removes(ALIAS_HOST) def test_sqlite3_alias_setup_tables(): - ''' + """ Tests that the SQLite3 alias database gets set up correctly. - ''' - SQLAlchemyAliasDriver('sqlite:///alias.sq3') + """ + SQLAlchemyAliasDriver("sqlite:///alias.sq3") with sqlite3.connect(ALIAS_HOST) as conn: - c = conn.execute(''' + c = conn.execute( + """ SELECT name FROM sqlite_master WHERE type = 'table' - ''') + """ + ) tables = [i[0] for i in c] for table in ALIAS_TABLES: - assert table in tables, '{table} not created'.format(table=table) + assert table in tables, "{table} not created".format(table=table) - for table, schema in ALIAS_TABLES.items(): + for table, schema in list(ALIAS_TABLES.items()): # NOTE PRAGMA's don't work with parameters... 
- c = conn.execute(''' + c = conn.execute( + """ PRAGMA table_info ('{table}') - '''.format(table=table)) + """.format( + table=table + ) + ) assert schema == [i for i in c] diff --git a/tests/test_urls_endpoints.py b/tests/test_urls_endpoints.py index 04dc8dd76..b6f06d081 100644 --- a/tests/test_urls_endpoints.py +++ b/tests/test_urls_endpoints.py @@ -73,18 +73,25 @@ def test_query_urls_metadata(swg_index_client, swg_query_client, test_data): """ url_x_count, _, unversioned_count = test_data # test get all - urls_list = swg_query_client.query_urls_metadata(key="state", value="uploaded", url="awesome-x") + urls_list = swg_query_client.query_urls_metadata( + key="state", value="uploaded", url="awesome-x" + ) assert len(urls_list) == 2 * url_x_count # test list versioned urls - urls_list = swg_query_client.query_urls_metadata(key="state", value="uploaded", - url="awesome-x", versioned=True) + urls_list = swg_query_client.query_urls_metadata( + key="state", value="uploaded", url="awesome-x", versioned=True + ) assert len(urls_list) == url_x_count # test list un versioned - urls_list = swg_query_client.query_urls_metadata(key="state", value="uploaded", url="endpointurl", versioned=False) + urls_list = swg_query_client.query_urls_metadata( + key="state", value="uploaded", url="endpointurl", versioned=False + ) assert len(urls_list) == unversioned_count # test unknown state - urls_list = swg_query_client.query_urls_metadata(key="state", value="uploadedx", url="awesome-x") + urls_list = swg_query_client.query_urls_metadata( + key="state", value="uploadedx", url="awesome-x" + ) assert len(urls_list) == 0 diff --git a/tests/util.py b/tests/util.py index 9b659bf96..81310f442 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,19 +1,23 @@ import os import shutil + class removes(object): - ''' + """ Decorator to remove a path after function call. - ''' + """ + def __init__(self, path): self.path = path def __call__(self, f): - ''' + """ Ensures path is removed after function call. - ''' + """ + def wrapper(*args, **kwargs): - try: return f(*args, **kwargs) + try: + return f(*args, **kwargs) finally: if not os.path.exists(self.path): pass @@ -26,10 +30,10 @@ def wrapper(*args, **kwargs): def assert_blank(r): - ''' + """ Check that the fields that should be empty in a blank record are empty. - ''' + """ assert r.records[0].baseid assert r.records[0].did assert not r.records[0].size
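
Usage sketch for the removes decorator above: it deletes the named path after the wrapped test runs, pass or fail, which is how the sqlite-backed tests in this suite stay isolated. The filename scratch.sq3 is hypothetical.

import sqlite3

from tests.util import removes

@removes("scratch.sq3")
def test_creates_scratch_db():
    # the file is cleaned up even if assertions in the test body raise
    sqlite3.connect("scratch.sq3").close()
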