From bad8667ae2cc93b46dfea877e03299a72151eb3f Mon Sep 17 00:00:00 2001 From: Lars Holm Nielsen Date: Thu, 1 Jun 2017 14:45:35 +0200 Subject: [PATCH] openaire: use object type model for openaire * Stores the OpenAIRE type and resourceType in the object types JSON. --- .../records/test_schemas_openaire_json.py | 2 +- zenodo/modules/openaire/helpers.py | 40 +++++----- zenodo/modules/openaire/schema.py | 51 +++++++------ zenodo/modules/records/data/objecttypes.json | 76 +++++++++++++++++++ .../records/objecttype-v1.0.0.json | 15 ++++ 5 files changed, 144 insertions(+), 40 deletions(-) diff --git a/tests/unit/records/test_schemas_openaire_json.py b/tests/unit/records/test_schemas_openaire_json.py index f70a472e0..71edc3ca9 100644 --- a/tests/unit/records/test_schemas_openaire_json.py +++ b/tests/unit/records/test_schemas_openaire_json.py @@ -80,7 +80,7 @@ def test_resource_types(app, db, minimal_oai_record, recid_pid): assert obj['originalId'] == 'oai:zenodo.org:123' assert obj['collectedFromId'] == 'opendoar____::2659' assert obj['hostedById'] == 'opendoar____::2659' - assert obj['resourceType'] == '0001' + assert obj['resourceType'] == '0004' assert obj['type'] == 'publication' diff --git a/zenodo/modules/openaire/helpers.py b/zenodo/modules/openaire/helpers.py index 253e9bc24..f9f7373be 100644 --- a/zenodo/modules/openaire/helpers.py +++ b/zenodo/modules/openaire/helpers.py @@ -30,6 +30,8 @@ from flask import current_app +from zenodo.modules.records.models import ObjectType + class _OAType(object): """OpenAIRE types.""" @@ -40,8 +42,9 @@ class _OAType(object): def is_openaire_publication(record): """Determine if record is a publication for OpenAIRE.""" - types = ['publication', 'presentation', 'poster'] - if record.get('resource_type', {}).get('type') not in types: + oatype = ObjectType.get_by_dict(record.get('resource_type')).get( + 'openaire', {}) + if not oatype or oatype['type'] != _OAType.publication: return False # Has grants, is part of ecfunded community or 
is open access. @@ -53,9 +56,9 @@ def is_openaire_publication(record): def is_openaire_dataset(record): """Determine if record is a dataset for OpenAIRE.""" - if record.get('resource_type', {}).get('type') == 'dataset': - return True - return False + oatype = ObjectType.get_by_dict(record.get('resource_type')).get( + 'openaire', {}) + return bool(oatype) and oatype['type'] == _OAType.dataset def openaire_type(record): @@ -74,26 +77,29 @@ def openaire_id(record): def _openaire_id(record, oatype): """Compute the OpenAIRE identifier.""" - prefix = None - value = None - if oatype == _OAType.publication: - # Hard-coded prefix from OpenAIRE. - prefix = current_app.config['OPENAIRE_ID_PREFIX_PUBLICATION'] - value = record.get('_oai', {}).get('id') - elif oatype == _OAType.dataset: - # Hard-coded prefix from OpenAIRE. - prefix = current_app.config['OPENAIRE_ID_PREFIX_DATASET'] - value = record.get('doi') + prefix, identifier = openaire_original_id(record, oatype) - if not value or not prefix: + if not identifier or not prefix: return None m = hashlib.md5() - m.update(value.encode('utf8')) + m.update(identifier.encode('utf8')) return '{}::{}'.format(prefix, m.hexdigest()) +def openaire_original_id(record, oatype): + """Get the OpenAIRE namespace prefix and original identifier.""" + prefix = current_app.config['OPENAIRE_NAMESPACE_PREFIXES'].get(oatype) + + value = None + if oatype == _OAType.publication: + value = record.get('_oai', {}).get('id') + elif oatype == _OAType.dataset: + value = record.get('doi') + + return prefix, value + def openaire_link(record): """Compute an OpenAIRE link.""" oatype = openaire_type(record) diff --git a/zenodo/modules/openaire/schema.py b/zenodo/modules/openaire/schema.py index bf65b7faf..178f9fffd 100644 --- a/zenodo/modules/openaire/schema.py +++ b/zenodo/modules/openaire/schema.py @@ -34,6 +34,8 @@ from zenodo.modules.records.models import ObjectType from zenodo.modules.records.serializers.fields import DateString +from .helpers import openaire_original_id, openaire_type + 
OpenAIREType = namedtuple('OpenAIREType', ('type', 'resource_type')) @@ -60,42 +62,48 @@ class RecordSchemaOpenAIREJSON(Schema): embargoEndDate = DateString(attribute='metadata.embargo_date') publisher = fields.Method('get_publisher') - collectedFromId = fields.Method('get_openaire_id', required=True) - hostedById = fields.Method('get_openaire_id') + collectedFromId = fields.Method('get_datasource_id', required=True) + hostedById = fields.Method('get_datasource_id') linksToProjects = fields.Method('get_links_to_projects') pids = fields.Method('get_pids') - def _resolve_openaire_type(self, obj): - # TODO: Move to utils.py? - metadata = obj.get('metadata') - obj_type = ObjectType.get_by_dict(metadata.get('resource_type')) - if obj_type['internal_id'] == 'dataset': - return OpenAIREType('dataset', '0021') - else: - return OpenAIREType('publication', '0001') + def _openaire_type(self, obj): + return ObjectType.get_by_dict( + obj.get('metadata', {}).get('resource_type') + ).get('openaire') def get_original_id(self, obj): """Get Original Id.""" - openaire_type = self._resolve_openaire_type(obj) - if openaire_type.type == 'publication': - return obj.get('metadata', {}).get('_oai', {}).get('id') - if openaire_type.type == 'dataset': - return obj.get('metadata', {}).get('doi') + oatype = self._openaire_type(obj) + if oatype: + return openaire_original_id( + obj.get('metadata', {}), + oatype['type'] + )[1] + return missing def get_type(self, obj): """Get record type.""" - return self._resolve_openaire_type(obj).type + oatype = self._openaire_type(obj) + if oatype: + return oatype['type'] + return missing def get_resource_type(self, obj): """Get resource type.""" - return self._resolve_openaire_type(obj).resource_type + oatype = self._openaire_type(obj) + if oatype: + return oatype['resourceType'] + return missing - def get_openaire_id(self, obj): + def get_datasource_id(self, obj): """Get OpenAIRE Zenodo ID.""" - # TODO: Move to utils.py? 
- openaire_type = self._resolve_openaire_type(obj).type - return current_app.config['OPENAIRE_ZENODO_IDS'].get(openaire_type) + oatype = self._openaire_type(obj) + if oatype: + return current_app.config['OPENAIRE_ZENODO_IDS'].get( + oatype['type']) + return missing # Mapped from: http://api.openaire.eu/vocabularies/dnet:access_modes LICENSE_MAPPING = { @@ -134,7 +142,6 @@ def get_pids(self, obj): def get_url(self, obj): """Get record URL.""" - # TODO: Zenodo or DOI URL? ("zenodo.org/..." or "doi.org/...") return current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format( recid=obj['metadata']['recid']) diff --git a/zenodo/modules/records/data/objecttypes.json b/zenodo/modules/records/data/objecttypes.json index 851276d80..f79c62bfb 100644 --- a/zenodo/modules/records/data/objecttypes.json +++ b/zenodo/modules/records/data/objecttypes.json @@ -25,6 +25,10 @@ {"$ref": "http://zenodo.org/objecttypes/publication/workingpaper"}, {"$ref": "http://zenodo.org/objecttypes/publication/other"} ], + "openaire": { + "resourceType": "0001", + "type": "publication" + }, "csl": "article" }, { @@ -40,6 +44,10 @@ "datacite": {"general": "Text", "type": "Poster"}, "eurepo": "info:eu-repo/semantics/conferencePoster", "schema.org": "http://schema.org/CreativeWork", + "openaire": { + "resourceType": "0004", + "type": "publication" + }, "csl": "graphic" }, { @@ -55,6 +63,10 @@ "datacite": {"general": "Text", "type": "Presentation"}, "eurepo": "info:eu-repo/semantics/lecture", "schema.org": "http://schema.org/CreativeWork", + "openaire": { + "resourceType": "0010", + "type": "publication" + }, "csl": "speech" }, { @@ -70,6 +82,10 @@ "datacite": {"general": "Dataset"}, "eurepo": "info:eu-repo/semantics/other", "schema.org": "http://schema.org/Dataset", + "openaire": { + "resourceType": "0021", + "type": "dataset" + }, "csl": "dataset" }, { @@ -154,6 +170,10 @@ "datacite": {"general": "Text", "type": "Book"}, "eurepo": "info:eu-repo/semantics/book", "parent": {"$ref": 
"http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0002", + "type": "publication" + }, "csl": "book" }, { @@ -170,6 +190,10 @@ "datacite": {"general": "Text", "type": "Book section"}, "eurepo": "info:eu-repo/semantics/bookPart", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0013", + "type": "publication" + }, "csl": "chapter" }, { @@ -186,6 +210,10 @@ "datacite": {"general": "Text", "type": "Conference paper"}, "eurepo": "info:eu-repo/semantics/conferencePaper", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0004", + "type": "publication" + }, "csl": "paper-conference" }, { @@ -202,6 +230,10 @@ "datacite": {"general": "Text", "type": "Journal article"}, "eurepo": "info:eu-repo/semantics/article", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0001", + "type": "publication" + }, "csl": "article-journal" }, { @@ -218,6 +250,10 @@ "eurepo": "info:eu-repo/semantics/patent", "schema.org": "http://schema.org/CreativeWork", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0019", + "type": "publication" + }, "csl": "patent" }, { @@ -234,6 +270,10 @@ "datacite": {"general": "Text", "type": "Preprint"}, "eurepo": "info:eu-repo/semantics/preprint", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0016", + "type": "publication" + }, "csl": "article" }, { @@ -250,6 +290,10 @@ "datacite": {"general": "Text", "type": "Report"}, "eurepo": "info:eu-repo/semantics/report", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0017", + "type": "publication" + }, "csl": "article" }, { @@ -266,6 +310,10 @@ "datacite": {"general": "Text", "type": "Software documentation"}, "eurepo": "info:eu-repo/semantics/technicalDocumentation", "parent": {"$ref": 
"http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0032", + "type": "publication" + }, "csl": "article" }, { @@ -282,6 +330,10 @@ "datacite": {"general": "Text", "type": "Thesis"}, "eurepo": "info:eu-repo/semantics/doctoralThesis", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0007", + "type": "publication" + }, "csl": "thesis" }, { @@ -298,6 +350,10 @@ "datacite": {"general": "Text", "type": "Technical note"}, "eurepo": "info:eu-repo/semantics/technicalDocumentation", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0020", + "type": "publication" + }, "csl": "article" }, { @@ -314,6 +370,10 @@ "datacite": {"general": "Text", "type": "Working paper"}, "eurepo": "info:eu-repo/semantics/workingPaper", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0016", + "type": "publication" + }, "csl": "article" }, { @@ -330,6 +390,10 @@ "datacite": {"general": "Text", "type": "Proposal"}, "eurepo": "info:eu-repo/semantics/researchProposal", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0020", + "type": "publication" + }, "csl": "article" }, { @@ -346,6 +410,10 @@ "datacite": {"general": "Text", "type": "Project deliverable"}, "eurepo": "info:eu-repo/semantics/report", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0017", + "type": "publication" + }, "csl": "report" }, { @@ -362,6 +430,10 @@ "datacite": {"general": "Text", "type": "Project milestone"}, "eurepo": "info:eu-repo/semantics/report", "parent": {"$ref": "http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0017", + "type": "publication" + }, "csl": "report" }, { @@ -378,6 +450,10 @@ "datacite": {"general": "Text", "type": "Other"}, "eurepo": "info:eu-repo/semantics/other", "parent": {"$ref": 
"http://zenodo.org/objecttypes/publication"}, + "openaire": { + "resourceType": "0020", + "type": "publication" + }, "csl": "article" }, { diff --git a/zenodo/modules/records/jsonschemas/records/objecttype-v1.0.0.json b/zenodo/modules/records/jsonschemas/records/objecttype-v1.0.0.json index 717d266aa..8915e91d7 100644 --- a/zenodo/modules/records/jsonschemas/records/objecttype-v1.0.0.json +++ b/zenodo/modules/records/jsonschemas/records/objecttype-v1.0.0.json @@ -57,6 +57,21 @@ "eurepo": { "type": "string" }, + "openaire": { + "type": "object", + "properties": { + "resourceType": { + "type": "string" + }, + "type": { + "type": "string" + }, + "required": [ + "resourceType", + "type" + ] + } + }, "parent": { "type": "object" },