Skip to content

Commit

Permalink
Merge 1b26636 into 5403d0d
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Oct 30, 2020
2 parents 5403d0d + 1b26636 commit 3546278
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 14 deletions.
4 changes: 3 additions & 1 deletion tests/unit/records/test_schemas_dcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


def test_dcat_serializer(db, es, record_with_bucket):
"""."""
"""Tests the DCAT XSLT-based serializer."""
pid, record = record_with_bucket
serialized_record = dcat_v1.serialize(pid, record)
assert record['title'] in serialized_record
Expand All @@ -39,3 +39,5 @@ def test_dcat_serializer(db, es, record_with_bucket):
for creator in record['creators']:
assert creator['familyname'] in serialized_record
assert creator['givennames'] in serialized_record
for f in record['_files']:
assert f['key'] in serialized_record
78 changes: 65 additions & 13 deletions zenodo/modules/records/serializers/dcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,20 @@

from __future__ import absolute_import, print_function

import mimetypes

import idutils
from flask import has_request_context
from flask_security import current_user
from invenio_records.api import Record
from lxml import etree as ET
from pkg_resources import resource_stream
from werkzeug.utils import cached_property

from zenodo.modules.records.serializers.schemas.common import ui_link_for

from ..permissions import has_read_files_permission


class DCATSerializer(object):
"""DCAT serializer for records."""
Expand All @@ -47,27 +57,69 @@ def xslt_transform_func(self):
transform = ET.XSLT(xsl)
return transform

FILES_FIELDS = {
    # Each entry maps a namespaced tag template (the ``dcat`` prefix is
    # filled in later via ``str.format``) to a callable taking
    # ``(file_entry, record)`` and returning the element text, or a falsy
    # value when the element should be left out.

    # Link produced by ``ui_link_for`` for the 'record_file' endpoint.
    '{{{dcat}}}downloadURL': lambda file_entry, rec: ui_link_for(
        'record_file', id=rec['recid'], filename=file_entry['key']),
    # MIME type guessed from the file name; ``None`` when it is unknown.
    '{{{dcat}}}mediaType': lambda file_entry, rec: mimetypes.guess_type(
        file_entry['key'])[0],
    # File size rendered as a string so it can be used as element text.
    '{{{dcat}}}byteSize': lambda file_entry, rec: str(file_entry['size']),
    # HTTPS URL derived from the record DOI.
    '{{{dcat}}}accessURL': lambda file_entry, rec: idutils.to_url(
        rec['doi'], 'doi', url_scheme='https'),
    # TODO: there's also "spdx:checksum", but it's not in the W3C spec yet
}

def _add_files(self, root, files, record):
    """Add files information via distribution elements.

    For every entry of ``files`` a ``dcat:distribution`` element wrapping a
    ``dcat:Distribution`` element is appended to the first child of
    ``root``. One sub-element is then added per ``FILES_FIELDS`` entry
    whose callable returns a truthy value.

    :param root: Root element of the DCAT tree. Its ``nsmap`` must declare
        the ``dcat`` prefix and it must have at least one child.
    :param files: Iterable of file entries handed to the ``FILES_FIELDS``
        callables as first argument.
    :param record: Object handed to the ``FILES_FIELDS`` callables as
        second argument.
    """
    # lxml keeps a default namespace under the ``None`` key of ``nsmap``.
    # ``str.format(**mapping)`` only accepts string keywords, so drop the
    # un-prefixed entry before expanding the tag templates.
    ns = {prefix: uri for prefix, uri in root.nsmap.items() if prefix}

    # The tag names do not depend on the file, so resolve them only once.
    wrapper_tag = '{{{dcat}}}distribution'.format(**ns)
    distribution_tag = '{{{dcat}}}Distribution'.format(**ns)
    fields = [
        (tag.format(**ns), func)
        for tag, func in self.FILES_FIELDS.items()
    ]

    for f in files:
        dist_wrapper = ET.SubElement(root[0], wrapper_tag)
        dist = ET.SubElement(dist_wrapper, distribution_tag)

        for tag, func in fields:
            val = func(f, record)
            # Skip fields without a value (e.g. an unknown media type).
            if val:
                el = ET.SubElement(dist, tag)
                el.text = val

def _etree_tostring(self, root):
    """Render ``root`` as a pretty-printed XML string.

    The tree is serialized as UTF-8 with an XML declaration and the
    resulting bytes are decoded back to text before being returned.
    """
    xml_bytes = ET.tostring(
        root,
        encoding='utf-8',
        xml_declaration=True,
        pretty_print=True,
    )
    return xml_bytes.decode('utf-8')

def transform_with_xslt(self, pid, record, search_hit=False, **kwargs):
"""Transform record with XSLT."""
# NOTE(review): this span is a rendered diff, so removed and added variants
# of the same statement appear back to back. The ``dc_record`` assignments
# and the ``.getroot()`` call are the variants that agree with the
# ``dump_etree(dc_record)`` call and with the final ``return`` further down.
files_data = None
if search_hit:
# Removed/added pair: the added form keeps ``record`` untouched and stores
# the DataCite-transformed data in ``dc_record`` instead.
record = self.datacite_serializer.transform_search_hit(
dc_record = self.datacite_serializer.transform_search_hit(
pid, record, **kwargs)
# Files of a search hit are read from its ``_source`` document, falling
# back to a top-level ``_files`` key.
if '_files' in record['_source']:
files_data = record['_source']['_files']
elif '_files' in record:
files_data = record['_files']

else:
# Removed/added pair, same change as in the search-hit branch.
record = self.datacite_serializer.transform_record(
dc_record = self.datacite_serializer.transform_record(
pid, record, **kwargs)
# Removed line; superseded by the ``dump_etree(dc_record)`` call below.
dc_etree = self.datacite_serializer.schema.dump_etree(record)
# for single-record serialization check file read permissions
# Files are only exposed for ``Record`` instances; outside a request context
# the permission check is skipped and the files are always included.
if isinstance(record, Record) and '_files' in record:
if not has_request_context() or has_read_files_permission(
current_user, record):
files_data = record['_files']

dc_etree = self.datacite_serializer.schema.dump_etree(dc_record)
dc_namespace = self.datacite_serializer.schema.ns[None]
# Rename the root element to ``resource`` in the schema's default namespace
# before it is handed to the XSLT transform.
dc_etree.tag = '{{{0}}}resource'.format(dc_namespace)
# Removed pair of lines: the transform result used to be returned as is.
dcat_etree = self.xslt_transform_func(dc_etree)
return dcat_etree
# Added line: keep the root element so that children can be appended to it.
dcat_etree = self.xslt_transform_func(dc_etree).getroot()

# Removed copy of ``_etree_tostring`` as shown by the diff at its old position.
def _etree_tostring(self, root):
return ET.tostring(
root,
pretty_print=True,
xml_declaration=True,
encoding='utf-8',
).decode('utf-8')
# Inject files in results (since the XSLT can't do that by default)
# NOTE(review): ``record`` is the caller's original argument. For search hits
# that is the hit itself, while the ``FILES_FIELDS`` callables index it with
# 'recid' and 'doi'. Confirm those keys exist at the top level of a hit and
# not only under ``_source``.
if files_data:
self._add_files(dcat_etree, files_data, record)

return dcat_etree

def serialize(self, pid, record, **kwargs):
"""Serialize a single record.
Expand Down Expand Up @@ -96,4 +148,4 @@ def serialize_search(self, pid_fetcher, search_result, **kwargs):

def serialize_oaipmh(self, pid, record):
"""Serialize a single record for OAI-PMH."""
# NOTE(review): the two ``return`` lines are the removed/added pair of this
# diff. The second form drops ``.getroot()`` because ``transform_with_xslt``
# now calls ``.getroot()`` on the XSLT result itself. The record is always
# processed as a search hit (``search_hit=True``).
return self.transform_with_xslt(pid, record, search_hit=True).getroot()
return self.transform_with_xslt(pid, record, search_hit=True)

0 comments on commit 3546278

Please sign in to comment.