diff --git a/CHANGES.rst b/CHANGES.rst index 2412a02f92..f8675d7744 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -22,6 +22,9 @@ New features Bugfixes ++++++++ +- Fix XML Page template files in Python 3 + (`#319 `_) + - Fix ZMI upload of `DTMLMethod` and `DTMLDocument` to store the DTML as a native ``str`` on both Python versions. (`#265 `_) diff --git a/src/Products/PageTemplates/PageTemplateFile.py b/src/Products/PageTemplates/PageTemplateFile.py index a17db1b29f..6ca5e25d0f 100644 --- a/src/Products/PageTemplates/PageTemplateFile.py +++ b/src/Products/PageTemplates/PageTemplateFile.py @@ -12,6 +12,7 @@ ############################################################################## import os +import six from logging import getLogger from AccessControl.class_init import InitializeClass @@ -25,6 +26,7 @@ from OFS.Traversable import Traversable from Products.PageTemplates.Expressions import SecureModuleImporter from Products.PageTemplates.PageTemplate import PageTemplate +from Products.PageTemplates.utils import encodingFromXMLPreamble from Shared.DC.Scripts.Script import Script from Shared.DC.Scripts.Signature import FuncCode from zope.contenttype import guess_content_type @@ -73,9 +75,11 @@ class PageTemplateFile(SimpleItem, Script, PageTemplate, Traversable): security.declareProtected( 'View management screens', 'read', 'document_src') - def __init__(self, filename, _prefix=None, **kw): + def __init__( + self, filename, _prefix=None, encoding=DEFAULT_ENCODING, **kw + ): name = kw.pop('__name__', None) - + self.encoding = encoding basepath, ext = os.path.splitext(filename) if name: @@ -98,7 +102,7 @@ def __init__(self, filename, _prefix=None, **kw): def pt_getContext(self): root = None meth = aq_get(self, 'getPhysicalRoot', None) - if meth is not None: + if callable(meth): root = meth() context = self._getContext() c = {'template': self, @@ -157,7 +161,6 @@ def _cook_check(self): if self._v_program is not None and mtime == self._v_last_read: return text, type_ = self._read_file() - # FIXME: text is a binary_type when it's XML. self.pt_edit(text, type_) self._cook() if self._v_errors: @@ -168,32 +171,35 @@ def _cook_check(self): def _prepare_html(self, text): match = meta_pattern.search(text) if match is not None: - type_, encoding = (x.decode('utf-8') for x in match.groups()) + type_, encoding = (x.decode(self.encoding) for x in match.groups()) # TODO: Shouldn't / stripping # be in PageTemplate.__call__()? text = meta_pattern.sub(b"", text) else: type_ = None - encoding = DEFAULT_ENCODING + encoding = self.encoding text = text.decode(encoding) return text, type_ + def _prepare_xml(self, text): + if not isinstance(text, six.text_type): + encoding = encodingFromXMLPreamble(text, default=self.encoding) + text = text.decode(encoding) + return text, 'text/xml' + def _read_file(self): __traceback_info__ = self.filename - f = open(self.filename, "rb") - try: + with open(self.filename, "rb") as f: text = f.read(XML_PREFIX_MAX_LENGTH) - except: - f.close() - raise - type_ = sniff_type(text) - text += f.read() + type_ = sniff_type(text) + text += f.read() if type_ != "text/xml": text, type_ = self._prepare_html(text) + else: + text, type_ = self._prepare_xml(text) f.close() return text, type_ - def document_src(self, REQUEST=None, RESPONSE=None): """Return expanded document source.""" diff --git a/src/Products/PageTemplates/tests/testZopePageTemplate.py b/src/Products/PageTemplates/tests/testZopePageTemplate.py index 29a7ebc24e..9f527f0e6d 100644 --- a/src/Products/PageTemplates/tests/testZopePageTemplate.py +++ b/src/Products/PageTemplates/tests/testZopePageTemplate.py @@ -5,7 +5,6 @@ Ensures that adding a page template works correctly. """ -import sys import unittest import transaction @@ -21,12 +20,13 @@ from Products.PageTemplates.utils import encodingFromXMLPreamble from Products.PageTemplates.utils import charsetFromMetaEquiv from zope.component import provideUtility +from zope.pagetemplate.pagetemplatefile import DEFAULT_ENCODING from Products.PageTemplates.interfaces import IUnicodeEncodingConflictResolver from Products.PageTemplates.unicodeconflictresolver \ import PreferredCharsetResolver import Zope2 -from six import text_type, binary_type +from six import text_type ascii_binary = b'hello world' iso885915_binary = u'üöäÜÖÄß'.encode('iso-8859-15') @@ -39,7 +39,7 @@ ''' xml_binary_iso_8859_15 = (xml_template % 'iso-8859-15').encode('iso-8859-15') -xml_binary_utf8 = (xml_template % 'utf-8').encode('utf-8') +xml_binary_utf8 = (xml_template % 'utf-8').encode('utf-8') html_template_w_header = u''' @@ -86,7 +86,7 @@ class ZPTUtilsTests(unittest.TestCase): def testExtractEncodingFromXMLPreamble(self): extract = encodingFromXMLPreamble - self.assertEqual(extract(b''), 'utf-8') + self.assertEqual(extract(b''), DEFAULT_ENCODING) self.assertEqual(extract(b''), 'utf-8') diff --git a/src/Products/PageTemplates/tests/test_ptfile.py b/src/Products/PageTemplates/tests/test_ptfile.py index 833c8eccf1..2b8af2b012 100644 --- a/src/Products/PageTemplates/tests/test_ptfile.py +++ b/src/Products/PageTemplates/tests/test_ptfile.py @@ -1,3 +1,4 @@ +# coding=utf-8 """Tests of PageTemplateFile.""" import os @@ -29,11 +30,13 @@ def tearDown(self): if os.path.exists(self.TEMPFILENAME): os.unlink(self.TEMPFILENAME) - def check_content_type(self, text, expected_type): - f = open(self.TEMPFILENAME, "wb") - f.write(text) - f.close() - pt = PageTemplateFile(self.TEMPFILENAME) + def check_content_type(self, bytes, expected_type, encoding=None): + with open(self.TEMPFILENAME, "wb") as f: + f.write(bytes) + if encoding: + pt = PageTemplateFile(self.TEMPFILENAME, encoding=encoding) + else: + pt = PageTemplateFile(self.TEMPFILENAME) pt.read() self.assertEqual(pt.content_type, expected_type) @@ -55,63 +58,43 @@ def test_sniffer_xml_utf8(self): "text/xml") # with byte order mark self.check_content_type( - b"\xef\xbb\xbf", + b"", "text/xml") self.check_content_type( - b"\xef\xbb\xbf", + b"", "text/xml") def test_sniffer_xml_utf16_be(self): + u_example1 = u'' + u_example2 = u'' + b_example1 = u_example1.encode('utf-16-be') + b_example2 = u_example2.encode('utf-16-be') # w/out byte order mark - self.check_content_type( - b"\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" - b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" - b"\0<\0d\0o\0c\0/\0>", - "text/xml") - self.check_content_type( - b"\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" - b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" - b"\0<\0d\0o\0c\0/\0>", - "text/xml") + self.check_content_type(b_example1, "text/xml", encoding='utf-16-be') + self.check_content_type(b_example2, "text/xml", encoding='utf-16-be') # with byte order mark self.check_content_type( - b"\xfe\xff" - b"\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" - b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" - b"\0<\0d\0o\0c\0/\0>", - "text/xml") + b"\xfe\xff" + b_example1, "text/xml", encoding='utf-16-be' + ) self.check_content_type( - b"\xfe\xff" - b"\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" - b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" - b"\0<\0d\0o\0c\0/\0>", - "text/xml") + b"\xfe\xff" + b_example2, "text/xml", encoding='utf-16-be' + ) def test_sniffer_xml_utf16_le(self): + u_example1 = u'' + u_example2 = u'' + b_example1 = u_example1.encode('utf-16-le') + b_example2 = u_example2.encode('utf-16-le') # w/out byte order mark - self.check_content_type( - b"<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" - b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" - b"<\0d\0o\0c\0/\0>\n", - "text/xml") - self.check_content_type( - b"<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" - b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" - b"<\0d\0o\0c\0/\0>\0", - "text/xml") + self.check_content_type(b_example1, "text/xml", encoding='utf-16-le') + self.check_content_type(b_example2, "text/xml", encoding='utf-16-le') # with byte order mark self.check_content_type( - b"\xff\xfe" - b"<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" - b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" - b"<\0d\0o\0c\0/\0>\0", - "text/xml") + b"\xff\xfe" + b_example1, "text/xml", encoding='utf-16-le' + ) self.check_content_type( - b"\xff\xfe" - b"<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" - b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" - b"<\0d\0o\0c\0/\0>\0", - "text/xml") + b"\xff\xfe" + b_example2, "text/xml", encoding='utf-16-le' + ) HTML_PUBLIC_ID = b"-//W3C//DTD HTML 4.01 Transitional//EN" HTML_SYSTEM_ID = b"http://www.w3.org/TR/html4/loose.dtd" @@ -210,3 +193,16 @@ def test_lazy(self): f.close() pt = PageTemplateFile(self.TEMPFILENAME) self.assertTrue(not pt._text and not pt._v_program) + + +class RenderTestCase(unittest.TestCase): + + def testXMLPageTemplateFile(self): + dirname = os.path.dirname(__file__) + + filename = os.path.join(dirname, 'utf8.xml') + with open(filename, 'rb') as f: + self.assertEqual( + PageTemplateFile(filename).pt_render(), + f.read().decode('utf8'), + ) diff --git a/src/Products/PageTemplates/tests/utf8.xml b/src/Products/PageTemplates/tests/utf8.xml new file mode 100644 index 0000000000..fd031a2b0b --- /dev/null +++ b/src/Products/PageTemplates/tests/utf8.xml @@ -0,0 +1,4 @@ + + +üöäÜÖÄß + diff --git a/src/Products/PageTemplates/utils.py b/src/Products/PageTemplates/utils.py index 6d4785b8fb..c2d2837a43 100644 --- a/src/Products/PageTemplates/utils.py +++ b/src/Products/PageTemplates/utils.py @@ -14,7 +14,8 @@ """ import re -import sys + +from zope.pagetemplate.pagetemplatefile import DEFAULT_ENCODING xml_preamble_reg = re.compile( br'^<\?xml.*?encoding="(.*?)".*?\?>', re.M) @@ -24,7 +25,7 @@ br'charset.*?=.*?(?P[\w\-]*)', re.I | re.M | re.S) -def encodingFromXMLPreamble(xml): +def encodingFromXMLPreamble(xml, default=DEFAULT_ENCODING): """ Extract the encoding from a xml preamble. Expects XML content is binary (encoded), otherwise a previous transport encoding is meaningless. @@ -34,7 +35,7 @@ def encodingFromXMLPreamble(xml): match = xml_preamble_reg.match(xml) if not match: - return 'utf-8' + return default encoding = match.group(1).lower() return encoding.decode('ascii') @@ -85,6 +86,6 @@ def convertToUnicode(source, content_type, preferred_encodings): return source.decode(enc), enc except UnicodeDecodeError: continue - + # trigger a UnicodeDecodeError so we fail loudly return source.decode('utf-8'), 'utf-8'