Skip to content

Commit

Permalink
Merge eea4541 into b6c8391
Browse files Browse the repository at this point in the history
  • Loading branch information
ale-rt committed Oct 1, 2018
2 parents b6c8391 + eea4541 commit 405c2ff
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 69 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ New features
Bugfixes
++++++++

- Fix XML Page template files in Python 3
(`#319 <https://github.com/zopefoundation/Zope/issues/319>`_)

- Fix ZMI upload of `DTMLMethod` and `DTMLDocument` to store the DTML as a
native ``str`` on both Python versions.
(`#265 <https://github.com/zopefoundation/Zope/pull/265>`_)
Expand Down
34 changes: 20 additions & 14 deletions src/Products/PageTemplates/PageTemplateFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
##############################################################################

import os
import six
from logging import getLogger

from AccessControl.class_init import InitializeClass
Expand All @@ -25,6 +26,7 @@
from OFS.Traversable import Traversable
from Products.PageTemplates.Expressions import SecureModuleImporter
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble
from Shared.DC.Scripts.Script import Script
from Shared.DC.Scripts.Signature import FuncCode
from zope.contenttype import guess_content_type
Expand Down Expand Up @@ -73,9 +75,11 @@ class PageTemplateFile(SimpleItem, Script, PageTemplate, Traversable):
security.declareProtected(
'View management screens', 'read', 'document_src')

def __init__(self, filename, _prefix=None, **kw):
def __init__(
self, filename, _prefix=None, encoding=DEFAULT_ENCODING, **kw
):
name = kw.pop('__name__', None)

self.encoding = encoding
basepath, ext = os.path.splitext(filename)

if name:
Expand All @@ -98,7 +102,7 @@ def __init__(self, filename, _prefix=None, **kw):
def pt_getContext(self):
root = None
meth = aq_get(self, 'getPhysicalRoot', None)
if meth is not None:
if callable(meth):
root = meth()
context = self._getContext()
c = {'template': self,
Expand Down Expand Up @@ -157,7 +161,6 @@ def _cook_check(self):
if self._v_program is not None and mtime == self._v_last_read:
return
text, type_ = self._read_file()
# FIXME: text is a binary_type when it's XML.
self.pt_edit(text, type_)
self._cook()
if self._v_errors:
Expand All @@ -168,32 +171,35 @@ def _cook_check(self):
def _prepare_html(self, text):
match = meta_pattern.search(text)
if match is not None:
type_, encoding = (x.decode('utf-8') for x in match.groups())
type_, encoding = (x.decode(self.encoding) for x in match.groups())
# TODO: Shouldn't <meta>/<?xml?> stripping
# be in PageTemplate.__call__()?
text = meta_pattern.sub(b"", text)
else:
type_ = None
encoding = DEFAULT_ENCODING
encoding = self.encoding
text = text.decode(encoding)
return text, type_

# Turn the raw content of an XML template into text and report its
# content type. Called from _read_file when the sniffed type is "text/xml".
#
# Returns a ``(text, 'text/xml')`` tuple.
def _prepare_xml(self, text):
# Input that is already six.text_type is passed through untouched.
if not isinstance(text, six.text_type):
# Use the encoding declared in the XML preamble when one is found,
# otherwise fall back to self.encoding.
# NOTE(review): the preamble pattern in Products.PageTemplates.utils
# is a bytes regex anchored at the start of the data and only accepts
# a double-quoted encoding="..." attribute, so BOM-prefixed,
# single-quoted or UTF-16 encoded preambles appear to fall through to
# self.encoding -- confirm this is intended.
encoding = encodingFromXMLPreamble(text, default=self.encoding)
text = text.decode(encoding)
return text, 'text/xml'

def _read_file(self):
__traceback_info__ = self.filename
f = open(self.filename, "rb")
try:
with open(self.filename, "rb") as f:
text = f.read(XML_PREFIX_MAX_LENGTH)
except:
f.close()
raise
type_ = sniff_type(text)
text += f.read()
type_ = sniff_type(text)
text += f.read()
if type_ != "text/xml":
text, type_ = self._prepare_html(text)
else:
text, type_ = self._prepare_xml(text)
f.close()
return text, type_


def document_src(self, REQUEST=None, RESPONSE=None):
"""Return expanded document source."""

Expand Down
8 changes: 4 additions & 4 deletions src/Products/PageTemplates/tests/testZopePageTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Ensures that adding a page template works correctly.
"""

import sys
import unittest
import transaction

Expand All @@ -21,12 +20,13 @@
from Products.PageTemplates.utils import encodingFromXMLPreamble
from Products.PageTemplates.utils import charsetFromMetaEquiv
from zope.component import provideUtility
from zope.pagetemplate.pagetemplatefile import DEFAULT_ENCODING
from Products.PageTemplates.interfaces import IUnicodeEncodingConflictResolver
from Products.PageTemplates.unicodeconflictresolver \
import PreferredCharsetResolver
import Zope2

from six import text_type, binary_type
from six import text_type

ascii_binary = b'<html><body>hello world</body></html>'
iso885915_binary = u'<html><body>üöäÜÖÄß</body></html>'.encode('iso-8859-15')
Expand All @@ -39,7 +39,7 @@
'''

xml_binary_iso_8859_15 = (xml_template % 'iso-8859-15').encode('iso-8859-15')
xml_binary_utf8 = (xml_template % 'utf-8').encode('utf-8')
xml_binary_utf8 = (xml_template % 'utf-8').encode('utf-8')

html_template_w_header = u'''
<html>
Expand Down Expand Up @@ -86,7 +86,7 @@ class ZPTUtilsTests(unittest.TestCase):

def testExtractEncodingFromXMLPreamble(self):
extract = encodingFromXMLPreamble
self.assertEqual(extract(b'<?xml version="1.0" ?>'), 'utf-8')
self.assertEqual(extract(b'<?xml version="1.0" ?>'), DEFAULT_ENCODING)
self.assertEqual(extract(b'<?xml encoding="utf-8" '
b'version="1.0" ?>'),
'utf-8')
Expand Down
90 changes: 43 additions & 47 deletions src/Products/PageTemplates/tests/test_ptfile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding=utf-8
"""Tests of PageTemplateFile."""

import os
Expand Down Expand Up @@ -29,11 +30,13 @@ def tearDown(self):
if os.path.exists(self.TEMPFILENAME):
os.unlink(self.TEMPFILENAME)

def check_content_type(self, text, expected_type):
f = open(self.TEMPFILENAME, "wb")
f.write(text)
f.close()
pt = PageTemplateFile(self.TEMPFILENAME)
def check_content_type(self, bytes, expected_type, encoding=None):
with open(self.TEMPFILENAME, "wb") as f:
f.write(bytes)
if encoding:
pt = PageTemplateFile(self.TEMPFILENAME, encoding=encoding)
else:
pt = PageTemplateFile(self.TEMPFILENAME)
pt.read()
self.assertEqual(pt.content_type, expected_type)

Expand All @@ -55,63 +58,43 @@ def test_sniffer_xml_utf8(self):
"text/xml")
# with byte order mark
self.check_content_type(
b"\xef\xbb\xbf<?xml version='1.0' encoding='utf-8'?><doc/>",
b"<?xml version='1.0' encoding='utf-8'?><doc/>",
"text/xml")
self.check_content_type(
b"\xef\xbb\xbf<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
b"<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
"text/xml")

def test_sniffer_xml_utf16_be(self):
u_example1 = u'<?xml version=".0" encoding="utf-16-be"?><doc/>'
u_example2 = u'<?xml version=".0" encoding="utf-16-be"?><doc/>'
b_example1 = u_example1.encode('utf-16-be')
b_example2 = u_example2.encode('utf-16-be')
# w/out byte order mark
self.check_content_type(
b"\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
b"\0<\0d\0o\0c\0/\0>",
"text/xml")
self.check_content_type(
b"\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
b"\0<\0d\0o\0c\0/\0>",
"text/xml")
self.check_content_type(b_example1, "text/xml", encoding='utf-16-be')
self.check_content_type(b_example2, "text/xml", encoding='utf-16-be')
# with byte order mark
self.check_content_type(
b"\xfe\xff"
b"\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
b"\0<\0d\0o\0c\0/\0>",
"text/xml")
b"\xfe\xff" + b_example1, "text/xml", encoding='utf-16-be'
)
self.check_content_type(
b"\xfe\xff"
b"\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'"
b"\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
b"\0<\0d\0o\0c\0/\0>",
"text/xml")
b"\xfe\xff" + b_example2, "text/xml", encoding='utf-16-be'
)

def test_sniffer_xml_utf16_le(self):
u_example1 = u'<?xml version=".0" encoding="utf-16-le"?><doc/>'
u_example2 = u'<?xml version=".0" encoding="utf-16-le"?><doc/>'
b_example1 = u_example1.encode('utf-16-le')
b_example2 = u_example2.encode('utf-16-le')
# w/out byte order mark
self.check_content_type(
b"<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
b"<\0d\0o\0c\0/\0>\n",
"text/xml")
self.check_content_type(
b"<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
b"<\0d\0o\0c\0/\0>\0",
"text/xml")
self.check_content_type(b_example1, "text/xml", encoding='utf-16-le')
self.check_content_type(b_example2, "text/xml", encoding='utf-16-le')
# with byte order mark
self.check_content_type(
b"\xff\xfe"
b"<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
b"<\0d\0o\0c\0/\0>\0",
"text/xml")
b"\xff\xfe" + b_example1, "text/xml", encoding='utf-16-le'
)
self.check_content_type(
b"\xff\xfe"
b"<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0"
b" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
b"<\0d\0o\0c\0/\0>\0",
"text/xml")
b"\xff\xfe" + b_example2, "text/xml", encoding='utf-16-le'
)

HTML_PUBLIC_ID = b"-//W3C//DTD HTML 4.01 Transitional//EN"
HTML_SYSTEM_ID = b"http://www.w3.org/TR/html4/loose.dtd"
Expand Down Expand Up @@ -210,3 +193,16 @@ def test_lazy(self):
f.close()
pt = PageTemplateFile(self.TEMPFILENAME)
self.assertTrue(not pt._text and not pt._v_program)


# Test case that renders a PageTemplateFile and checks the rendered output.
class RenderTestCase(unittest.TestCase):

# Render the ``utf8.xml`` fixture and expect the output to equal the
# fixture's own bytes decoded as UTF-8.
def testXMLPageTemplateFile(self):
# The fixture is looked up in the directory containing this test module.
dirname = os.path.dirname(__file__)

filename = os.path.join(dirname, 'utf8.xml')
# Read the file in binary mode so the expected value is decoded
# explicitly here rather than with a platform-dependent default.
with open(filename, 'rb') as f:
self.assertEqual(
PageTemplateFile(filename).pt_render(),
f.read().decode('utf8'),
)
4 changes: 4 additions & 0 deletions src/Products/PageTemplates/tests/utf8.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<foo>
üöäÜÖÄß
</foo>
9 changes: 5 additions & 4 deletions src/Products/PageTemplates/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"""

import re
import sys

from zope.pagetemplate.pagetemplatefile import DEFAULT_ENCODING

xml_preamble_reg = re.compile(
br'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
Expand All @@ -24,7 +25,7 @@
br'charset.*?=.*?(?P<charset>[\w\-]*)', re.I | re.M | re.S)


def encodingFromXMLPreamble(xml):
def encodingFromXMLPreamble(xml, default=DEFAULT_ENCODING):
""" Extract the encoding from a xml preamble.
Expects the XML content to be binary (encoded); otherwise a previous
transport encoding is meaningless.
Expand All @@ -34,7 +35,7 @@ def encodingFromXMLPreamble(xml):
match = xml_preamble_reg.match(xml)

if not match:
return 'utf-8'
return default
encoding = match.group(1).lower()
return encoding.decode('ascii')

Expand Down Expand Up @@ -85,6 +86,6 @@ def convertToUnicode(source, content_type, preferred_encodings):
return source.decode(enc), enc
except UnicodeDecodeError:
continue

# trigger a UnicodeDecodeError so we fail loudly
return source.decode('utf-8'), 'utf-8'

0 comments on commit 405c2ff

Please sign in to comment.