New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix "Twisted Web in 60 Seconds" examples on Python 3 #894

Merged
merged 18 commits into from Oct 7, 2017
Jump to file or symbol
Failed to load files and symbols.
+130 −17
Diff settings

Always

Just for now

Viewing a subset of changes. View all

static.File decodes request paths as UTF-8 with a unicode FilePath.

If the underlying path is itself unicode, then the request path should
be decoded as UTF-8 per RFC 2047: https://tools.ietf.org/html/rfc2047

To ensure compatibility with Python 3, all other paths should be
native strings.
  • Loading branch information...
markrwilliams committed Oct 6, 2017
commit ea1608e8eabc429adf27bd55bd5ea5edf736bd6d
View
@@ -12,7 +12,6 @@
import itertools
import mimetypes
import os
import sys
import time
import warnings
@@ -23,8 +22,8 @@
from twisted.web import http
from twisted.web.util import redirectTo
from twisted.python.compat import (_PY3, StringType, intToBytes, nativeString,
networkString)
from twisted.python.compat import (_PY3, intToBytes, nativeString,
networkString, unicode)
from twisted.python.compat import escape
from twisted.python import components, filepath, log
@@ -246,7 +245,7 @@ def __init__(self, path, defaultType="text/html", ignoredExts=(), registry=None,
if ignoredExts in (0, 1) or allowExt:
warnings.warn("ignoredExts should receive a list, not a boolean")
if ignoredExts or allowExt:
self.ignoredExts = [b'*']
self.ignoredExts = ['*']
else:
self.ignoredExts = []
else:
@@ -274,11 +273,33 @@ def directoryListing(self):
def getChild(self, path, request):
"""
If this L{File}'s path refers to a directory, return a L{File}
If this L{File}"s path refers to a directory, return a L{File}
referring to the file named C{path} in that directory.
If C{path} is the empty string, return a L{DirectoryLister} instead.
If C{path} is the empty string, return a L{DirectoryLister}
instead.
@param path: The current path segment.
@type path: L{bytes}
@param request: The incoming request.
@type request: An that provides L{twisted.web.iweb.IRequest}.
@return: A resource representing the requested file or
directory, or L{NoResource} if the path cannot be
accessed.
@rtype: An object that provides L{resource.IResource}.
"""
if isinstance(self.path, unicode):
try:
# Request calls urllib.unquote on each path segment,
# leaving us with raw bytes.
path = path.decode('utf-8')

This comment has been minimized.

@rodrigc

rodrigc Oct 6, 2017

Contributor

@markrwilliams This part of the patch is wrong, and breaks several builders.
unicode does not have a .decode method

@rodrigc

rodrigc Oct 6, 2017

Contributor

@markrwilliams This part of the patch is wrong, and breaks several builders.
unicode does not have a .decode method

This comment has been minimized.

@markrwilliams

markrwilliams Oct 6, 2017

Member

These are the tests that appear to break:

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L97

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L107

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L120

They pass native strings as the path argument to Resource.getChild, which is wrong. URL paths in twisted.web are always bytes:

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/http.py#L803-L818

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/server.py#L168-L197

FilePaths can describe either unicode or bytes paths. static.File is a subclass of FilePath, so self.path here can be either unicode or bytes. This is supposed to decode the request path only if self.path describes a unicode path; the original code is correct, .decode needs to be there, and those tests need to use bytes instead of native strings for their request paths.

@markrwilliams

markrwilliams Oct 6, 2017

Member

These are the tests that appear to break:

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L97

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L107

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_tap.py#L120

They pass native strings as the path argument to Resource.getChild, which is wrong. URL paths in twisted.web are always bytes:

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/http.py#L803-L818

https://github.com/twisted/twisted/blob/trunk/src/twisted/web/server.py#L168-L197

FilePaths can describe either unicode or bytes paths. static.File is a subclass of FilePath, so self.path here can be either unicode or bytes. This is supposed to decode the request path only if self.path describes a unicode path; the original code is correct, .decode needs to be there, and those tests need to use bytes instead of native strings for their request paths.

This comment has been minimized.

@rodrigc

rodrigc Oct 6, 2017

Contributor

@markrwilliams You approved this PR. Is it OK to merge this PR as is, and fix the test_tap.py usages in a follow-up PR, or do you want everything fixed in this PR?

@rodrigc

rodrigc Oct 6, 2017

Contributor

@markrwilliams You approved this PR. Is it OK to merge this PR as is, and fix the test_tap.py usages in a follow-up PR, or do you want everything fixed in this PR?

except UnicodeDecodeError:
log.err(None,
"Could not decode path segment as utf-8: %r" % (path,))
return self.childNotFound
self.restat(reraise=False)
if not self.isdir():
@@ -300,8 +321,6 @@ def getChild(self, path, request):
return self.childNotFound
extension = fpath.splitext()[1]
if not isinstance(extension, StringType):
extension = extension.decode(sys.getfilesystemencoding())
if platformType == "win32":
# don't want .RPY to be different than .rpy, since that would allow
# source disclosure.
@@ -334,7 +353,7 @@ def _parseRangeHeader(self, range):
@return: A list C{[(start, stop)]} of pairs of length at least one.
@raise ValueError: if the header is syntactically invalid or if the
Bytes-Unit is anything other than 'bytes'.
Bytes-Unit is anything other than "bytes'.
"""
try:
kind, value = range.split(b'=', 1)
@@ -9,6 +9,8 @@
import mimetypes
import os
import re
import sys
import warnings
from io import BytesIO as StringIO
@@ -65,6 +67,44 @@ class StaticFileTests(TestCase):
def _render(self, resource, request):
return _render(resource, request)
def test_ignoredExtTrue(self):
"""
Passing C{1} as the value to L{File}'s C{ignoredExts} argument
issues a warning and sets the ignored extensions to the
wildcard C{"*"}.
"""
with warnings.catch_warnings(record=True) as caughtWarnings:
file = static.File(self.mktemp(), ignoredExts=1)
self.assertEqual(file.ignoredExts, ["*"])
self.assertEqual(len(caughtWarnings), 1)
def test_ignoredExtFalse(self):
"""
Passing C{1} as the value to L{File}'s C{ignoredExts} argument
issues a warning and sets the ignored extensions to the empty
list.
"""
with warnings.catch_warnings(record=True) as caughtWarnings:
file = static.File(self.mktemp(), ignoredExts=0)
self.assertEqual(file.ignoredExts, [])
self.assertEqual(len(caughtWarnings), 1)
def test_allowExt(self):
"""
Passing C{1} as the value to L{File}'s C{allowExt} argument
issues a warning and sets the ignored extensions to the
wildcard C{*}.
"""
with warnings.catch_warnings(record=True) as caughtWarnings:
file = static.File(self.mktemp(), ignoredExts=True)
self.assertEqual(file.ignoredExts, ["*"])
self.assertEqual(len(caughtWarnings), 1)
def test_invalidMethod(self):
"""
@@ -158,6 +198,30 @@ def cbRendered(ignored):
test_forbiddenResource.skip = "Cannot remove read permission on Windows"
def test_undecodablePath(self):
"""
A request whose path cannot be decoded as UTF-8 receives a not
found response, and the failure is logged.
"""
path = self.mktemp()
if not _PY3:
path = path.decode('ascii')
base = FilePath(path)
base.makedirs()
file = static.File(base.path)
request = DummyRequest([b"\xff"])
child = resource.getChildForRequest(file, request)
d = self._render(child, request)
def cbRendered(ignored):
self.assertEqual(request.responseCode, 404)
self.assertEqual(len(self.flushLoggedErrors(UnicodeDecodeError)),
1)
d.addCallback(cbRendered)
return d
def test_forbiddenResource_default(self):
"""
L{File.forbidden} defaults to L{resource.ForbiddenResource}.
@@ -205,7 +269,7 @@ def test_indexNames(self):
base.makedirs()
base.child("foo.bar").setContent(b"baz")
file = static.File(base.path)
file.indexNames = [b'foo.bar']
file.indexNames = ['foo.bar']
request = DummyRequest([b''])
child = resource.getChildForRequest(file, request)
@@ -244,14 +308,44 @@ def cbRendered(ignored):
return d
def test_staticFileUnicodeFileName(self):
"""
A request for a existing unicode file path encoded as UTF-8
returns the contents of that file.
"""
name = u"\N{GREEK SMALL LETTER ETA WITH PERISPOMENI}"
content = b"content"
base = FilePath(self.mktemp())
base.makedirs()
base.child(name).setContent(content)
file = static.File(base.path)
request = DummyRequest([name.encode('utf-8')])
child = resource.getChildForRequest(file, request)
d = self._render(child, request)
def cbRendered(ignored):
self.assertEqual(b''.join(request.written), content)
self.assertEqual(
request.responseHeaders.getRawHeaders(b'content-length')[0],
networkString(str(len(content))))
d.addCallback(cbRendered)
return d
if sys.getfilesystemencoding().lower() not in ('utf-8', 'mcbs'):
test_staticFileUnicodeFileName.skip = (
"Cannot write unicode filenames with file system encoding of"
" %s" % (sys.getfilesystemencoding(),))
def test_staticFileDeletedGetChild(self):
"""
A L{static.File} created for a directory which does not exist should
return childNotFound from L{static.File.getChild}.
"""
staticFile = static.File(self.mktemp())
request = DummyRequest([b'foo.bar'])
child = staticFile.getChild("foo.bar", request)
child = staticFile.getChild(b"foo.bar", request)
self.assertEqual(child, staticFile.childNotFound)
@@ -352,12 +446,12 @@ def test_ignoreExt(self):
"""
file = static.File(b".")
self.assertEqual(file.ignoredExts, [])
file.ignoreExt(b".foo")
file.ignoreExt(b".bar")
self.assertEqual(file.ignoredExts, [b".foo", b".bar"])
file.ignoreExt(".foo")
file.ignoreExt(".bar")
self.assertEqual(file.ignoredExts, [".foo", ".bar"])
file = static.File(b".", ignoredExts=(b".bar", b".baz"))
self.assertEqual(file.ignoredExts, [b".bar", b".baz"])
file = static.File(b".", ignoredExts=(".bar", ".baz"))
self.assertEqual(file.ignoredExts, [".bar", ".baz"])
def test_ignoredExtensionsIgnored(self):
@@ -371,7 +465,7 @@ def test_ignoredExtensionsIgnored(self):
base.makedirs()
base.child('foo.bar').setContent(b'baz')
base.child('foo.quux').setContent(b'foobar')
file = static.File(base.path, ignoredExts=(b".bar",))
file = static.File(base.path, ignoredExts=(".bar",))
request = DummyRequest([b"foo"])
child = resource.getChildForRequest(file, request)
ProTip! Use n and p to navigate between commits in a pull request.