Merge of 37885 from trunk

zopefoundation · Aug 12, 2005 · fa306d8 · fa306d8
commit fa306d8
Show file tree

Hide file tree

Showing 3 changed files with 628 additions and 0 deletions.
diff --git a/http/httprequestparser.py b/http/httprequestparser.py
@@ -0,0 +1,214 @@
+##############################################################################
+#
+# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""HTTP Request Parser
+
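+Collects an HTTP request from the incoming stream: the request line,
+the headers and, when present, the body.
+
+The server this parser belongs to uses asyncore to accept connections
+and do initial processing, but threads to do the work.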
+
+$Id$
+"""
+import re
+from urllib import unquote
+
+from zope.server.fixedstreamreceiver import FixedStreamReceiver
+from zope.server.buffers import OverflowableBuffer
+from zope.server.utilities import find_double_newline
+from zope.server.interfaces import IStreamConsumer
+from zope.interface import implements
+
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+
+class HTTPRequestParser(object):
+    """A structure that collects the HTTP request.
+
+    Once the stream is completed, the instance is passed to
+    a server task constructor.
+    """
+
+    implements(IStreamConsumer)
+
+    completed = 0  # Set once request is completed.
+    empty = 0        # Set if no request was made.
+    header_plus = ''
+    chunked = 0
+    content_length = 0
+    body_rcv = None
+    # Other attributes: first_line, header, headers, command, uri, version,
+    # path, query, fragment
+
+    # headers is a mapping containing keys translated to uppercase
+    # with dashes turned into underscores.
+
+    def __init__(self, adj):
+        """
+        adj is an Adjustments object.
+        """
+        self.headers = {}
+        self.adj = adj
+
+    def received(self, data):
+        """
+        Receives the HTTP stream for one request.
+        Returns the number of bytes consumed.
+        Sets the completed flag once both the header and the
+        body have been received.
+        """
+        if self.completed:
+            return 0  # Can't consume any more.
+        datalen = len(data)
+        br = self.body_rcv
+        if br is None:
+            # In header.
+            s = self.header_plus + data
+            index = find_double_newline(s)
+            if index >= 0:
+                # Header finished.
+                header_plus = s[:index]
+                consumed = len(data) - (len(s) - index)
+                self.in_header = 0
+                # Remove preceeding blank lines.
+                header_plus = header_plus.lstrip()
+                if not header_plus:
+                    self.empty = 1
+                    self.completed = 1
+                else:
+                    self.parse_header(header_plus)
+                    if self.body_rcv is None:
+                        self.completed = 1
+                return consumed
+            else:
+                # Header not finished yet.
+                self.header_plus = s
+                return datalen
+        else:
+            # In body.
+            consumed = br.received(data)
+            if br.completed:
+                self.completed = 1
+            return consumed
+
+
+    def parse_header(self, header_plus):
+        """
+        Parses the header_plus block of text (the headers plus the
+        first line of the request).
+        """
+        index = header_plus.find('\n')
+        if index >= 0:
+            first_line = header_plus[:index].rstrip()
+            header = header_plus[index + 1:]
+        else:
+            first_line = header_plus.rstrip()
+            header = ''
+        self.first_line = first_line
+        self.header = header
+
+        lines = self.get_header_lines()
+        headers = self.headers
+        for line in lines:
+            index = line.find(':')
+            if index > 0:
+                key = line[:index]
+                value = line[index + 1:].strip()
+                key1 = key.upper().replace('-', '_')
+                # If a header already exists, we append subsequent values
+                # seperated by a comma. Applications already need to handle
+                # the comma seperated values, as HTTP front ends might do 
+                # the concatenation for you (behavior specified in RFC2616).
+                try:
+                    headers[key1] += ', %s' % value
+                except KeyError:
+                    headers[key1] = value
+            # else there's garbage in the headers?
+
+        command, uri, version = self.crack_first_line()
+        self.command = str(command)
+        self.uri = str(uri)
+        self.version = version
+        self.split_uri()
+
+        if version == '1.1':
+            te = headers.get('TRANSFER_ENCODING', '')
+            if te == 'chunked':
+                from zope.server.http.chunking import ChunkedReceiver
+                self.chunked = 1
+                buf = OverflowableBuffer(self.adj.inbuf_overflow)
+                self.body_rcv = ChunkedReceiver(buf)
+        if not self.chunked:
+            try:
+                cl = int(headers.get('CONTENT_LENGTH', 0))
+            except ValueError:
+                cl = 0
+            self.content_length = cl
+            if cl > 0:
+                buf = OverflowableBuffer(self.adj.inbuf_overflow)
+                self.body_rcv = FixedStreamReceiver(cl, buf)
+
+
+    def get_header_lines(self):
+        """
+        Splits the header into lines, putting multi-line headers together.
+        """
+        r = []
+        lines = self.header.split('\n')
+        for line in lines:
+            if line and line[0] in ' \t':
+                r[-1] = r[-1] + line[1:]
+            else:
+                r.append(line)
+        return r
+
+    first_line_re = re.compile (
+        '([^ ]+) (?:[^ :?#]+://[^ ?#/]*)?([^ ]+)(( HTTP/([0-9.]+))$|$)')
+
+    def crack_first_line(self):
+        r = self.first_line
+        m = self.first_line_re.match (r)
+        if m is not None and m.end() == len(r):
+            if m.group(3):
+                version = m.group(5)
+            else:
+                version = None
+            return m.group(1).upper(), m.group(2), version
+        else:
+            return None, None, None
+
+    path_regex = re.compile (
+    #     path    query     fragment
+        r'([^?#]*)(\?[^#]*)?(#.*)?'
+        )
+
+    def split_uri(self):
+        m = self.path_regex.match (self.uri)
+        if m.end() != len(self.uri):
+            raise ValueError, "Broken URI"
+        else:
+            path, query, self.fragment = m.groups()
+            if path and '%' in path:
+                path = unquote(path)
+            self.path = path
+            if query:
+                query = query[1:]
+            self.query = query
+
+    def getBodyStream(self):
+        body_rcv = self.body_rcv
+        if body_rcv is not None:
+            return body_rcv.getfile()
+        else:
+            return StringIO('')
diff --git a/http/tests/test_httprequestparser.py b/http/tests/test_httprequestparser.py
@@ -0,0 +1,113 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""HTTP Request Parser tests
+
+$Id$
+"""
+import unittest
+from zope.server.http.httprequestparser import HTTPRequestParser
+from zope.server.adjustments import Adjustments
+
+
+# Adjustments instance shared by every parser the tests create.
+my_adj = Adjustments()
+
+class Tests(unittest.TestCase):
+
+    def setUp(self):
+        """Creates a fresh parser for each test, using the shared my_adj."""
+        self.parser = HTTPRequestParser(my_adj)
+
+    def feed(self, data):
+        parser = self.parser
+        for n in xrange(100): # make sure we never loop forever
+            consumed = parser.received(data)
+            data = data[consumed:]
+            if parser.completed:
+                return
+        raise ValueError, 'Looping'
+
+    def testSimpleGET(self):
+        data = """\
+GET /foobar HTTP/8.4
+FirstName: mickey
+lastname: Mouse
+content-length: 7
+
+Hello.
+"""
+        parser = self.parser
+        self.feed(data)
+        self.failUnless(parser.completed)
+        self.assertEqual(parser.version, '8.4')
+        self.failIf(parser.empty)
+        self.assertEqual(parser.headers,
+                         {'FIRSTNAME': 'mickey',
+                          'LASTNAME': 'Mouse',
+                          'CONTENT_LENGTH': '7',
+                          })
+        self.assertEqual(parser.path, '/foobar')
+        self.assertEqual(parser.command, 'GET')
+        self.assertEqual(parser.query, None)
+        self.assertEqual(parser.getBodyStream().getvalue(), 'Hello.\n')
+
+    def testComplexGET(self):
+        data = """\
+GET /foo/a+%2B%2F%C3%A4%3D%26a%3Aint?d=b+%2B%2F%3D%26b%3Aint&c+%2B%2F%3D%26c%3Aint=6 HTTP/8.4
+FirstName: mickey
+lastname: Mouse
+content-length: 10
+
+Hello mickey.
+"""
+        parser = self.parser
+        self.feed(data)
+        self.assertEqual(parser.command, 'GET')
+        self.assertEqual(parser.version, '8.4')
+        self.failIf(parser.empty)
+        self.assertEqual(parser.headers,
+                         {'FIRSTNAME': 'mickey',
+                          'LASTNAME': 'Mouse',
+                          'CONTENT_LENGTH': '10',
+                          })
+        # path should be utf-8 encoded
+        self.assertEqual(parser.path, '/foo/a++/\xc3\xa4=&a:int')
+        self.assertEqual(parser.query,
+                         'd=b+%2B%2F%3D%26b%3Aint&c+%2B%2F%3D%26c%3Aint=6')
+        self.assertEqual(parser.getBodyStream().getvalue(), 'Hello mick')
+
+    def testDuplicateHeaders(self):
+        # Ensure that headers with the same key get concatenated as per
+        # RFC2616.
+        data = """\
+GET /foobar HTTP/8.4
+x-forwarded-for: 10.11.12.13
+x-forwarded-for: unknown,127.0.0.1
+X-Forwarded_for: 255.255.255.255
+content-length: 7
+
+Hello.
+"""
+        self.feed(data)
+        self.failUnless(self.parser.completed)
+        self.assertEqual(self.parser.headers, {
+                'CONTENT_LENGTH': '7',
+                'X_FORWARDED_FOR':
+                    '10.11.12.13, unknown,127.0.0.1, 255.255.255.255',
+                })
+
+def test_suite():
+    loader = unittest.TestLoader()
+    return loader.loadTestsFromTestCase(Tests)
+
+# Run the suite with a text runner when this file is executed as a script.
+if __name__=='__main__':
+    unittest.TextTestRunner().run(test_suite())