Skip to content

Commit

Permalink
Merge of 37885 from trunk
Browse files Browse the repository at this point in the history
  • Loading branch information
stub42 committed Aug 12, 2005
0 parents commit fa306d8
Show file tree
Hide file tree
Showing 3 changed files with 628 additions and 0 deletions.
214 changes: 214 additions & 0 deletions http/httprequestparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""HTTP Request Parser
This server uses asyncore to accept connections and do initial
processing but threads to do work.
$Id$
"""
import re
from urllib import unquote

from zope.server.fixedstreamreceiver import FixedStreamReceiver
from zope.server.buffers import OverflowableBuffer
from zope.server.utilities import find_double_newline
from zope.server.interfaces import IStreamConsumer
from zope.interface import implements

try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO


class HTTPRequestParser(object):
    """Incrementally collect and parse a single HTTP request.

    Raw stream data is fed in through received().  Once the header block
    and any request body have been read, the ``completed`` flag is set;
    the instance is then passed to a server task constructor.
    """

    implements(IStreamConsumer)

    completed = 0  # Set once request is completed.
    empty = 0  # Set if no request was made.
    header_plus = ''  # Header text buffered while the header is incomplete.
    chunked = 0  # Set when the body uses the chunked transfer coding.
    content_length = 0
    body_rcv = None  # Body receiver; None while in the header or if no body.
    # Other attributes: first_line, header, headers, command, uri, version,
    # path, query, fragment

    # headers is a mapping containing keys translated to uppercase
    # with dashes turned into underscores.

    def __init__(self, adj):
        """Create a parser.

        adj is an Adjustments object; its ``inbuf_overflow`` setting is
        used when a buffer for the request body is created.
        """
        self.headers = {}
        self.adj = adj

    def received(self, data):
        """Receive the HTTP stream for one request.

        Returns the number of bytes of ``data`` that were consumed.
        Sets the completed flag once both the header and the body have
        been received.
        """
        if self.completed:
            return 0  # Can't consume any more.
        datalen = len(data)
        br = self.body_rcv
        if br is not None:
            # In body: delegate to the body receiver.
            consumed = br.received(data)
            if br.completed:
                self.completed = 1
            return consumed

        # In header.
        s = self.header_plus + data
        index = find_double_newline(s)
        if index < 0:
            # Header not finished yet; buffer everything seen so far.
            self.header_plus = s
            return datalen

        # Header finished.  Everything in s from index onwards is left
        # unconsumed, and all of that remainder comes from data.
        header_plus = s[:index]
        consumed = datalen - (len(s) - index)
        self.in_header = 0
        # Remove preceding blank lines.
        header_plus = header_plus.lstrip()
        if not header_plus:
            self.empty = 1
            self.completed = 1
        else:
            self.parse_header(header_plus)
            if self.body_rcv is None:
                # No body expected, so the request is already complete.
                self.completed = 1
        return consumed

    def parse_header(self, header_plus):
        """Parse the header_plus block of text.

        header_plus is the first line of the request plus the headers.
        Fills in first_line, header, headers, command, uri, version and
        (through split_uri) path, query and fragment, then sets up
        body_rcv when the headers announce a request body.

        Raises ValueError (from split_uri) if the URI is broken.
        """
        index = header_plus.find('\n')
        if index >= 0:
            first_line = header_plus[:index].rstrip()
            header = header_plus[index + 1:]
        else:
            first_line = header_plus.rstrip()
            header = ''
        self.first_line = first_line
        self.header = header

        headers = self.headers
        for line in self.get_header_lines():
            index = line.find(':')
            if index <= 0:
                # Garbage in the headers; ignore the line.
                continue
            key = line[:index].upper().replace('-', '_')
            value = line[index + 1:].strip()
            # If a header already exists, we append subsequent values
            # separated by a comma.  Applications already need to handle
            # the comma separated values, as HTTP front ends might do
            # the concatenation for you (behavior specified in RFC2616).
            if key in headers:
                headers[key] += ', %s' % value
            else:
                headers[key] = value

        command, uri, version = self.crack_first_line()
        self.command = str(command)
        self.uri = str(uri)
        self.version = version
        self.split_uri()

        if version == '1.1':
            te = headers.get('TRANSFER_ENCODING', '')
            # Transfer-coding names are case-insensitive (RFC 2616, 3.6).
            if te.lower() == 'chunked':
                from zope.server.http.chunking import ChunkedReceiver
                self.chunked = 1
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = ChunkedReceiver(buf)
        if not self.chunked:
            try:
                cl = int(headers.get('CONTENT_LENGTH', 0))
            except ValueError:
                # Unparsable Content-Length is treated as "no body".
                cl = 0
            self.content_length = cl
            if cl > 0:
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = FixedStreamReceiver(cl, buf)

    def get_header_lines(self):
        """Split the header into lines, joining multi-line headers.

        A line that starts with a space or tab is a continuation and is
        appended (minus its first character) to the previous line.
        Returns the list of logical header lines.
        """
        r = []
        for line in self.header.split('\n'):
            if line and line[0] in ' \t':
                if not r:
                    # Continuation with no header line before it: there
                    # is nothing to attach it to, so ignore it like any
                    # other garbage instead of failing on r[-1].
                    continue
                r[-1] = r[-1] + line[1:]
            else:
                r.append(line)
        return r

    # Groups: 1 = command, 2 = URI (an optional scheme://host prefix is
    # matched but not captured), 3 = " HTTP/x.y" or empty, 5 = version.
    first_line_re = re.compile(
        '([^ ]+) (?:[^ :?#]+://[^ ?#/]*)?([^ ]+)(( HTTP/([0-9.]+))$|$)')

    def crack_first_line(self):
        """Split the request line into (command, uri, version).

        The command is upper-cased.  version is None when the request
        line carries no " HTTP/x.y" suffix.  Returns (None, None, None)
        if the line does not match first_line_re.
        """
        r = self.first_line
        m = self.first_line_re.match(r)
        if m is None or m.end() != len(r):
            return None, None, None
        if m.group(3):
            version = m.group(5)
        else:
            version = None
        return m.group(1).upper(), m.group(2), version

    path_regex = re.compile(
        #  path     query     fragment
        r'([^?#]*)(\?[^#]*)?(#.*)?'
        )

    def split_uri(self):
        """Split self.uri into self.path, self.query and self.fragment.

        The path is URL-unquoted when it contains '%'.  The query is
        stored without its leading '?' and is not unquoted.  The
        fragment keeps its leading '#'.

        Raises ValueError if path_regex does not cover the whole URI.
        """
        m = self.path_regex.match(self.uri)
        if m.end() != len(self.uri):
            raise ValueError("Broken URI")
        path, query, self.fragment = m.groups()
        if path and '%' in path:
            path = unquote(path)
        self.path = path
        if query:
            query = query[1:]  # Drop the leading '?'.
        self.query = query

    def getBodyStream(self):
        """Return a file-like object holding the request body.

        Returns the body receiver's file, or an empty StringIO when the
        request has no body receiver.
        """
        body_rcv = self.body_rcv
        if body_rcv is not None:
            return body_rcv.getfile()
        else:
            return StringIO('')
113 changes: 113 additions & 0 deletions http/tests/test_httprequestparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""HTTP Request Parser tests
$Id$
"""
import unittest
from zope.server.http.httprequestparser import HTTPRequestParser
from zope.server.adjustments import Adjustments


# Adjustments instance (built with no arguments) that setUp passes to
# every HTTPRequestParser created by the tests below.
my_adj = Adjustments()

class Tests(unittest.TestCase):
    """Tests for HTTPRequestParser using complete, literal requests.

    Each request literal separates the header block from the body with
    a blank line; without it the parser never sees the end of the
    header and feed() gives up with 'Looping'.
    """

    def setUp(self):
        # A fresh parser per test; a parser handles a single request.
        self.parser = HTTPRequestParser(my_adj)

    def feed(self, data):
        """Feed data to the parser until it reports completion.

        Raises ValueError if the parser has not completed after 100
        calls to received().
        """
        parser = self.parser
        for n in xrange(100):  # make sure we never loop forever
            consumed = parser.received(data)
            data = data[consumed:]
            if parser.completed:
                return
        raise ValueError('Looping')

    def testSimpleGET(self):
        data = """\
GET /foobar HTTP/8.4
FirstName: mickey
lastname: Mouse
content-length: 7

Hello.
"""
        parser = self.parser
        self.feed(data)
        self.failUnless(parser.completed)
        self.assertEqual(parser.version, '8.4')
        self.failIf(parser.empty)
        self.assertEqual(parser.headers,
                         {'FIRSTNAME': 'mickey',
                          'LASTNAME': 'Mouse',
                          'CONTENT_LENGTH': '7',
                          })
        self.assertEqual(parser.path, '/foobar')
        self.assertEqual(parser.command, 'GET')
        self.assertEqual(parser.query, None)
        self.assertEqual(parser.getBodyStream().getvalue(), 'Hello.\n')

    def testComplexGET(self):
        data = """\
GET /foo/a+%2B%2F%C3%A4%3D%26a%3Aint?d=b+%2B%2F%3D%26b%3Aint&c+%2B%2F%3D%26c%3Aint=6 HTTP/8.4
FirstName: mickey
lastname: Mouse
content-length: 10

Hello mickey.
"""
        parser = self.parser
        self.feed(data)
        self.assertEqual(parser.command, 'GET')
        self.assertEqual(parser.version, '8.4')
        self.failIf(parser.empty)
        self.assertEqual(parser.headers,
                         {'FIRSTNAME': 'mickey',
                          'LASTNAME': 'Mouse',
                          'CONTENT_LENGTH': '10',
                          })
        # path should be utf-8 encoded
        self.assertEqual(parser.path, '/foo/a++/\xc3\xa4=&a:int')
        # The query string is left quoted.
        self.assertEqual(parser.query,
                         'd=b+%2B%2F%3D%26b%3Aint&c+%2B%2F%3D%26c%3Aint=6')
        # Only content-length (10) bytes of the body are read.
        self.assertEqual(parser.getBodyStream().getvalue(), 'Hello mick')

    def testDuplicateHeaders(self):
        # Ensure that headers with the same key get concatenated as per
        # RFC2616.  Keys are upper-cased with '-' turned into '_', so
        # 'X-Forwarded_for' lands on the same key as 'x-forwarded-for'.
        data = """\
GET /foobar HTTP/8.4
x-forwarded-for: 10.11.12.13
x-forwarded-for: unknown,127.0.0.1
X-Forwarded_for: 255.255.255.255
content-length: 7

Hello.
"""
        self.feed(data)
        self.failUnless(self.parser.completed)
        self.assertEqual(self.parser.headers, {
                'CONTENT_LENGTH': '7',
                'X_FORWARDED_FOR':
                    '10.11.12.13, unknown,127.0.0.1, 255.255.255.255',
                })

def test_suite():
    """Return a suite containing every test method of Tests."""
    return unittest.TestLoader().loadTestsFromTestCase(Tests)

# Run the suite with the plain-text runner when executed as a script.
if __name__ == '__main__':
    runner = unittest.TextTestRunner()
    runner.run(test_suite())
Loading

0 comments on commit fa306d8

Please sign in to comment.